"""
Medium MCP Server UI - Project Aether Edition v5.0
A premium Gradio interface for the Medium MCP Server with:
- 6 functional tabs for all 10 MCP tools
- Project Aether glassmorphism design (Prose Typography & Full Width)
- ElevenLabs voice selection with 43 voices
- PDF Export (Robust: WeasyPrint -> Playwright Fallback)
- Dynamic Settings & Tag Search
- Real Analyst Intelligence (Groq primary; Gemini and OpenAI as fallbacks) with fallback Search
- Unified Preview/Output View via Iframe
"""
import gradio as gr
import asyncio
import os
import sys
import json
import markdown
import shutil
import datetime
import uuid
import time
from pathlib import Path
from dotenv import load_dotenv
# Check dependencies
# WeasyPrint is optional: importing it, or rendering with it, can fail when its
# native libraries are missing.  WEASYPRINT_AVAILABLE records whether the PDF
# export code may use it.
import logging

WEASYPRINT_AVAILABLE = False
try:
    # Keep WeasyPrint's logger quiet so missing-DLL warnings do not alarm users.
    logging.getLogger("weasyprint").setLevel(logging.ERROR)
    from weasyprint import HTML as WP_HTML, CSS as WP_CSS

    # Render a throwaway document: native-library errors only surface when a
    # PDF is actually produced, so a successful import alone proves nothing.
    WP_HTML(string="\ntest\n").write_pdf()
    WEASYPRINT_AVAILABLE = True
except Exception:
    # Best-effort probe: any failure simply disables the WeasyPrint path.
    WEASYPRINT_AVAILABLE = False
# Windows only: switch asyncio to the selector event loop (prevents some 10054 errors).
# NOTE(review): the selector loop on Windows cannot spawn subprocesses — confirm
# the Playwright PDF path still works under this policy.
if sys.platform == "win32":
    _selector_policy = asyncio.WindowsSelectorEventLoopPolicy()
    asyncio.set_event_loop_policy(_selector_policy)

# Load environment variables from the .env file located next to this module.
_env_file = os.path.join(os.path.dirname(__file__), ".env")
load_dotenv(_env_file)
# No sys.path needed - src/ is in same project now
# Import from server and scraper
from src.service import ScraperService
# Import renderer for explicit usage
from src.html_renderer import render_full_page, BASE_TEMPLATE as RENDERER_TEMPLATE
from src.utils import upgrade_medium_image_url
from src.config import MCPConfig
from elevenlabs_voices import ELEVENLABS_VOICES, VOICE_CATEGORIES, get_voice_id
# Import Gemini for Analyst (backup) - Using new google.genai SDK
from google import genai
from google.genai import types
# Import Groq for primary LLM
from groq import Groq
# ============================================================================
# PROJECT AETHER: VISUAL SYSTEM (ENHANCED)
# ============================================================================
# Stylesheet for the "Project Aether" look, kept as one CSS string.
# Sections, in order: web fonts and palette variables, tab layout, skeleton
# placeholders, the export control bar, prose/markdown typography, omnibar,
# action pills, article cards, digital paper, settings status cards, the sonic
# player, the full-width iframe, batch URL rows, and layout helpers.
# NOTE(review): presumably handed to the Gradio app as custom CSS — the usage
# is outside this view.
AETHER_CSS = """
@import url('https://fonts.googleapis.com/css2?family=Playfair+Display:wght@400;700&family=Inter:wght@300;400;600&family=JetBrains+Mono:wght@400;600&display=swap');
:root {
--aether-bg: #121212;
--aether-surface: #1e1e1e;
--aether-accent: #6366f1;
--aether-text: #ffffff;
--aether-text-muted: #a1a1aa;
--glass-bg: rgba(30, 30, 30, 0.6);
--glass-border: rgba(255, 255, 255, 0.1);
--status-green: #22c55e;
--status-red: #ef4444;
}
body, .gradio-container {
background-color: var(--aether-bg) !important;
font-family: 'Inter', sans-serif !important;
color: var(--aether-text) !important;
}
h1, h2, h3, .prose h1, .prose h2, .prose h3 {
font-family: 'Playfair Display', serif !important;
letter-spacing: -0.02em;
}
footer { display: none !important; }
.gradio-container { max-width: 100% !important; margin: 0; padding: 0 20px; }
/* Center Tab Navigation - Aggressive approach */
.tabs {
display: flex !important;
flex-direction: column !important;
align-items: center !important;
}
.tabs > .tab-nav {
justify-content: center !important;
}
.tabs > .tabitem {
width: 100% !important;
}
/* Tab Content Container - Consistent Width */
.tabitem { width: 100% !important; }
.tabitem > div { width: 100% !important; }
/* Discover Results Container - Prevent Layout Shift */
.discover-results, .scrape-results {
min-height: 400px;
width: 100% !important;
}
/* Skeleton Article Placeholder */
.skeleton-article {
background: var(--aether-surface);
border: 1px solid var(--glass-border);
border-radius: 12px;
padding: 40px;
min-height: 500px;
}
.skeleton-header {
height: 32px;
background: linear-gradient(90deg, #2a2a2a 25%, #3a3a3a 50%, #2a2a2a 75%);
background-size: 200% 100%;
animation: shimmer 1.5s infinite;
border-radius: 8px;
margin-bottom: 16px;
width: 70%;
}
.skeleton-line {
height: 16px;
background: linear-gradient(90deg, #2a2a2a 25%, #3a3a3a 50%, #2a2a2a 75%);
background-size: 200% 100%;
animation: shimmer 1.5s infinite;
border-radius: 4px;
margin-bottom: 12px;
}
.skeleton-line:nth-child(odd) { width: 100%; }
.skeleton-line:nth-child(even) { width: 85%; }
@keyframes shimmer {
0% { background-position: 200% 0; }
100% { background-position: -200% 0; }
}
/* Export Section - Premium Styling */
.export-control-bar {
/* Override Gradio Variables LOCALLY */
--input-background-fill: #0b0b0b !important;
--background-fill-primary: #0b0b0b !important;
--background-fill-secondary: #0b0b0b !important;
--block-background-fill: #0b0b0b !important;
background: rgba(30,30,30,0.85);
backdrop-filter: blur(20px);
border: 1px solid rgba(255,255,255,0.1);
border-radius: 100px;
padding: 0 16px;
margin-top: 24px;
display: flex !important;
flex-wrap: nowrap !important;
flex-direction: row !important;
align-items: center !important;
gap: 12px;
height: 80px !important;
max-height: 80px !important;
min-height: 80px !important;
box-shadow: 0 10px 40px rgba(0,0,0,0.5);
width: 100% !important;
overflow: visible !important; /* CRITICAL FIX: Allow Dropdown Overflow */
z-index: 9999 !important; /* Ensure on top */
}
/* NUCLEAR CHECKLIST:
1. Containers -> Transparent
2. Input -> Black
3. Junk (Loaders, Close btns, Error chips) -> Hidden
*/
.export-control-bar > *,
.export-control-bar .gr-block,
.export-control-bar .gr-box,
.export-control-bar .gr-form,
.export-control-bar .gr-input,
.export-control-bar .wrap,
.export-control-bar .contain,
.export-control-bar label {
background: transparent !important;
border: none !important;
margin: 0 !important;
padding: 0 !important;
flex-wrap: nowrap !important;
}
/* 1. Format Selector */
.export-control-bar .dropdown-wrap { background: transparent !important; border: none !important; }
.export-control-bar .gr-dropdown {
width: 100px !important;
flex-shrink: 0 !important;
overflow: visible !important;
}
.export-control-bar .gr-dropdown .wrap-inner {
border-radius: 50px !important;
background: rgba(255,255,255,0.08) !important;
border: 1px solid rgba(255,255,255,0.1) !important;
}
.export-control-bar .gr-dropdown input {
color: white !important;
text-align: center !important;
font-weight: 600 !important;
height: 48px !important;
}
.export-control-bar .gr-dropdown ul.options {
background: #1a1a1a !important;
border: 1px solid #333 !important;
z-index: 10000 !important;
}
/* 2. Download Button */
.export-control-bar .action-pill {
border-radius: 50px !important;
height: 48px !important;
background: var(--aether-accent) !important;
color: white !important;
font-weight: 700 !important;
margin-left: auto !important;
flex: 0 0 160px !important;
width: 160px !important;
}
/* 3. Text inputs (Status & Filename) */
/* 3. Text inputs (Status & Filename) - FORCE BLACK FIX */
/* 3. Text inputs (Status & Filename) - LIGHT MODE PERFECTION */
.export-control-bar textarea,
.export-control-bar textarea:disabled,
.export-control-bar textarea:read-only,
.export-control-bar textarea::placeholder {
background-color: #ffffff !important; /* White background */
background: #ffffff !important;
color: #000000 !important; /* Black text */
-webkit-text-fill-color: #000000 !important; /* CRITICAL for disabled inputs */
opacity: 1 !important;
border: 1px solid rgba(255,255,255,0.1) !important;
border-radius: 8px !important;
font-size: 13px !important;
text-align: right !important;
line-height: 80px !important;
height: 80px !important;
padding: 0 12px !important;
box-shadow: none !important;
overflow: hidden !important;
white-space: nowrap !important;
font-weight: 700 !important; /* Bolder text */
cursor: default !important;
pointer-events: none !important;
}
/* Force Wrapper Transparency */
.export-control-bar .block,
.export-control-bar .wrap,
.export-control-bar .gradio-container,
.export-control-bar label,
.export-control-bar .input-container {
background-color: transparent !important;
background: transparent !important;
border: none !important;
}
/* 4. REMOVE JUNK (Cross signs, Error chips, Loaders) */
.export-control-bar .loader,
.export-control-bar .loading,
.export-control-bar .meta-text,
.export-control-bar .progress-text,
.export-control-bar .clear-button, /* The 'Cross Sign' */
.export-control-bar .remove-button,
.export-control-bar .icon-button, /* Generic icon button (includes clear) */
.export-control-bar .toast-wrap, /* Error Chips */
.export-control-bar button[aria-label="Clear"] {
display: none !important;
width: 0 !important;
height: 0 !important;
opacity: 0 !important;
visibility: hidden !important;
pointer-events: none !important;
}
/* Flex area for middle content */
.export-control-bar .file-display-area,
.export-control-bar .status-display-area {
flex: 1 1 auto !important;
min-width: 0 !important;
display: flex;
justify-content: flex-end;
align-items: center;
}
/* Dropdown Arrow Fix */
.export-control-bar .icon { fill: white !important; }
/* File Helper - Make it blend in perfectly */
.export-control-bar .file-preview {
background: transparent !important;
border: none !important;
margin: 0 !important;
padding: 0 !important;
height: 100% !important;
display: flex;
align-items: center;
justify-content: flex-end;
color: #e5e5e5;
}
.export-control-bar .file-name { font-size: 13px; font-family: monospace; }
.export-control-bar .file-size { display: none !important; } /* Hide file size to save space */
/* Prose Styling for Markdown (Analyst Reports) */
.prose {
font-size: 1.1rem;
line-height: 1.75;
color: #e5e5e5;
}
.prose h1, .prose h2, .prose h3 { margin-top: 2em; margin-bottom: 0.5em; line-height: 1.25; font-family: 'Playfair Display', serif; }
.prose p { margin-top: 1.25em; margin-bottom: 1.25em; }
.prose a { color: var(--aether-accent); text-decoration: none; }
.prose a:hover { text-decoration: underline; }
.prose code {
background-color: #2d2d2d;
padding: 0.2em 0.4em;
border-radius: 4px;
font-family: 'JetBrains Mono', monospace;
font-size: 0.9em;
}
.prose pre {
background-color: #1a1a1a;
padding: 1.5em;
border-radius: 8px;
overflow-x: auto;
border: 1px solid var(--glass-border);
}
.prose pre code { background: none; padding: 0; color: #e5e5e5; }
.prose blockquote {
border-left: 4px solid var(--aether-accent);
padding-left: 1em;
font-style: italic;
color: #a1a1aa;
margin: 1.5em 0;
}
.prose table { width: 100%; border-collapse: collapse; margin: 2em 0; }
.prose th, .prose td { padding: 0.75em; border-bottom: 1px solid var(--glass-border); text-align: left; }
.prose th { font-weight: 600; color: #fff; }
/* Omnibar */
.omnibar {
border: none !important;
background: transparent !important;
box-shadow: none !important;
padding: 0 !important;
height: 60px !important;
}
.omnibar label { display: block !important; height: 100% !important; margin: 0 !important; padding: 0 !important; border: none !important; background: transparent !important; }
.omnibar label span { display: none !important; }
.omnibar textarea, .omnibar input {
background: var(--glass-bg) !important;
border: 1px solid var(--glass-border) !important;
backdrop-filter: blur(12px);
border-radius: 16px !important;
color: white !important;
font-size: 1.1rem !important;
padding: 16px 20px !important;
transition: all 0.3s ease;
box-shadow: 0 4px 30px rgba(0, 0, 0, 0.1);
height: 100% !important;
}
.omnibar textarea:focus, .omnibar input:focus {
border-color: var(--aether-accent) !important;
box-shadow: 0 0 0 2px rgba(99, 102, 241, 0.2), 0 8px 40px rgba(0, 0, 0, 0.2);
}
/* Action Pills */
.action-pill {
background: var(--aether-accent) !important;
border: none !important;
border-radius: 50px !important;
color: white !important;
font-weight: 600 !important;
text-transform: uppercase;
letter-spacing: 0.05em;
padding: 0 30px !important;
transition: all 0.2s ease;
height: 60px !important;
box-shadow: 0 4px 15px rgba(99, 102, 241, 0.3);
}
.action-pill:hover {
transform: scale(1.05);
box-shadow: 0 0 20px rgba(99, 102, 241, 0.4);
}
/* Cards */
.aether-card {
background: var(--aether-surface);
border: 1px solid var(--glass-border);
border-radius: 12px;
overflow: hidden;
transition: all 0.4s cubic-bezier(0.175, 0.885, 0.32, 1.275);
position: relative;
display: block;
text-decoration: none;
color: inherit;
height: 100%;
}
.aether-card:hover {
transform: translateY(-8px) rotateX(2deg);
box-shadow: 0 20px 40px rgba(0,0,0,0.4);
border-color: var(--aether-accent);
z-index: 10;
}
.aether-card-img {
height: 180px;
background-size: cover;
background-position: center;
position: relative;
}
.aether-card-img::after {
content: '';
position: absolute;
bottom: 0;
left: 0;
right: 0;
height: 60%;
background: linear-gradient(to top, rgba(0,0,0,0.8), transparent);
}
/* Digital Paper */
.digital-paper {
background: #050505;
color: #e5e5e5 !important;
padding: 60px;
border-radius: 8px;
border: 1px solid rgba(255, 255, 255, 0.15);
box-shadow: 0 20px 60px rgba(0,0,0,0.6);
font-family: 'Playfair Display', serif;
line-height: 1.9;
font-size: 1.1rem;
max-width: 100%; /* Full Width */
margin: 0 auto;
}
.digital-paper h1, .digital-paper h2, .digital-paper h3, .digital-paper p, .digital-paper li { color: #e5e5e5 !important; }
.digital-paper h1 { border-bottom: 1px solid rgba(255,255,255,0.2); padding-bottom: 20px; margin-bottom: 30px; font-size: 2.5rem; }
.digital-paper blockquote { border-left: 4px solid var(--aether-accent); padding-left: 20px; font-style: italic; color: #a1a1aa !important; }
/* Status Cards in Settings */
.status-card {
background: rgba(255,255,255,0.03);
border: 1px solid var(--glass-border);
border-radius: 12px;
padding: 20px;
display: flex;
align-items: center;
gap: 15px;
margin-bottom: 15px;
}
.status-card.active { border-color: var(--status-green); background: rgba(34, 197, 94, 0.05); }
.status-card.missing { border-color: var(--status-red); background: rgba(239, 68, 68, 0.05); }
/* Sonic Player */
.sonic-container {
background: linear-gradient(135deg, rgba(99, 102, 241, 0.1), rgba(0,0,0,0));
border: 1px solid var(--glass-border);
border-radius: 24px;
padding: 30px;
backdrop-filter: blur(20px);
}
.album-art {
width: 100%;
aspect-ratio: 1/1;
background: linear-gradient(135deg, #6366f1, #8b5cf6);
border-radius: 20px;
display: flex;
align-items: center;
justify-content: center;
box-shadow: 0 20px 50px -10px rgba(99, 102, 241, 0.5);
margin-bottom: 20px;
}
/* Full Width Iframe */
.full-width-iframe iframe {
width: 100% !important;
height: 85vh !important;
border: none !important;
border-radius: 8px !important;
background: white !important;
}
/* Dynamic URL Input Rows (Batch Tab) */
.url-input-row {
background: rgba(255,255,255,0.03) !important;
border: 1px solid rgba(255,255,255,0.1) !important;
border-radius: 12px !important;
padding: 8px 12px !important;
margin-bottom: 8px !important;
transition: all 0.2s ease !important;
}
.url-input-row:hover {
background: rgba(255,255,255,0.06) !important;
border-color: var(--aether-accent) !important;
}
.url-single-input input, .url-single-input textarea {
background: transparent !important;
border: none !important;
color: white !important;
font-size: 14px !important;
}
.url-single-input input::placeholder { color: #666 !important; }
.url-delete-btn {
background: rgba(239, 68, 68, 0.2) !important;
border: 1px solid rgba(239, 68, 68, 0.3) !important;
border-radius: 8px !important;
color: #ef4444 !important;
font-size: 16px !important;
min-width: 40px !important;
max-width: 40px !important;
height: 40px !important;
padding: 0 !important;
transition: all 0.2s ease !important;
}
.url-delete-btn:hover {
background: rgba(239, 68, 68, 0.4) !important;
transform: scale(1.1);
}
.action-pill-secondary {
background: rgba(99, 102, 241, 0.2) !important;
border: 1px solid rgba(99, 102, 241, 0.3) !important;
border-radius: 50px !important;
color: white !important;
font-weight: 600 !important;
padding: 12px 24px !important;
transition: all 0.2s ease !important;
}
.action-pill-secondary:hover {
background: rgba(99, 102, 241, 0.4) !important;
transform: translateY(-2px);
}
/* Simple Row Alignment - Vertically center items in hero rows */
.hero-section .row,
.hero-section .gr-row {
align-items: center !important;
}
/* Premium Tab - Constrained width for cleaner look */
.premium-tab {
max-width: 900px !important;
margin-left: auto !important;
margin-right: auto !important;
}
/* Export Row - Force equal width columns */
.export-row > div {
flex: 1 1 33.33% !important;
max-width: 33.33% !important;
min-width: 0 !important;
}
"""
# ============================================================================
# UI CONTEXT MOCK
# ============================================================================
# Singleton scraper/config shared by every UI handler
_ui_scraper = None
_ui_config = None


async def get_ui_context():
    """Return the shared ``(scraper, config)`` pair, creating it on first use.

    The first call loads the configuration with ``MCPConfig.from_env()``,
    makes sure ``outputs/`` and ``outputs/batch/`` exist, and creates a
    ``ScraperService`` with two workers.  Later calls reuse the same objects.

    Returns:
        tuple: ``(scraper, config)``.

    Raises:
        Whatever ``MCPConfig.from_env()`` or ``ensure_initialized()`` raises.
        The cached scraper is cleared when initialization fails, so the next
        call retries instead of handing out a scraper that never started.
    """
    global _ui_scraper, _ui_config
    if _ui_scraper is None:
        _ui_config = MCPConfig.from_env()
        # Creates "outputs" as well, since it is the parent directory.
        os.makedirs("outputs/batch", exist_ok=True)
        _ui_scraper = ScraperService(max_workers=2)
        try:
            await _ui_scraper.ensure_initialized()
        except BaseException:
            # Covers cancellation too: never keep a half-initialized scraper cached.
            _ui_scraper = None
            raise
    return _ui_scraper, _ui_config
# ============================================================================
# VOICE DATA
# ============================================================================
def get_voice_choices():
    """Build the ``(label, voice_key)`` pairs offered by the voice dropdown.

    Three featured voices come first; every other voice listed in
    ``VOICE_CATEGORIES`` follows, labelled ``"<Category>: <Voice>"``.
    """
    featured = [
        ("🎙️ George (British, warm) - DEFAULT", "george"),
        ("🎙️ Adam (American, deep)", "adam"),
        ("🎙️ Pearson (Newsreader)", "sarah"),
    ]
    featured_keys = ["george", "adam", "sarah"]
    # Featured voices are skipped here so they are not listed twice.
    remaining = [
        (f"{category.title()}: {voice.title()}", voice)
        for category, voices in VOICE_CATEGORIES.items()
        for voice in voices
        if voice not in featured_keys
    ]
    return featured + remaining


VOICE_CHOICES = get_voice_choices()
# ============================================================================
# TAB 1: DISCOVER
# ============================================================================
async def search_articles(query: str, max_articles: int = 5):
    """Search Medium for *query* and return the results rendered as cards.

    Args:
        query: Free-text search terms.  Empty or whitespace-only input is
            rejected before any scraping happens.
        max_articles: Upper bound forwarded to ``scraper.scrape_search``.

    Returns:
        str: The output of ``render_cards``, or a short message when the
        query is missing or the search raised.
    """
    if not query or not query.strip():
        return "Please enter a search query.\n"
    try:
        scraper, _ = await get_ui_context()
        gr.Info(f"Searching for '{query}'...")
        results = await scraper.scrape_search(query, max_articles=max_articles)
        return render_cards(results, query=query)
    except Exception as e:
        # UI boundary: show the failure in the results pane instead of raising.
        return f"Error: {e}\n"
async def browse_tag(tag: str, max_articles: int = 5):
    """Browse a Medium tag and return its articles rendered as cards.

    Args:
        tag: Tag to browse.  Empty or whitespace-only input is rejected
            before any scraping happens.
        max_articles: Upper bound forwarded to ``scraper.scrape_tag``.

    Returns:
        str: The output of ``render_cards``, or a short message when the tag
        is missing or the scrape raised.
    """
    if not tag or not tag.strip():
        return "Please enter a tag.\n"
    try:
        scraper, _ = await get_ui_context()
        gr.Info(f"Browsing '{tag}'...")
        results = await scraper.scrape_tag(tag, max_articles=max_articles)
        return render_cards(results, query=tag)
    except Exception as e:
        # UI boundary: show the failure in the results pane instead of raising.
        return f"Error: {e}\n"
def render_empty_state(query: str, search_type: str, error: str = None):
    """Render a helpful empty state with suggestions.

    Args:
        query: What the user searched for; escaped before being embedded.
        search_type: ``"search"`` selects the rate-limit hint; any other
            value selects the "tag may not exist" hint.
        error: Optional error detail.  Only its first 100 characters are
            shown, escaped, on a ``Debug:`` line.

    Returns:
        str: The empty-state text with the first five suggested topics.
    """
    from html import escape

    suggestions = [
        "artificial-intelligence",
        "python",
        "machine-learning",
        "programming",
        "technology",
        "startup",
        "design",
    ]
    suggestion_chips = " ".join(suggestions[:5])

    # str() lets callers pass an exception object as well as a message.
    error_detail = f"Debug: {escape(str(error)[:100])}\n" if error else ""

    if search_type == "search":
        reason = "Medium might be rate-limiting requests. Try again in a moment."
    else:
        reason = "This tag may not exist or have no recent articles."

    # The query is user input and this text is rendered by the UI, so escape it.
    safe_query = escape(str(query))
    return f"""
🔍
No articles found for "{safe_query}"
{reason}
Try these popular topics:
{suggestion_chips}
💡 Tip: Use specific terms (e.g., "python async" instead of just "python")
{error_detail}
"""
def render_cards(results, query: str = ""):
    """Render scraped article summaries as a grid of linked cards.

    Args:
        results: Iterable of article dicts.  Entries without both a ``title``
            and a ``url`` are ignored.
        query: The search text, used only for the empty-state message.

    Returns:
        str: HTML for the card grid, or the empty state from
        ``render_empty_state`` when nothing usable was found.
    """
    from html import escape

    valid_results = [r for r in (results or []) if r and r.get('title') and r.get('url')]
    if not valid_results:
        return render_empty_state(query or "your search", "search")

    fallback_img = 'https://miro.medium.com/v2/resize:fit:1400/1*jfdwtvU6V6g99q3G7gq7dQ.png'

    # NOTE(review): the card markup was missing from the template in this file
    # (title/url/author/img were computed but never emitted).  This is a minimal
    # reconstruction built on the .aether-card / .aether-card-img classes from
    # AETHER_CSS — confirm against the intended design.
    cards = []
    for art in valid_results:
        title = str(art.get('title', 'No Title'))
        url = str(art.get('url', '#'))
        author = art.get('author') or 'Unknown'
        if isinstance(author, dict):
            author = author.get('name') or 'Unknown'
        # Ensure a high-resolution image (upgrade any low-res URLs).
        raw_img = art.get('imageUrl', '') or fallback_img
        img = upgrade_medium_image_url(raw_img, target_width=1400)
        cards.append(f"""
<a class="aether-card" href="{escape(url, quote=True)}" target="_blank" rel="noopener noreferrer">
<div class="aether-card-img" style="background-image: url('{escape(str(img), quote=True)}');"></div>
<div style="padding: 16px;">
<h3 style="margin: 0 0 8px 0;">{escape(title)}</h3>
<p style="margin: 0; color: var(--aether-text-muted);">{escape(str(author))}</p>
</div>
</a>
""")

    grid_open = '<div style="display: grid; grid-template-columns: repeat(auto-fill, minmax(280px, 1fr)); gap: 24px;">'
    return grid_open + "".join(cards) + "</div>\n"
# ============================================================================
# TAB 2: SCRAPE (UNIFIED)
# ============================================================================
async def scrape_and_render_combined(url: str):
    """Scrape an article and return an HTML preview of it.

    Args:
        url: Article URL; an empty value short-circuits with a message.

    Returns:
        str: The page from ``render_full_page`` embedded in an iframe, or a
        short message when the URL is missing or scraping failed.
    """
    from html import escape

    if not url:
        return "Please enter a URL."
    try:
        scraper, _ = await get_ui_context()
        gr.Info("Scraping Article...")
        article = await scraper.scrape_article(url)
        if not article:
            return "Failed to scrape."
        page = render_full_page(article)
        # NOTE(review): the wrapper markup was missing here (the function
        # returned an empty string and discarded the rendered page).  Rebuilt
        # as an iframe inside .full-width-iframe, the class AETHER_CSS styles —
        # confirm against the intended design.
        # srcdoc takes the whole document as an attribute value, so it must be escaped.
        srcdoc = escape(page, quote=True)
        return f'<div class="full-width-iframe"><iframe srcdoc="{srcdoc}"></iframe></div>'
    except Exception as e:
        # UI boundary: show the failure in the preview pane instead of raising.
        return f"Error: {e}"
async def _export_pdf_with_playwright(scraper, page_html, filepath):
    """Render *page_html* through ``scraper.render_pdf`` and write it to *filepath*."""
    pdf_bytes = await scraper.render_pdf(page_html)
    with open(filepath, "wb") as f:
        f.write(pdf_bytes)


async def export_article(url: str, format: str):
    """Scrape *url* and save it under ``outputs/`` in the requested format.

    Args:
        url: Article URL.
        format: ``"pdf"``, ``"html"`` or ``"json"``; any other value exports
            the article's markdown.

    Returns:
        str | None: Path of the written file, or ``None`` when the URL is
        missing or the export failed (a Gradio warning is shown instead).
    """
    if not url:
        gr.Warning("Please enter a URL first")
        return None
    try:
        scraper, _ = await get_ui_context()
        article = await scraper.scrape_article(url)
        if not article:
            gr.Warning("Export failed: the article could not be scraped")
            return None

        # Keep only alphanumerics so the title is safe as a file name.
        title = str(article.get('title') or 'export')
        safe_title = "".join(c if c.isalnum() else "_" for c in title)[:30]
        timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
        stem = f"outputs/{safe_title}_{timestamp}"

        if format == "pdf":
            page_html = render_full_page(article)
            filepath = f"{stem}.pdf"
            written = False
            if WEASYPRINT_AVAILABLE:
                # Primary engine: WeasyPrint.
                try:
                    WP_HTML(string=page_html).write_pdf(filepath)
                    written = True
                except Exception:
                    gr.Info("⚠️ WeasyPrint failed. Switching to Robust Engine (Playwright)...")
                    # Separate name so a partial WeasyPrint file is not mistaken for the result.
                    filepath = f"{stem}_pw.pdf"
            else:
                gr.Info("Generating PDF with Robust Engine (Playwright)...")
            if not written:
                await _export_pdf_with_playwright(scraper, page_html, filepath)
        elif format == "html":
            filepath = f"{stem}.html"
            with open(filepath, "w", encoding="utf-8") as f:
                f.write(render_full_page(article))
        elif format == "json":
            filepath = f"{stem}.json"
            with open(filepath, "w", encoding="utf-8") as f:
                json.dump(article, f, indent=2, default=str)
        else:  # markdown
            filepath = f"{stem}.md"
            with open(filepath, "w", encoding="utf-8") as f:
                f.write(article.get('markdownContent') or '')

        gr.Info(f"✅ Exported: {filepath}")
        return filepath
    except Exception as e:
        # UI boundary: surface the failure as a warning toast.
        gr.Warning(f"Export failed: {str(e)}")
        return None
# ============================================================================
# TAB 3: BATCH
# ============================================================================
async def batch_scrape(urls_text: str, max_concurrency: int = 5):
    """Scrape up to 20 URLs, save each as PDF (or HTML) and zip the results.

    Args:
        urls_text: Newline-separated article URLs; blank lines are ignored.
        max_concurrency: Maximum number of articles processed at once.
            Values below 1 are treated as 1.

    Returns:
        tuple: ``(summary_html, zip_path)``.  ``zip_path`` is ``None`` when
        the input is rejected or the batch as a whole failed.
    """
    from html import escape

    if not urls_text or not urls_text.strip():
        return "Please enter URLs.", None
    urls = [u.strip() for u in urls_text.splitlines() if u.strip()]
    if len(urls) > 20:
        return "Max 20 URLs allowed.", None
    try:
        scraper, _ = await get_ui_context()
        # Create batch directory
        batch_id = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
        batch_dir = Path(f"outputs/batch/{batch_id}")
        batch_dir.mkdir(parents=True, exist_ok=True)
        gr.Info(f"Batch processing {len(urls)} articles...")
        # A semaphore of 0 would block every task forever, so clamp to 1.
        semaphore = asyncio.Semaphore(max(1, int(max_concurrency)))

        async def write_pdf(page_html, pdf_path):
            """Write a PDF with WeasyPrint when available, else via the scraper."""
            if WEASYPRINT_AVAILABLE:
                try:
                    WP_HTML(string=page_html).write_pdf(str(pdf_path))
                    return
                except Exception as e:
                    print(f"WeasyPrint failed for {pdf_path.name}, using Playwright: {e}")
            pdf_bytes = await scraper.render_pdf(page_html)
            pdf_path.write_bytes(pdf_bytes)

        async def process(index, url):
            """Scrape one URL and store it; never raises."""
            async with semaphore:
                try:
                    art = await scraper.scrape_article(url)
                    title = str(art.get('title') or 'unknown')
                    safe_title = "".join(c if c.isalnum() else "_" for c in title)[:30]
                    # The index prefix keeps same-titled articles from overwriting each other.
                    stem = f"{index:02d}_{safe_title}"
                    page_html = render_full_page(art)
                    file_path = batch_dir / f"{stem}.pdf"
                    try:
                        await write_pdf(page_html, file_path)
                    except Exception as e:
                        print(f"PDF generation failed for {url}: {e}")
                        # Drop any partial PDF so it does not end up in the archive.
                        if file_path.exists():
                            file_path.unlink()
                        # Final fallback -> HTML
                        file_path = batch_dir / f"{stem}.html"
                        file_path.write_text(page_html, encoding="utf-8")
                    return {"status": "✅", "title": title, "file": file_path.name}
                except Exception as e:
                    print(f"Scrape failed for {url}: {e}")
                    return {"status": "❌", "title": f"Failed: {url}", "file": "-"}

        results = await asyncio.gather(
            *(process(i, u) for i, u in enumerate(urls, start=1))
        )
        # Create ZIP
        zip_path = shutil.make_archive(f"outputs/batch_{batch_id}", 'zip', batch_dir)

        # Generate summary HTML
        # NOTE(review): the markup of this summary was missing from the file
        # (the colour variables were computed but unused); the inline-styled
        # rows below are a reconstruction — confirm against the intended design.
        success_count = sum(1 for r in results if r["status"] == "✅")
        fail_count = len(results) - success_count
        failed_badge = (
            f'<span style="color: #ef4444;">❌ {fail_count} Failed</span>'
            if fail_count > 0 else ''
        )
        parts = [f'''
<div style="padding: 20px;">
<h3>📦 Batch Results</h3>
<p><span style="color: #22c55e;">✅ {success_count} Success</span>
{failed_badge}</p>
''']
        for r in results:
            is_success = r["status"] == "✅"
            bg_color = "rgba(34,197,94,0.08)" if is_success else "rgba(239,68,68,0.08)"
            border_color = "rgba(34,197,94,0.3)" if is_success else "rgba(239,68,68,0.3)"
            status_color = "#22c55e" if is_success else "#ef4444"
            shown_title = r["title"][:50] + ("..." if len(r["title"]) > 50 else "")
            parts.append(f'''
<div style="display: flex; gap: 12px; align-items: center; padding: 10px 14px; margin-bottom: 8px; border-radius: 8px; background: {bg_color}; border: 1px solid {border_color};">
<span style="color: {status_color};">{r["status"]}</span>
<span style="flex: 1;">{escape(shown_title)}</span>
<span style="font-family: monospace;">{escape(r["file"])}</span>
</div>
''')
        parts.append('''
</div>
''')
        return "".join(parts), zip_path
    except Exception as e:
        # UI boundary: report the failure in the results pane instead of raising.
        return f"Error: {e}", None
# ============================================================================
# TAB 4: SONIC
# ============================================================================
def clean_text_for_audio(text: str) -> str:
    """Clean markdown text for audio narration - remove all non-speakable content.

    Strips code, images, bare URLs, HTML tags and markdown markers, keeps the
    visible text of links, and collapses whitespace.

    Args:
        text: Markdown (or plain) text; ``None`` or ``""`` yields ``""``.

    Returns:
        str: Plain text on a single line.
    """
    import re

    if not text:
        return ""

    # Order matters: images and links must be unwrapped BEFORE bare URLs are
    # removed.  Otherwise the URL pattern eats the closing ")" of
    # "[text](https://...)" and leaves "[text](" behind, and "![alt](url)"
    # degrades to "!alt".
    substitutions = (
        (r'```[\s\S]*?```', ' ', 0),                # fenced code blocks
        (r'`[^`]+`', ' ', 0),                       # inline code
        (r'!\[.*?\]\(.*?\)', ' ', 0),               # image references
        (r'\[([^\]]+)\]\([^)]+\)', r'\1', 0),       # [text](url) -> text
        (r'https?://\S+', ' ', 0),                  # bare URLs
        (r'^#{1,6}\s+', '', re.MULTILINE),          # markdown headers (# ## ###)
        (r'\*\*([^*]+)\*\*', r'\1', 0),             # **bold** -> bold
        (r'\*([^*]+)\*', r'\1', 0),                 # *italic* -> italic
        (r'__([^_]+)__', r'\1', 0),
        (r'_([^_]+)_', r'\1', 0),
        (r'<[^>]+>', ' ', 0),                       # HTML tags
        (r'[•◦▪▸►→←↑↓|~^]', ' ', 0),                # symbols that are not spoken
        (r'\s+', ' ', 0),                           # normalize whitespace
    )
    for pattern, replacement, flags in substitutions:
        text = re.sub(pattern, replacement, text, flags=flags)
    return text.strip()
def _audio_usable_summary(summary):
    """Return the cleaned summary when it is long and clean enough, else ``None``."""
    if not summary or len(summary) <= 50:
        return None
    cleaned = clean_text_for_audio(summary)
    lowered = cleaned.lower()
    leftovers = ('```', 'http://', 'https://', '**', '##')
    if len(cleaned) > 50 and not any(marker in lowered for marker in leftovers):
        return cleaned
    return None


def _audio_end_at_sentence(txt):
    """Trim *txt* to its last sentence end, provided at least half is kept."""
    if not txt:
        return txt
    last_end = max(txt.rfind('.'), txt.rfind('?'), txt.rfind('!'))
    if last_end > len(txt) * 0.5:
        return txt[:last_end + 1].strip()
    return txt


def _audio_fit_to_limit(text, limit=250):
    """Shorten *text* to at most *limit* characters, ending on a sentence if possible."""
    if len(text) <= limit:
        return text
    # Leave room for the full stop that may be appended below.
    clipped = text[:limit - 1].rsplit(' ', 1)[0]
    sentence = _audio_end_at_sentence(clipped)
    if sentence.endswith(('.', '?', '!')):
        return sentence
    return clipped.rstrip(' ,;:-') + '.'


async def _audio_gemini_summary(api_key, prompt):
    """Ask Gemini for a summary, trying the newer model first; raises if both fail."""
    client = genai.Client(api_key=api_key)
    last_error = None
    for model_name in ('gemini-2.0-flash', 'gemini-1.5-flash-latest'):
        try:
            response = await client.aio.models.generate_content(
                model=model_name, contents=prompt
            )
            return (response.text or "").strip()
        except Exception as exc:
            last_error = exc
    raise last_error


async def _audio_summarize(title, text):
    """Summarize *text* with Groq, then Gemini; return ``None`` when neither works."""
    groq_key = os.environ.get("GROQ_API_KEY")
    gemini_key = os.environ.get("GEMINI_API_KEY")
    # HEAVILY GUARDRAILED PROMPT - 250 char limit for ElevenLabs
    prompt = f"""Summarize this article in EXACTLY 2-3 sentences (under 250 characters total).
RULES:
- Plain English only, no markdown/formatting
- Must be under 250 characters
- End with a complete sentence
- Focus on the core message
Title: {title}
Content: {text[:2000]}
Write a 250-character summary:"""

    # Try Groq first (PRIMARY - fastest)
    if groq_key:
        try:
            gr.Info("Summarizing for audio with Groq...")
            client = Groq(api_key=groq_key)
            response = client.chat.completions.create(
                model="llama-3.1-8b-instant",
                messages=[{"role": "user", "content": prompt}],
                max_tokens=100,   # ~250 chars output
                temperature=0.5,  # More focused
            )
            summary = _audio_usable_summary(
                (response.choices[0].message.content or "").strip()
            )
            if summary:
                return summary
        except Exception as e:
            gr.Warning(f"Groq failed: {str(e)[:50]}, trying Gemini...")

    # Fallback to Gemini (BACKUP)
    if gemini_key:
        try:
            gr.Info("Summarizing for audio with Gemini...")
            summary = _audio_usable_summary(await _audio_gemini_summary(gemini_key, prompt))
            if summary:
                return summary
            gr.Warning("Summary had issues, using cleaned original")
        except Exception as e:
            gr.Warning(f"Gemini also failed: {str(e)[:50]}")
    return None


async def generate_audio(url, voice, summarize, max_chars):
    """Scrape an article and narrate it (or a summary of it) with ElevenLabs.

    Args:
        url: Article URL.
        voice: Voice key resolved through ``get_voice_id``.
        summarize: ``"none"`` narrates the cleaned article text; any other
            value first asks Groq, then Gemini, for a short summary.
        max_chars: Length the original text is cut to when summarizing fails.

    Returns:
        tuple: ``(status_message, audio_path)``; ``audio_path`` is ``None``
        whenever no audio file was produced.
    """
    if not url:
        return '⚠️ Please enter a URL\n', None
    try:
        scraper, _ = await get_ui_context()
        # Robust check for API Key
        api_key = os.environ.get("ELEVENLABS_API_KEY")
        if not api_key:
            return '⚠️ ELEVENLABS_API_KEY missing\n', None

        gr.Info("Scraping article...")
        art = await scraper.scrape_article(url) or {}
        raw_text = art.get('markdownContent') or ''
        title = art.get('title') or 'Audio'

        # Clean text for audio FIRST - remove markdown, URLs, code, etc.
        text = clean_text_for_audio(raw_text)
        # Bail out if scraping returned minimal content
        if not text or len(text) < 50:
            return '⚠️ Article too short\n', None

        if summarize != "none":
            summary = await _audio_summarize(title, text)
            # Final fallback: truncate original (sliders may deliver a float)
            text = summary if summary else text[:int(max_chars)]

        # Final safety check - ensure text is clean for TTS
        text = clean_text_for_audio(text)
        text = _audio_end_at_sentence(text)
        if len(text) < 20:
            return '⚠️ Not enough clean text for audio\n', None

        # HARD LIMIT: 250 characters for ElevenLabs.  The sentence trim is
        # re-applied after the cut so the narration does not stop mid-sentence.
        text = _audio_fit_to_limit(text, 250)

        gr.Info(f"Generating audio ({len(text)} chars)...")
        try:
            from elevenlabs.client import ElevenLabs
            client = ElevenLabs(api_key=api_key, timeout=60.0)
            voice_id = get_voice_id(voice)
            audio = client.text_to_speech.convert(
                text=text, voice_id=voice_id, model_id="eleven_multilingual_v2"
            )
            out_path = f"outputs/audio_{uuid.uuid4().hex[:6]}.mp3"
            with open(out_path, "wb") as f:
                for chunk in audio:
                    f.write(chunk)
            return f'✅ Ready: {title}\n', out_path
        except Exception as e:
            return f'⚠️ ElevenLabs Error: {str(e)}\n', None
    except Exception as e:
        # UI boundary: report the failure in the status area instead of raising.
        return f'❌ Error: {e}\n', None
# ============================================================================
# TAB 5: INTELLIGENCE (ENHANCED)
# ============================================================================
# Global to store last report for export
last_report_html = None
def _article_author(art):
    """Return a display name for an article's author, falling back to 'Unknown'.

    ``author`` may be a dict carrying a ``name`` key or a plain value; both are
    resolved the same way so the LLM prompt and the fallback digest agree.
    """
    author = art.get('author')
    if isinstance(author, dict):
        author = author.get('name')
    return author or 'Unknown'


async def analyst_report(topic):
    """Generate an analyst report by searching and synthesizing Medium articles.

    Searches Medium for ``topic``, pulls up to 2000 characters of markdown from
    each hit, and asks the first provider that answers (Groq, then Gemini, then
    OpenAI) to synthesize a report. If every provider fails, a Markdown digest
    of the articles found (title, link, author, reading time, claps) is used
    instead. Non-HTML content is converted from Markdown to HTML before return.

    Args:
        topic: Free-text topic entered by the user.

    Returns:
        A string for the HTML output component: the report, or a user-facing
        message when the topic/API keys are missing, nothing was found, or an
        error occurred.

    Side effects:
        On success the final HTML is stored in the module-level
        ``last_report_html`` so ``export_report_pdf`` can export it.
    """
    global last_report_html
    from html import escape  # local import: only this function needs it

    if not topic:
        return "Please enter a topic."

    groq_key = os.environ.get("GROQ_API_KEY")
    gemini_key = os.environ.get("GEMINI_API_KEY")
    openai_key = os.environ.get("OPENAI_API_KEY")
    if not groq_key and not gemini_key and not openai_key:
        return "⚠️ Error: No AI API keys found. Set GROQ_API_KEY, GEMINI_API_KEY, or OPENAI_API_KEY in your .env file."

    # Escaped once for every place the topic is echoed back into HTML output.
    safe_topic = escape(str(topic))
    max_articles = 5
    try:
        scraper, _ = await get_ui_context()
        gr.Info(f"Analyst: Researching '{topic}'...")

        # 1. Search for articles
        articles = await scraper.scrape_search(topic, max_articles=max_articles)
        if not articles:
            return f"""
No Results Found
No articles found for '{safe_topic}'. Try a broader or different search term.
"""
        gr.Info(f"Found {len(articles)} articles. Extracting content...")

        # 2. Extract content from articles (concurrently)
        async def get_article_content(art):
            url = art.get('url')
            title = art.get('title', 'Untitled')
            author = _article_author(art)
            try:
                full_art = await scraper.scrape_article(url)
                # Cap each article so the combined prompt stays small.
                content = (full_art.get("markdownContent") or "")[:2000]
            except Exception:
                # Best effort: one unreadable article must not sink the report.
                content = "(Content unavailable)"
            return f"\nTitle: {title}\nAuthor: {author}\nURL: {url}\nContent:\n{content}\n"

        results = await asyncio.gather(*(get_article_content(art) for art in articles))
        context_text = "".join(results)

        # 3. Create synthesis prompt
        prompt = f"""You are a tech analyst. Synthesize the following Medium articles into a 'State of the Union' report.
Topic: {topic}
Structure your report:
1. Executive Summary (2-3 sentences)
2. Key Trends
3. Notable Insights
4. Contrarian Views (if any)
5. Recommended Reading
Articles:
{context_text}
"""
        gr.Info("Analyst: Synthesizing report...")
        report_content = ""

        # 4. Try Groq first (PRIMARY - fastest)
        if groq_key:
            try:
                client = Groq(api_key=groq_key)
                # The Groq client is synchronous; run it in a worker thread so
                # the event loop is not blocked while the model responds.
                response = await asyncio.to_thread(
                    client.chat.completions.create,
                    model="llama-3.3-70b-versatile",  # Best model for synthesis
                    messages=[{"role": "user", "content": prompt}],
                    max_tokens=2000,
                    temperature=0.7,
                )
                report_content = response.choices[0].message.content or ""
                if report_content:
                    gr.Info("Report generated via Groq")
            except Exception as e:
                gr.Warning(f"Groq failed: {str(e)[:100]}, trying Gemini...")

        # 5. Fallback to Gemini
        if not report_content and gemini_key:
            try:
                # `genai` is the google.genai SDK: it exposes a Client object,
                # not the legacy configure()/GenerativeModel API.
                gemini = genai.Client(api_key=gemini_key)
                try:
                    response = await gemini.aio.models.generate_content(
                        model='gemini-2.0-flash', contents=prompt
                    )
                except Exception:
                    # Retry once on the older model before giving up on Gemini.
                    response = await gemini.aio.models.generate_content(
                        model='gemini-1.5-flash-latest', contents=prompt
                    )
                report_content = response.text or ""
                if report_content:
                    gr.Info("Report generated via Gemini")
            except Exception as e:
                gr.Warning(f"Gemini failed: {str(e)[:100]}, trying OpenAI...")

        # 6. Fallback to OpenAI
        if not report_content and openai_key:
            try:
                from openai import AsyncOpenAI
                client = AsyncOpenAI(api_key=openai_key)
                response = await client.chat.completions.create(
                    model="gpt-4o",
                    messages=[{"role": "user", "content": prompt}]
                )
                report_content = response.choices[0].message.content or ""
                if report_content:
                    gr.Info("Report generated via GPT-4")
            except Exception as e:
                gr.Warning(f"OpenAI also failed: {str(e)[:100]}")

        # 7. Fallback: article digest (when no provider produced a report).
        # Written as Markdown so it flows through the same conversion below.
        if not report_content:
            digest = [
                f"## 📊 Research Digest: {safe_topic}",
                "",
                "Note: AI synthesis services are currently unavailable (quota exceeded). "
                "Below is a curated list of the top articles found.",
                "",
                f"### 🔍 {len(articles)} Key Articles",
                "",
            ]
            for idx, art in enumerate(articles, 1):
                title = escape(str(art.get('title') or 'Unknown Title'))
                # Unescaped brackets would end the Markdown link text early.
                title = title.replace('[', '\\[').replace(']', '\\]')
                url = escape(str(art.get('url') or '#'))
                meta = [f"✍️ {escape(str(_article_author(art)))}"]
                reading_time = art.get('readingTime')
                if reading_time not in (None, '', 'N/A'):
                    meta.append(f"⏱️ {escape(str(reading_time))} min read")
                claps = art.get('claps')
                # claps may be missing or non-numeric; only compare real numbers.
                if isinstance(claps, (int, float)) and claps > 0:
                    meta.append(f"👏 {claps} claps")
                digest.append(f"{idx}. [{title}]({url}) — {' | '.join(meta)}")
            digest += [
                "",
                "### 💡 To Get AI Analysis",
                "",
                "To enable AI-powered synthesis of these articles:",
                "",
                "- Gemini (Free): Get a free API key at Google AI Studio",
                "- OpenAI (Paid): Add billing at OpenAI Platform",
                "- Update your .env file with the new key",
                "- Restart the UI",
            ]
            report_content = "\n".join(digest)

        # Convert markdown to HTML if needed
        if not report_content.startswith("<"):
            report_content = markdown.markdown(report_content)

        # Final fragment shown in the UI and kept for the PDF export.
        final_html = f"{report_content}\n"
        last_report_html = final_html
        return final_html
    except Exception as e:
        # Exception text can contain angle brackets; escape before display.
        error_msg = escape(str(e))
        return f"""
⚠️ Analysis Error
The analyst encountered an issue while researching '{safe_topic}'.
Error: {error_msg}
Troubleshooting:
- Check that your GROQ_API_KEY, GEMINI_API_KEY, or OPENAI_API_KEY is set correctly
- Verify your API keys have sufficient quota
- Try a different search term
"""
async def export_report_pdf():
    """Export the most recent analyst report to a timestamped PDF.

    The report HTML stored in ``last_report_html`` is injected into
    ``RENDERER_TEMPLATE`` and rendered with WeasyPrint when it is available;
    if WeasyPrint is unavailable or raises, the scraper's ``render_pdf`` is
    used instead.

    Returns:
        Path of the written PDF under ``outputs/``, or ``None`` when no report
        has been generated yet or the export fails (a warning is shown in
        both cases).
    """
    global last_report_html
    if not last_report_html:
        gr.Warning("No report generated yet")
        return None
    try:
        # Make sure the target directory exists before either engine writes.
        os.makedirs("outputs", exist_ok=True)
        timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
        filepath = f"outputs/analyst_report_{timestamp}.pdf"

        # Reuse the renderer's page template so the PDF gets its CSS.
        styled_html = RENDERER_TEMPLATE.format(title="Analyst Report", content=last_report_html)

        rendered = False
        if WEASYPRINT_AVAILABLE:
            try:
                WP_HTML(string=styled_html).write_pdf(filepath)
                rendered = True
            except Exception:
                # Fall through to the secondary engine below.
                rendered = False

        if not rendered:
            gr.Info("Using Robust PDF Engine...")
            # Only acquire the scraper context when it is actually needed.
            scraper, _ = await get_ui_context()
            pdf_bytes = await scraper.render_pdf(styled_html)
            with open(filepath, "wb") as f:
                f.write(pdf_bytes)

        gr.Info(f"✅ PDF Exported: {filepath}")
        return filepath
    except Exception as e:
        gr.Warning(f"Export failed: {str(e)}")
        return None
# ============================================================================
# TAB 6: SETTINGS
# ============================================================================
def render_settings():
    """Build the HTML status panel showing which API keys are configured.

    Only the presence of each environment variable is reported; the key values
    themselves are never included in the output.

    Returns:
        HTML string: a "System Status" heading followed by one row per
        provider, each carrying the CSS class ``active`` or ``missing``.
    """
    keys = {
        "Groq": "GROQ_API_KEY",  # PRIMARY LLM
        "ElevenLabs": "ELEVENLABS_API_KEY",
        "Gemini": "GEMINI_API_KEY",  # BACKUP LLM
        "OpenAI": "OPENAI_API_KEY",  # BACKUP LLM
    }
    # NOTE(review): the row markup is deliberately minimal (only the computed
    # active/missing class is applied) — align it with the stylesheet if the
    # panel needs richer structure.
    parts = ["<h3>System Status</h3>\n"]
    for name, key in keys.items():
        is_set = bool(os.environ.get(key))
        cls = "active" if is_set else "missing"
        icon = "✅" if is_set else "❌"
        text = "Active & Ready" if is_set else "Missing Configuration"
        parts.append(
            f'<div class="{cls}">{icon} <strong>{name}</strong>: {text}</div>\n'
        )
    return "".join(parts)
def refresh_settings():
    """Return a freshly rendered settings panel (handler for "Refresh Status")."""
    status_html = render_settings()
    return status_html
# ============================================================================
# BUILD UI
# ============================================================================
# Build the whole Gradio app: a Home tab plus six functional tabs. Components
# are created in source order inside nested context managers, so statement
# order below is also layout order. Handlers such as search_articles,
# browse_tag, scrape_and_render_combined, export_article, batch_scrape and
# generate_audio are referenced by name and are not defined in this section.
with gr.Blocks(title="Project Aether") as demo:
# NOTE(review): as written this f-string has no placeholders, so it adds an
# empty HTML component — presumably it once injected the stylesheet; confirm.
gr.HTML(f"")
with gr.Tabs():
# 0. HERO - Landing page with feature overview
with gr.TabItem("✦ Home"):
gr.HTML('''
✦
Project Aether
The Ultimate Medium Intelligence Platform
Extract, analyze, and transform Medium articles into actionable insights with AI-powered tools.
Search and browse Medium articles by topic or tag. Find the best content instantly.
Extract and view any Medium article. Export to PDF, Markdown, HTML, or JSON.
Archive up to 20 articles at once. Download as a ZIP with PDFs ready to read offline.
Convert articles to podcast-style audio. 43+ premium voices powered by ElevenLabs.
Generate comprehensive intelligence reports. AI-powered synthesis using Groq & GPT-4.
Manage API keys and system status. Connect ElevenLabs, Groq, and OpenAI.
''')
# 1. DISCOVER - Full width like Sonic
with gr.TabItem("🔍 Discover"):
with gr.Column(elem_classes="hero-section"):
# Centered Hero Card
gr.HTML('''
Explore Medium
Search topics or browse by tag to find articles
''')
# Search Input
d_search = gr.Textbox(placeholder="Search topics...", show_label=False, elem_classes="omnibar")
with gr.Row():
d_btn = gr.Button("🔍 Search", variant="primary", elem_classes="action-pill", scale=1)
# Tag Browse Section
gr.HTML('📂 Browse by Tag
')
d_tag_input = gr.Textbox(placeholder="Enter a tag (e.g., python, ai, startup)...", show_label=False, elem_classes="omnibar")
with gr.Row():
d_tag_btn = gr.Button("📂 Browse Tag", variant="secondary", elem_classes="action-pill", scale=1)
# Settings
with gr.Row():
d_slider = gr.Slider(1, 10, value=5, step=1, label="Max Articles")
# Results
d_out = gr.HTML(
value='🔍 Enter a search term or tag above
',
elem_classes="discover-results"
)
# Both buttons share the "Max Articles" slider and write into the same
# results pane (d_out); only the text input and the handler differ.
d_btn.click(search_articles, inputs=[d_search, d_slider], outputs=d_out)
d_tag_btn.click(browse_tag, inputs=[d_tag_input, d_slider], outputs=d_out)
# 2. SCRAPE - Premium layout with centered hero card
with gr.TabItem("📄 Scrape"):
with gr.Column(elem_classes="hero-section"):
# Centered Hero Card
gr.HTML('''
Article Reader
Extract and view any Medium article
''')
# URL Input
s_url = gr.Textbox(placeholder="Paste Medium Article URL...", show_label=False, elem_classes="omnibar")
with gr.Row():
s_btn = gr.Button("📄 Scrape Article", variant="primary", elem_classes="action-pill", scale=1)
# Article View
gr.HTML('📖 Article View
')
s_out = gr.HTML(
value='📄 Paste a URL above to view article
',
)
# Export Section
gr.HTML('📦 Export Archive
')
with gr.Row():
s_fmt = gr.Dropdown(["pdf", "markdown", "html", "json"], value="pdf", label="Format", scale=1)
s_exp_btn = gr.Button("📥 Download", variant="primary", elem_classes="action-pill", scale=1)
s_file = gr.File(label="Downloaded File", show_label=True, interactive=False)
# Scrape renders into s_out; Download reuses the same URL box together with
# the chosen format, and the handler's return value feeds s_file.
s_btn.click(scrape_and_render_combined, inputs=s_url, outputs=s_out)
s_exp_btn.click(export_article, inputs=[s_url, s_fmt], outputs=s_file)
# 3. BATCH - Premium layout with centered hero card
with gr.TabItem("📦 Batch"):
with gr.Column(elem_classes="hero-section"):
# Centered Hero Card
gr.HTML('''
Bulk Archiver
Archive up to 20 articles at once as PDFs
''')
# State to track URL list
url_list = gr.State([""]) # Start with one empty slot
# Dynamic URL inputs container
# render_url_inputs is registered with gr.render on url_list and builds one
# textbox + delete button row per entry of that list.
@gr.render(inputs=url_list)
def render_url_inputs(urls):
for i, url in enumerate(urls):
with gr.Row(elem_classes="url-input-row"):
url_box = gr.Textbox(
value=url,
placeholder=f"URL #{i+1} - Paste Medium article link...",
show_label=False,
elem_classes="url-single-input",
scale=6,
container=False
)
del_btn = gr.Button(
"❌",
scale=1,
min_width=50,
elem_classes="url-delete-btn",
variant="secondary"
)
# idx=i binds the current row index as a default argument, so each row's
# callback keeps its own index instead of the loop's final value.
# update_url writes the edited text into the list in place and returns it.
def update_url(new_url, urls_list, idx=i):
urls_list[idx] = new_url
return urls_list
url_box.change(update_url, inputs=[url_box, url_list], outputs=url_list)
# delete_url returns a new list without row idx, but never removes the
# last remaining row.
def delete_url(urls_list, idx=i):
if len(urls_list) > 1:
return urls_list[:idx] + urls_list[idx+1:]
return urls_list
del_btn.click(delete_url, inputs=url_list, outputs=url_list)
# Action Buttons
with gr.Row():
add_url_btn = gr.Button("➕ Add URL", variant="secondary", elem_classes="action-pill-secondary")
b_btn = gr.Button("🚀 Process Archive", variant="primary", elem_classes="action-pill")
# add_url appends one empty slot, capped at 20 entries in total.
def add_url(urls_list):
if len(urls_list) < 20:
return urls_list + [""]
return urls_list
add_url_btn.click(add_url, inputs=url_list, outputs=url_list)
# Results
b_html = gr.HTML(
value='📦 Add URLs above and click Process
',
)
b_zip = gr.File(label="Download Archive (ZIP)")
# Adapter for batch_scrape: drops blank entries, joins the rest with
# newlines, and passes that single string on.
async def process_batch_from_list(urls_list):
urls_text = "\n".join([u for u in urls_list if u.strip()])
return await batch_scrape(urls_text)
b_btn.click(process_batch_from_list, inputs=url_list, outputs=[b_html, b_zip])
# 4. SONIC (AUDIO) - POLISHED UI
with gr.TabItem("🎧 Sonic"):
with gr.Column(elem_classes="hero-section"):
# Compact Centered Hero Card
gr.HTML('''
Article to Audio
Convert any Medium article into podcast-style narration
''')
# URL Input
so_url = gr.Textbox(placeholder="Paste Medium Article URL...", show_label=False, elem_classes="omnibar")
# Voice Selection (standalone like Discover's tag input)
so_voice = gr.Dropdown(VOICE_CHOICES, value="george", label="🎙️ Voice")
# Generate Button (in own row like Discover's search button)
with gr.Row():
so_btn = gr.Button("🔊 Generate Audio", variant="primary", elem_classes="action-pill", scale=1)
# Results Section
so_status = gr.HTML(
value='🎵 Ready to generate
',
)
so_player = gr.Audio(label=None, show_label=False, type="filepath")
# The two inline gr.State components pass the constants "auto" and 250 as
# the third and fourth arguments of generate_audio; they have no visible
# widget. The handler returns (status HTML, audio file path).
so_btn.click(generate_audio, inputs=[so_url, so_voice, gr.State("auto"), gr.State(250)], outputs=[so_status, so_player])
# 5. INTELLIGENCE - Premium layout matching Sonic tab style
with gr.TabItem("🧠 Intelligence"):
with gr.Column(elem_classes="hero-section"):
# Centered Hero Card (like Sonic)
gr.HTML('''
Analyst Dashboard
Generate comprehensive intelligence reports using AI-powered analysis
''')
# Topic Input (like Sonic's URL input)
i_topic = gr.Textbox(placeholder="Enter a topic for deep analysis...", show_label=False, elem_classes="omnibar")
# Generate Row (balanced like Sonic)
with gr.Row():
i_btn = gr.Button("🔍 Generate Report", variant="primary", elem_classes="action-pill", scale=1)
# Output Area
i_out = gr.HTML(
value='📊 Ready to analyze
',
)
# Export Section - matches Scrape tab layout
gr.HTML('📦 Export Report
')
with gr.Row():
i_exp_btn = gr.Button("📥 Download PDF", variant="primary", elem_classes="action-pill", scale=1)
i_file = gr.File(label="Downloaded File", show_label=True, interactive=False)
# The export button takes no inputs: export_report_pdf reads the report that
# analyst_report stored in the module-level last_report_html.
i_btn.click(analyst_report, inputs=i_topic, outputs=i_out)
i_exp_btn.click(export_report_pdf, outputs=i_file)
# 6. SETTINGS - Premium layout matching Sonic tab style
with gr.TabItem("⚙️ Settings"):
with gr.Column(elem_classes="hero-section"):
# Centered Hero Card (like Sonic)
gr.HTML('''
Configuration Status
API keys and system configuration
''')
# render_settings() is evaluated once here for the initial panel content;
# the button below re-renders it on demand.
set_html = gr.HTML(render_settings())
set_refresh = gr.Button("Refresh Status", elem_classes="action-pill")
set_refresh.click(refresh_settings, outputs=set_html)
# Script entry point: print a banner, then serve the app on all interfaces
# (0.0.0.0) at port 7860 without requesting a public share link.
if __name__ == "__main__":
# NOTE(review): the banner text is emitted verbatim, spelling included —
# presumably a marker for identifying the running build in logs; confirm
# before changing it.
print("--- DEPOLOYMENT_VERSION_3_FORCE_BLACK ---")
demo.launch(server_name="0.0.0.0", server_port=7860, share=False)