""" Medium MCP Server UI - Project Aether Edition v5.0 A premium Gradio interface for the Medium MCP Server with: - 6 functional tabs for all 10 MCP tools - Project Aether glassmorphism design (Prose Typography & Full Width) - ElevenLabs voice selection with 43 voices - PDF Export (Robust: WeasyPrint -> Playwright Fallback) - Dynamic Settings & Tag Search - Real Analyst Intelligence (Gemini 1.5 Flash) with fallback Search - Unified Preview/Output View via Iframe """ import gradio as gr import asyncio import os import sys import json import markdown import shutil import datetime import uuid import time from pathlib import Path from dotenv import load_dotenv # Check dependencies WEASYPRINT_AVAILABLE = False try: # Suppress stderr during weasyprint import to avoid scary DLL warnings import logging logging.getLogger("weasyprint").setLevel(logging.ERROR) from weasyprint import HTML as WP_HTML, CSS as WP_CSS # Try initialize to catch DLL errors early (silent check) try: WP_HTML(string="

test

").write_pdf() WEASYPRINT_AVAILABLE = True except Exception: WEASYPRINT_AVAILABLE = False except Exception: WEASYPRINT_AVAILABLE = False # Fix for Windows Asyncio Loop (prevents some 10054 errors) if sys.platform == 'win32': asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy()) # Load environment variables load_dotenv(os.path.join(os.path.dirname(__file__), ".env")) # No sys.path needed - src/ is in same project now # Import from server and scraper from src.service import ScraperService # Import renderer for explicit usage from src.html_renderer import render_full_page, BASE_TEMPLATE as RENDERER_TEMPLATE from src.utils import upgrade_medium_image_url from src.config import MCPConfig from elevenlabs_voices import ELEVENLABS_VOICES, VOICE_CATEGORIES, get_voice_id # Import Gemini for Analyst (backup) - Using new google.genai SDK from google import genai from google.genai import types # Import Groq for primary LLM from groq import Groq # ============================================================================ # PROJECT AETHER: VISUAL SYSTEM (ENHANCED) # ============================================================================ AETHER_CSS = """ @import url('https://fonts.googleapis.com/css2?family=Playfair+Display:wght@400;700&family=Inter:wght@300;400;600&family=JetBrains+Mono:wght@400;600&display=swap'); :root { --aether-bg: #121212; --aether-surface: #1e1e1e; --aether-accent: #6366f1; --aether-text: #ffffff; --aether-text-muted: #a1a1aa; --glass-bg: rgba(30, 30, 30, 0.6); --glass-border: rgba(255, 255, 255, 0.1); --status-green: #22c55e; --status-red: #ef4444; } body, .gradio-container { background-color: var(--aether-bg) !important; font-family: 'Inter', sans-serif !important; color: var(--aether-text) !important; } h1, h2, h3, .prose h1, .prose h2, .prose h3 { font-family: 'Playfair Display', serif !important; letter-spacing: -0.02em; } footer { display: none !important; } .gradio-container { max-width: 100% !important; margin: 0; padding: 0 
20px; } /* Center Tab Navigation - Aggressive approach */ .tabs { display: flex !important; flex-direction: column !important; align-items: center !important; } .tabs > .tab-nav { justify-content: center !important; } .tabs > .tabitem { width: 100% !important; } /* Tab Content Container - Consistent Width */ .tabitem { width: 100% !important; } .tabitem > div { width: 100% !important; } /* Discover Results Container - Prevent Layout Shift */ .discover-results, .scrape-results { min-height: 400px; width: 100% !important; } /* Skeleton Article Placeholder */ .skeleton-article { background: var(--aether-surface); border: 1px solid var(--glass-border); border-radius: 12px; padding: 40px; min-height: 500px; } .skeleton-header { height: 32px; background: linear-gradient(90deg, #2a2a2a 25%, #3a3a3a 50%, #2a2a2a 75%); background-size: 200% 100%; animation: shimmer 1.5s infinite; border-radius: 8px; margin-bottom: 16px; width: 70%; } .skeleton-line { height: 16px; background: linear-gradient(90deg, #2a2a2a 25%, #3a3a3a 50%, #2a2a2a 75%); background-size: 200% 100%; animation: shimmer 1.5s infinite; border-radius: 4px; margin-bottom: 12px; } .skeleton-line:nth-child(odd) { width: 100%; } .skeleton-line:nth-child(even) { width: 85%; } @keyframes shimmer { 0% { background-position: 200% 0; } 100% { background-position: -200% 0; } } /* Export Section - Premium Styling */ .export-control-bar { /* Override Gradio Variables LOCALLY */ --input-background-fill: #0b0b0b !important; --background-fill-primary: #0b0b0b !important; --background-fill-secondary: #0b0b0b !important; --block-background-fill: #0b0b0b !important; background: rgba(30,30,30,0.85); backdrop-filter: blur(20px); border: 1px solid rgba(255,255,255,0.1); border-radius: 100px; padding: 0 16px; margin-top: 24px; display: flex !important; flex-wrap: nowrap !important; flex-direction: row !important; align-items: center !important; gap: 12px; height: 80px !important; max-height: 80px !important; min-height: 80px 
!important; box-shadow: 0 10px 40px rgba(0,0,0,0.5); width: 100% !important; overflow: visible !important; /* CRITICAL FIX: Allow Dropdown Overflow */ z-index: 9999 !important; /* Ensure on top */ } /* NUCLEAR CHECKLIST: 1. Containers -> Transparent 2. Input -> Black 3. Junk (Loaders, Close btns, Error chips) -> Hidden */ .export-control-bar > *, .export-control-bar .gr-block, .export-control-bar .gr-box, .export-control-bar .gr-form, .export-control-bar .gr-input, .export-control-bar .wrap, .export-control-bar .contain, .export-control-bar label { background: transparent !important; border: none !important; margin: 0 !important; padding: 0 !important; flex-wrap: nowrap !important; } /* 1. Format Selector */ .export-control-bar .dropdown-wrap { background: transparent !important; border: none !important; } .export-control-bar .gr-dropdown { width: 100px !important; flex-shrink: 0 !important; overflow: visible !important; } .export-control-bar .gr-dropdown .wrap-inner { border-radius: 50px !important; background: rgba(255,255,255,0.08) !important; border: 1px solid rgba(255,255,255,0.1) !important; } .export-control-bar .gr-dropdown input { color: white !important; text-align: center !important; font-weight: 600 !important; height: 48px !important; } .export-control-bar .gr-dropdown ul.options { background: #1a1a1a !important; border: 1px solid #333 !important; z-index: 10000 !important; } /* 2. Download Button */ .export-control-bar .action-pill { border-radius: 50px !important; height: 48px !important; background: var(--aether-accent) !important; color: white !important; font-weight: 700 !important; margin-left: auto !important; flex: 0 0 160px !important; width: 160px !important; } /* 3. Text inputs (Status & Filename) */ /* 3. Text inputs (Status & Filename) - FORCE BLACK FIX */ /* 3. 
Text inputs (Status & Filename) - LIGHT MODE PERFECTION */ .export-control-bar textarea, .export-control-bar textarea:disabled, .export-control-bar textarea:read-only, .export-control-bar textarea::placeholder { background-color: #ffffff !important; /* White background */ background: #ffffff !important; color: #000000 !important; /* Black text */ -webkit-text-fill-color: #000000 !important; /* CRITICAL for disabled inputs */ opacity: 1 !important; border: 1px solid rgba(255,255,255,0.1) !important; border-radius: 8px !important; font-size: 13px !important; text-align: right !important; line-height: 80px !important; height: 80px !important; padding: 0 12px !important; box-shadow: none !important; overflow: hidden !important; white-space: nowrap !important; font-weight: 700 !important; /* Bolder text */ cursor: default !important; pointer-events: none !important; } /* Force Wrapper Transparency */ .export-control-bar .block, .export-control-bar .wrap, .export-control-bar .gradio-container, .export-control-bar label, .export-control-bar .input-container { background-color: transparent !important; background: transparent !important; border: none !important; } /* 4. 
REMOVE JUNK (Cross signs, Error chips, Loaders) */ .export-control-bar .loader, .export-control-bar .loading, .export-control-bar .meta-text, .export-control-bar .progress-text, .export-control-bar .clear-button, /* The 'Cross Sign' */ .export-control-bar .remove-button, .export-control-bar .icon-button, /* Generic icon button (includes clear) */ .export-control-bar .toast-wrap, /* Error Chips */ .export-control-bar button[aria-label="Clear"] { display: none !important; width: 0 !important; height: 0 !important; opacity: 0 !important; visibility: hidden !important; pointer-events: none !important; } /* Flex area for middle content */ .export-control-bar .file-display-area, .export-control-bar .status-display-area { flex: 1 1 auto !important; min-width: 0 !important; display: flex; justify-content: flex-end; align-items: center; } /* Dropdown Arrow Fix */ .export-control-bar .icon { fill: white !important; } /* File Helper - Make it blend in perfectly */ .export-control-bar .file-preview { background: transparent !important; border: none !important; margin: 0 !important; padding: 0 !important; height: 100% !important; display: flex; align-items: center; justify-content: flex-end; color: #e5e5e5; } .export-control-bar .file-name { font-size: 13px; font-family: monospace; } .export-control-bar .file-size { display: none !important; } /* Hide file size to save space */ /* Prose Styling for Markdown (Analyst Reports) */ .prose { font-size: 1.1rem; line-height: 1.75; color: #e5e5e5; } .prose h1, .prose h2, .prose h3 { margin-top: 2em; margin-bottom: 0.5em; line-height: 1.25; font-family: 'Playfair Display', serif; } .prose p { margin-top: 1.25em; margin-bottom: 1.25em; } .prose a { color: var(--aether-accent); text-decoration: none; } .prose a:hover { text-decoration: underline; } .prose code { background-color: #2d2d2d; padding: 0.2em 0.4em; border-radius: 4px; font-family: 'JetBrains Mono', monospace; font-size: 0.9em; } .prose pre { background-color: #1a1a1a; padding: 
1.5em; border-radius: 8px; overflow-x: auto; border: 1px solid var(--glass-border); } .prose pre code { background: none; padding: 0; color: #e5e5e5; } .prose blockquote { border-left: 4px solid var(--aether-accent); padding-left: 1em; font-style: italic; color: #a1a1aa; margin: 1.5em 0; } .prose table { width: 100%; border-collapse: collapse; margin: 2em 0; } .prose th, .prose td { padding: 0.75em; border-bottom: 1px solid var(--glass-border); text-align: left; } .prose th { font-weight: 600; color: #fff; } /* Omnibar */ .omnibar { border: none !important; background: transparent !important; box-shadow: none !important; padding: 0 !important; height: 60px !important; } .omnibar label { display: block !important; height: 100% !important; margin: 0 !important; padding: 0 !important; border: none !important; background: transparent !important; } .omnibar label span { display: none !important; } .omnibar textarea, .omnibar input { background: var(--glass-bg) !important; border: 1px solid var(--glass-border) !important; backdrop-filter: blur(12px); border-radius: 16px !important; color: white !important; font-size: 1.1rem !important; padding: 16px 20px !important; transition: all 0.3s ease; box-shadow: 0 4px 30px rgba(0, 0, 0, 0.1); height: 100% !important; } .omnibar textarea:focus, .omnibar input:focus { border-color: var(--aether-accent) !important; box-shadow: 0 0 0 2px rgba(99, 102, 241, 0.2), 0 8px 40px rgba(0, 0, 0, 0.2); } /* Action Pills */ .action-pill { background: var(--aether-accent) !important; border: none !important; border-radius: 50px !important; color: white !important; font-weight: 600 !important; text-transform: uppercase; letter-spacing: 0.05em; padding: 0 30px !important; transition: all 0.2s ease; height: 60px !important; box-shadow: 0 4px 15px rgba(99, 102, 241, 0.3); } .action-pill:hover { transform: scale(1.05); box-shadow: 0 0 20px rgba(99, 102, 241, 0.4); } /* Cards */ .aether-card { background: var(--aether-surface); border: 1px solid 
var(--glass-border); border-radius: 12px; overflow: hidden; transition: all 0.4s cubic-bezier(0.175, 0.885, 0.32, 1.275); position: relative; display: block; text-decoration: none; color: inherit; height: 100%; } .aether-card:hover { transform: translateY(-8px) rotateX(2deg); box-shadow: 0 20px 40px rgba(0,0,0,0.4); border-color: var(--aether-accent); z-index: 10; } .aether-card-img { height: 180px; background-size: cover; background-position: center; position: relative; } .aether-card-img::after { content: ''; position: absolute; bottom: 0; left: 0; right: 0; height: 60%; background: linear-gradient(to top, rgba(0,0,0,0.8), transparent); } /* Digital Paper */ .digital-paper { background: #050505; color: #e5e5e5 !important; padding: 60px; border-radius: 8px; border: 1px solid rgba(255, 255, 255, 0.15); box-shadow: 0 20px 60px rgba(0,0,0,0.6); font-family: 'Playfair Display', serif; line-height: 1.9; font-size: 1.1rem; max-width: 100%; /* Full Width */ margin: 0 auto; } .digital-paper h1, .digital-paper h2, .digital-paper h3, .digital-paper p, .digital-paper li { color: #e5e5e5 !important; } .digital-paper h1 { border-bottom: 1px solid rgba(255,255,255,0.2); padding-bottom: 20px; margin-bottom: 30px; font-size: 2.5rem; } .digital-paper blockquote { border-left: 4px solid var(--aether-accent); padding-left: 20px; font-style: italic; color: #a1a1aa !important; } /* Status Cards in Settings */ .status-card { background: rgba(255,255,255,0.03); border: 1px solid var(--glass-border); border-radius: 12px; padding: 20px; display: flex; align-items: center; gap: 15px; margin-bottom: 15px; } .status-card.active { border-color: var(--status-green); background: rgba(34, 197, 94, 0.05); } .status-card.missing { border-color: var(--status-red); background: rgba(239, 68, 68, 0.05); } /* Sonic Player */ .sonic-container { background: linear-gradient(135deg, rgba(99, 102, 241, 0.1), rgba(0,0,0,0)); border: 1px solid var(--glass-border); border-radius: 24px; padding: 30px; 
backdrop-filter: blur(20px); } .album-art { width: 100%; aspect-ratio: 1/1; background: linear-gradient(135deg, #6366f1, #8b5cf6); border-radius: 20px; display: flex; align-items: center; justify-content: center; box-shadow: 0 20px 50px -10px rgba(99, 102, 241, 0.5); margin-bottom: 20px; } /* Full Width Iframe */ .full-width-iframe iframe { width: 100% !important; height: 85vh !important; border: none !important; border-radius: 8px !important; background: white !important; } /* Dynamic URL Input Rows (Batch Tab) */ .url-input-row { background: rgba(255,255,255,0.03) !important; border: 1px solid rgba(255,255,255,0.1) !important; border-radius: 12px !important; padding: 8px 12px !important; margin-bottom: 8px !important; transition: all 0.2s ease !important; } .url-input-row:hover { background: rgba(255,255,255,0.06) !important; border-color: var(--aether-accent) !important; } .url-single-input input, .url-single-input textarea { background: transparent !important; border: none !important; color: white !important; font-size: 14px !important; } .url-single-input input::placeholder { color: #666 !important; } .url-delete-btn { background: rgba(239, 68, 68, 0.2) !important; border: 1px solid rgba(239, 68, 68, 0.3) !important; border-radius: 8px !important; color: #ef4444 !important; font-size: 16px !important; min-width: 40px !important; max-width: 40px !important; height: 40px !important; padding: 0 !important; transition: all 0.2s ease !important; } .url-delete-btn:hover { background: rgba(239, 68, 68, 0.4) !important; transform: scale(1.1); } .action-pill-secondary { background: rgba(99, 102, 241, 0.2) !important; border: 1px solid rgba(99, 102, 241, 0.3) !important; border-radius: 50px !important; color: white !important; font-weight: 600 !important; padding: 12px 24px !important; transition: all 0.2s ease !important; } .action-pill-secondary:hover { background: rgba(99, 102, 241, 0.4) !important; transform: translateY(-2px); } /* Simple Row Alignment - Vertically 
# ============================================================================
# UI CONTEXT MOCK
# ============================================================================

# Singleton scraper for UI
_ui_scraper = None
_ui_config = None


async def get_ui_context():
    """Get or create the shared UI context.

    Returns:
        tuple: ``(scraper, config)`` -- the module-wide ``ScraperService``
        and the ``MCPConfig`` built by ``MCPConfig.from_env()``.

    Raises:
        Anything raised by ``MCPConfig.from_env()`` or
        ``ScraperService.ensure_initialized()``. Nothing is cached in that
        case, so the next call starts over.
    """
    global _ui_scraper, _ui_config
    if _ui_scraper is None:
        config = MCPConfig.from_env()
        # Ensure outputs dir exists
        os.makedirs("outputs", exist_ok=True)
        os.makedirs("outputs/batch", exist_ok=True)
        scraper = ScraperService(max_workers=2)
        await scraper.ensure_initialized()
        # Publish only once initialisation has succeeded. Assigning the
        # global before the await would hand a half-initialised scraper to
        # any caller arriving in the meantime, and would keep a broken one
        # cached forever if ensure_initialized() raised.
        if _ui_scraper is None:
            _ui_scraper, _ui_config = scraper, config
    return _ui_scraper, _ui_config


# ============================================================================
# VOICE DATA
# ============================================================================

def get_voice_choices():
    """Get voice choices for dropdown.

    Returns:
        list[tuple[str, str]]: ``(label, voice_key)`` pairs. Three pinned
        voices come first, followed by every voice in ``VOICE_CATEGORIES``
        that is not one of the pinned keys.
    """
    choices = [
        ("🎙️ George (British, warm) - DEFAULT", "george"),
        ("🎙️ Adam (American, deep)", "adam"),
        ("🎙️ Pearson (Newsreader)", "sarah"),
    ]
    pinned = {voice_key for _, voice_key in choices}
    for category, voices in VOICE_CATEGORIES.items():
        choices.extend(
            (f"{category.title()}: {voice.title()}", voice)
            for voice in voices
            if voice not in pinned
        )
    return choices


VOICE_CHOICES = get_voice_choices()
# ============================================================================
# TAB 1: DISCOVER
# ============================================================================

async def search_articles(query: str, max_articles: int = 5):
    """Search Medium for ``query`` and return the rendered result cards.

    Args:
        query: Free-text search query; an empty value short-circuits.
        max_articles: Upper bound forwarded to ``scraper.scrape_search``.

    Returns:
        str: Output of ``render_cards`` on success, otherwise a short
        prompt or error message.
    """
    # NOTE(review): the message literals below look like they lost their
    # wrapping markup in this copy of the file -- confirm before shipping.
    if not query:
        return "\nPlease enter a search query.\n"

    try:
        scraper, _config = await get_ui_context()
        gr.Info(f"Searching for '{query}'...")
        found = await scraper.scrape_search(query, max_articles=max_articles)
        return render_cards(found, query=query)
    except Exception as exc:
        return f"\nError: {exc}\n"
async def browse_tag(tag: str, max_articles: int = 5):
    """Browse a Medium tag and return the rendered result cards.

    Args:
        tag: Tag to browse; an empty value short-circuits.
        max_articles: Upper bound forwarded to ``scraper.scrape_tag``.

    Returns:
        str: Result cards, the tag-specific empty state when nothing was
        found, or a short prompt or error message.
    """
    # NOTE(review): the message literals below look like they lost their
    # wrapping markup in this copy of the file -- confirm before shipping.
    if not tag:
        return "\nPlease enter a tag.\n"
    try:
        scraper, _config = await get_ui_context()
        gr.Info(f"Browsing '{tag}'...")
        results = await scraper.scrape_tag(tag, max_articles=max_articles)
        if not results:
            # render_cards() always reports an empty result as a "search"
            # (rate-limit hint). Go to the empty state directly so the
            # tag-specific hint is shown instead.
            return render_empty_state(tag, "tag")
        return render_cards(results, query=tag)
    except Exception as e:
        return f"\nError: {str(e)}\n"

def render_empty_state(query: str, search_type: str, error: str = None):
    """Render a helpful empty state with suggestions.

    Args:
        query: What the user searched for; echoed back in the message.
        search_type: ``"search"`` selects the rate-limit hint; any other
            value selects the "tag may not exist" hint.
        error: Optional diagnostic; the first 100 characters are shown.

    Returns:
        str: The empty-state fragment. ``query`` and ``error`` are
        HTML-escaped because they originate from user input or exception
        text and end up inside rendered HTML.
    """
    from html import escape

    # NOTE(review): the layout markup around these lines is not visible in
    # this copy of the file; only the text content is reproduced here.
    suggestions = [
        "artificial-intelligence",
        "python",
        "machine-learning",
        "programming",
        "technology",
        "startup",
        "design",
    ]
    suggestion_chips = " ".join(suggestions[:5])

    if search_type == "search":
        hint = "Medium might be rate-limiting requests. Try again in a moment."
    else:
        hint = "This tag may not exist or have no recent articles."

    error_detail = ""
    if error:
        # str() so that an exception object passed by a caller is accepted.
        error_detail = f"\n\nDebug: {escape(str(error)[:100])}\n\n"

    lines = [
        "",
        "🔍",
        "",
        f'No articles found for "{escape(str(query))}"',
        "",
        hint,
        "",
        "Try these popular topics:",
        "",
        suggestion_chips,
        "",
        '💡 Tip: Use specific terms (e.g., "python async" instead of just "python")',
        "",
        error_detail,
        "",
    ]
    return "\n".join(lines)
def render_cards(results, query: str = ""):
    """Render search/tag results as a block of article cards.

    Args:
        results: Iterable of article dicts; entries without a truthy
            ``title`` and ``url`` are ignored.
        query: Original query, echoed in the empty state.

    Returns:
        str: The cards fragment, or the output of ``render_empty_state``
        when there is nothing valid to show.
    """
    from html import escape

    # Filter out any invalid results
    valid_results = [r for r in results or [] if r and r.get('title') and r.get('url')]
    if not valid_results:
        return render_empty_state(query or "your search", "search")

    parts = ["\n"]
    for art in valid_results:
        # Titles and author names are scraped text: truncate first, then
        # escape, so an entity is never cut in half.
        title = escape(str(art['title'])[:80])
        author = art.get('author') or 'Unknown'
        if isinstance(author, dict):
            author = author.get('name') or 'Unknown'
        author = escape(str(author))

        # Ensure high-resolution image (upgrade any low-res URLs)
        raw_img = art.get('imageUrl', '') or 'https://miro.medium.com/v2/resize:fit:1400/1*jfdwtvU6V6g99q3G7gq7dQ.png'
        # NOTE(review): url and img are presumably consumed by card markup
        # that is not visible in this copy of the file -- confirm, and
        # escape them as attribute values where they are interpolated.
        url = art.get('url', '#')
        img = upgrade_medium_image_url(raw_img, target_width=1400)

        parts.append(f"\n\n{title}\n\n{author}\n")
    parts.append("\n")
    return "".join(parts)
# ============================================================================
# TAB 2: SCRAPE (UNIFIED)
# ============================================================================

async def scrape_and_render_combined(url: str):
    """Scrape article and return HTML preview immediately.

    Returns a plain message when ``url`` is empty, when the scraper yields
    nothing, or when any step raises.
    """
    if not url:
        return "Please enter a URL."
    try:
        scraper, config = await get_ui_context()
        gr.Info("Scraping Article...")
        article = await scraper.scrape_article(url)
        if not article:
            return "Failed to scrape."
        # Render HTML using modified renderer (now full width + better styling)
        html = render_full_page(article)
        # Inject full width style just in case + iframe wrap
        # NOTE(review): the returned literal is empty in this copy of the
        # file; the iframe wrapper the comment above refers to (and its use
        # of `html`) is not visible here -- confirm against the real source.
        return f""
    except Exception as e:
        return f"Error: {e}"


async def export_article(url: str, format: str):
    """Scrape ``url`` and write it to ``outputs/`` in the requested format.

    Args:
        url: Article URL.
        format: ``"pdf"``, ``"html"`` or ``"json"``; any other value is
            exported as markdown.

    Returns:
        str | None: Path of the written file, or ``None`` when nothing was
        exported (a Gradio warning is raised in that case).
    """
    if not url:
        gr.Warning("Please enter a URL first")
        return None
    try:
        scraper, _config = await get_ui_context()
        article = await scraper.scrape_article(url)
        if not article:
            # Same guard as the preview path; without it the user would see
            # "'NoneType' object has no attribute 'get'".
            gr.Warning("Export failed: could not scrape the article")
            return None

        title = article.get('title') or 'export'
        safe_title = "".join(c if c.isalnum() else "_" for c in title)[:30]
        timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
        base = f"outputs/{safe_title}_{timestamp}"

        if format == "pdf":
            html = render_full_page(article)
            filepath = f"{base}.pdf"
            written = False
            if WEASYPRINT_AVAILABLE:
                # Primary: WeasyPrint
                try:
                    WP_HTML(string=html).write_pdf(filepath)
                    written = True
                except Exception:
                    gr.Info("⚠️ WeasyPrint failed. Switching to Robust Engine (Playwright)...")
                    # Keep the distinct suffix so the fallback result is
                    # never confused with a partial WeasyPrint file.
                    filepath = f"{base}_pw.pdf"
            else:
                gr.Info("Generating PDF with Robust Engine (Playwright)...")
            if not written:
                # Fallback: Playwright
                pdf_bytes = await scraper.render_pdf(html)
                with open(filepath, "wb") as f:
                    f.write(pdf_bytes)
        elif format == "html":
            filepath = f"{base}.html"
            with open(filepath, "w", encoding="utf-8") as f:
                f.write(render_full_page(article))
        elif format == "json":
            filepath = f"{base}.json"
            with open(filepath, "w", encoding="utf-8") as f:
                json.dump(article, f, indent=2, default=str)
        else:
            # markdown
            filepath = f"{base}.md"
            with open(filepath, "w", encoding="utf-8") as f:
                f.write(article.get('markdownContent') or '')

        gr.Info(f"✅ Exported: {filepath}")
        return filepath
    except Exception as e:
        gr.Warning(f"Export failed: {str(e)}")
        return None
# ============================================================================
# TAB 3: BATCH
# ============================================================================

async def batch_scrape(urls_text: str, max_concurrency: int = 5):
    """Scrape up to 20 URLs concurrently and bundle the exports in a ZIP.

    Each article is exported as PDF (WeasyPrint, then the scraper's own
    ``render_pdf``), falling back to HTML when no PDF can be produced.

    Args:
        urls_text: Newline-separated URLs; blank lines are ignored.
        max_concurrency: Maximum number of articles processed at once;
            values below 1 are treated as 1.

    Returns:
        tuple: ``(summary, zip_path)``. ``zip_path`` is ``None`` when the
        input is rejected or the batch as a whole fails.
    """
    from html import escape

    if not urls_text or not urls_text.strip():
        return "Please enter URLs.", None
    urls = [u.strip() for u in urls_text.strip().split('\n') if u.strip()]
    if len(urls) > 20:
        return "Max 20 URLs allowed.", None
    try:
        scraper, _config = await get_ui_context()
        # Create batch directory
        batch_id = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
        batch_dir = Path(f"outputs/batch/{batch_id}")
        batch_dir.mkdir(parents=True, exist_ok=True)
        gr.Info(f"Batch processing {len(urls)} articles...")

        # Semaphore(0) would park every task forever, and a UI slider may
        # deliver a float, hence the coercion and the floor of 1.
        semaphore = asyncio.Semaphore(max(1, int(max_concurrency)))

        async def process(position, url):
            async with semaphore:
                try:
                    art = await scraper.scrape_article(url)
                    if not art:
                        raise ValueError("scraper returned no article")
                    title = art.get('title') or 'unknown'
                    safe_title = "".join(c if c.isalnum() else "_" for c in title)[:30]
                    # The position prefix keeps two articles that share a
                    # (truncated) title from overwriting each other.
                    stem = f"{position:02d}_{safe_title}"
                    # Use improved renderer
                    html = render_full_page(art)

                    # Try PDF if available, else HTML
                    try:
                        file_path = batch_dir / f"{stem}.pdf"
                        written = False
                        if WEASYPRINT_AVAILABLE:
                            try:
                                WP_HTML(string=html).write_pdf(str(file_path))
                                written = True
                            except Exception as e:
                                print(f"WeasyPrint failed for {url}: {e}")
                        if not written:
                            pdf_bytes = await scraper.render_pdf(html)
                            with open(file_path, "wb") as f:
                                f.write(pdf_bytes)
                    except Exception as e:
                        print(f"PDF generation failed for {url}: {e}")
                        # Final fallback -> HTML
                        file_path = batch_dir / f"{stem}.html"
                        with open(file_path, "w", encoding="utf-8") as f:
                            f.write(html)
                    return {"status": "✅", "title": title, "file": file_path.name}
                except Exception as e:
                    # Keep the batch going, but leave a trace of the reason.
                    print(f"Scrape failed for {url}: {e}")
                    return {"status": "❌", "title": f"Failed: {url}", "file": "-"}

        results = await asyncio.gather(
            *(process(position, url) for position, url in enumerate(urls, 1))
        )

        # Create ZIP
        zip_path = shutil.make_archive(f"outputs/batch_{batch_id}", 'zip', batch_dir)

        # Generate Premium Summary HTML
        # NOTE(review): the markup of the summary is not visible in this
        # copy of the file; only its text content is reproduced. The colour
        # values below are presumably consumed by that markup -- confirm.
        success_count = sum(1 for r in results if r["status"] == "✅")
        fail_count = len(results) - success_count
        failed_badge = f'❌ {fail_count} Failed' if fail_count > 0 else ''
        parts = [f"\n\n📦 Batch Results\n\n✅ {success_count} Success {failed_badge}\n"]
        for r in results:
            is_success = r["status"] == "✅"
            bg_color = "rgba(34,197,94,0.08)" if is_success else "rgba(239,68,68,0.08)"
            border_color = "rgba(34,197,94,0.3)" if is_success else "rgba(239,68,68,0.3)"
            status_color = "#22c55e" if is_success else "#ef4444"
            title = str(r["title"])
            ellipsis = "..." if len(title) > 50 else ""
            # Titles and URLs are external text going into HTML.
            parts.append(
                f'\n{r["status"]} {escape(title[:50])}{ellipsis}\n{escape(r["file"])}\n'
            )
        parts.append("\n")
        return "".join(parts), zip_path
    except Exception as e:
        return f"Error: {e}", None
# ============================================================================
# TAB 4: SONIC
# ============================================================================

def clean_text_for_audio(text: str) -> str:
    """Clean markdown text for audio narration - remove all non-speakable content.

    Code (fenced and inline), images, bare URLs, HTML tags and a handful of
    symbol characters are dropped; links keep their label; heading markers
    and emphasis markers are removed; whitespace is collapsed.

    Args:
        text: Markdown source. ``None`` or an empty string yields ``""``.

    Returns:
        str: Single-line plain text.
    """
    import re

    if not text:
        return ""

    # Remove code blocks (```...```) and inline code (`...`)
    text = re.sub(r'```[\s\S]*?```', ' ', text)
    text = re.sub(r'`[^`]+`', ' ', text)

    # Images and links have to be resolved BEFORE bare URLs are stripped:
    # the URL pattern swallows the closing parenthesis of "(https://...)",
    # after which neither pattern can match and "[label](" debris would be
    # read aloud.
    text = re.sub(r'!\[[^\]]*\]\([^)]*\)', ' ', text)        # ![alt](url) -> removed
    text = re.sub(r'\[([^\]]+)\]\([^)]+\)', r'\1', text)     # [text](url) -> text

    # Remove remaining bare URLs
    text = re.sub(r'https?://\S+', ' ', text)

    # Remove markdown headers (# ## ###)
    text = re.sub(r'^#{1,6}\s+', '', text, flags=re.MULTILINE)

    # Remove markdown formatting
    text = re.sub(r'\*\*([^*]+)\*\*', r'\1', text)  # **bold** -> bold
    text = re.sub(r'\*([^*]+)\*', r'\1', text)      # *italic* -> italic
    text = re.sub(r'__([^_]+)__', r'\1', text)
    text = re.sub(r'_([^_]+)_', r'\1', text)

    # Remove HTML tags
    text = re.sub(r'<[^>]+>', ' ', text)

    # Remove special characters that don't belong in speech
    text = re.sub(r'[•◦▪▸►→←↑↓|~^]', ' ', text)

    # Normalize whitespace
    return re.sub(r'\s+', ' ', text).strip()
async def generate_audio(url, voice, summarize, max_chars):
    """Scrape an article and narrate it (or a short summary) with ElevenLabs.

    Args:
        url: Article URL to scrape.
        voice: Voice key, resolved through ``get_voice_id``.
        summarize: Any value other than ``"none"`` asks an LLM for a short
            summary first (Groq, then Gemini).
        max_chars: Cut-off applied to the cleaned article text when no
            usable summary could be obtained.

    Returns:
        tuple: ``(status_message, audio_path)``. ``audio_path`` is ``None``
        whenever no audio file was written.
    """
    # NOTE(review): the status literals in this function look like they
    # lost their wrapping markup in this copy of the file -- confirm.
    if not url:
        return '\n⚠️ Please enter a URL\n', None

    def usable_summary(candidate):
        """Return the cleaned summary, or None when it is unfit for TTS."""
        if not candidate or len(candidate) <= 50:
            return None
        cleaned = clean_text_for_audio(candidate)
        if len(cleaned) <= 50:
            return None
        lowered = cleaned.lower()
        if any(bad in lowered for bad in ('```', 'http://', 'https://', '**', '##')):
            return None
        return cleaned

    def ensure_clean_ending(txt):
        """Ensure text ends at a complete sentence."""
        if not txt:
            return txt
        # Find the last sentence-ending punctuation
        last_end = max(txt.rfind('.'), txt.rfind('?'), txt.rfind('!'))
        if last_end > len(txt) * 0.5:  # Only trim if we keep at least half
            return txt[:last_end + 1].strip()
        return txt

    try:
        scraper, _config = await get_ui_context()
        # Robust check for API Key
        api_key = os.environ.get("ELEVENLABS_API_KEY")
        if not api_key:
            return '\n⚠️ ELEVENLABS_API_KEY missing\n', None

        gr.Info("Scraping article...")
        # A failed scrape is treated like an empty article instead of
        # surfacing an AttributeError to the user.
        art = await scraper.scrape_article(url) or {}
        raw_text = art.get('markdownContent') or ''
        title = art.get('title') or 'Audio'

        # Clean text for audio FIRST - remove markdown, URLs, code, etc.
        text = clean_text_for_audio(raw_text)

        # Fallback if scraping returns minimal content
        if not text or len(text) < 50:
            return '\n⚠️ Article too short\n', None

        # Summarize with Groq (PRIMARY) or Gemini (BACKUP)
        if summarize != "none":
            groq_key = os.environ.get("GROQ_API_KEY")
            gemini_key = os.environ.get("GEMINI_API_KEY")
            summarize_success = False

            # HEAVILY GUARDRAILED PROMPT - 250 char limit for ElevenLabs
            prompt = (
                "Summarize this article in EXACTLY 2-3 sentences (under 250 characters total). "
                "RULES: - Plain English only, no markdown/formatting "
                "- Must be under 250 characters - End with a complete sentence "
                "- Focus on the core message "
                f"Title: {title} Content: {text[:2000]} "
                "Write a 250-character summary:"
            )

            # Try Groq first (PRIMARY - fastest)
            if groq_key:
                try:
                    gr.Info("Summarizing for audio with Groq...")
                    client = Groq(api_key=groq_key)
                    response = client.chat.completions.create(
                        model="llama-3.1-8b-instant",
                        messages=[{"role": "user", "content": prompt}],
                        max_tokens=100,   # ~250 chars output
                        temperature=0.5,  # More focused
                    )
                    summary = usable_summary((response.choices[0].message.content or "").strip())
                    if summary:
                        text = summary
                        summarize_success = True
                except Exception as e:
                    gr.Warning(f"Groq failed: {str(e)[:50]}, trying Gemini...")

            # Fallback to Gemini (BACKUP)
            if gemini_key and not summarize_success:
                try:
                    gr.Info("Summarizing for audio with Gemini...")
                    # The file imports the google.genai SDK, which has no
                    # genai.configure()/GenerativeModel; requests go through
                    # a Client and its async `aio` surface.
                    gemini = genai.Client(api_key=gemini_key)
                    last_error = None
                    for model_name in ('gemini-2.0-flash', 'gemini-1.5-flash-latest'):
                        try:
                            response = await gemini.aio.models.generate_content(
                                model=model_name, contents=prompt
                            )
                            break
                        except Exception as exc:
                            last_error = exc
                    else:
                        raise last_error
                    summary = usable_summary((response.text or "").strip())
                    if summary:
                        text = summary
                        summarize_success = True
                    else:
                        gr.Warning("Summary had issues, using cleaned original")
                except Exception as e:
                    gr.Warning(f"Gemini also failed: {str(e)[:50]}")

            # Final fallback: truncate original
            if not summarize_success:
                # int(): a slider value may arrive as a float, which cannot
                # be used as a slice bound.
                text = text[:int(max_chars)]

        # Final safety check - ensure text is clean for TTS
        text = clean_text_for_audio(text)
        # Ensure text ends at a complete sentence (not abrupt)
        text = ensure_clean_ending(text)
        if len(text) < 20:
            return '\n⚠️ Not enough clean text for audio\n', None

        # HARD LIMIT: 250 characters for ElevenLabs
        if len(text) > 250:
            text = text[:250].rsplit(' ', 1)[0] + '.'  # Cut at word boundary

        gr.Info(f"Generating audio ({len(text)} chars)...")
        try:
            from elevenlabs.client import ElevenLabs
            client = ElevenLabs(api_key=api_key, timeout=60.0)
            voice_id = get_voice_id(voice)
            audio = client.text_to_speech.convert(
                text=text,
                voice_id=voice_id,
                model_id="eleven_multilingual_v2",
            )
            out_path = f"outputs/audio_{uuid.uuid4().hex[:6]}.mp3"
            with open(out_path, "wb") as f:
                for chunk in audio:
                    f.write(chunk)
            return f'\n✅ Ready: {title}\n', out_path
        except Exception as e:
            return f'\n⚠️ ElevenLabs Error: {str(e)}\n', None
    except Exception as e:
        return f'\n❌ Error: {e}\n', None
', None # ============================================================================ # TAB 5: INTELLIGENCE (ENHANCED) # ============================================================================ # Global to store last report for export last_report_html = None async def analyst_report(topic): """Generate analyst report by searching and synthesizing Medium articles.""" global last_report_html if not topic: return "Please enter a topic." groq_key = os.environ.get("GROQ_API_KEY") gemini_key = os.environ.get("GEMINI_API_KEY") openai_key = os.environ.get("OPENAI_API_KEY") if not groq_key and not gemini_key and not openai_key: return "⚠️ Error: No AI API keys found. Set GROQ_API_KEY, GEMINI_API_KEY, or OPENAI_API_KEY in your .env file." max_articles = 5 try: scraper, config = await get_ui_context() gr.Info(f"Analyst: Researching '{topic}'...") # 1. Search for articles articles = await scraper.scrape_search(topic, max_articles=max_articles) if not articles: return f"""

No Results Found

No articles found for '{topic}'. Try a broader or different search term.

""" gr.Info(f"Found {len(articles)} articles. Extracting content...") # 2. Extract content from articles async def get_article_content(art): url = art.get('url') title = art.get('title', 'Untitled') author = art.get('author', {}).get('name') if isinstance(art.get('author'), dict) else art.get('author', 'Unknown') try: full_art = await scraper.scrape_article(url) content = full_art.get("markdownContent", "")[:2000] except: content = "(Content unavailable)" return f"\nTitle: {title}\nAuthor: {author}\nURL: {url}\nContent:\n{content}\n" results = await asyncio.gather(*[get_article_content(art) for art in articles]) context_text = "".join(results) # 3. Create synthesis prompt prompt = f"""You are a tech analyst. Synthesize the following Medium articles into a 'State of the Union' report. Topic: {topic} Structure your report: 1. Executive Summary (2-3 sentences) 2. Key Trends 3. Notable Insights 4. Contrarian Views (if any) 5. Recommended Reading Articles: {context_text} """ gr.Info("Analyst: Synthesizing report...") report_content = "" # 4. Try Groq first (PRIMARY - fastest) if groq_key: try: client = Groq(api_key=groq_key) response = client.chat.completions.create( model="llama-3.3-70b-versatile", # Best model for synthesis messages=[{"role": "user", "content": prompt}], max_tokens=2000, temperature=0.7 ) report_content = response.choices[0].message.content gr.Info("Report generated via Groq") except Exception as e: gr.Warning(f"Groq failed: {str(e)[:100]}, trying Gemini...") # 5. Fallback to Gemini if not report_content and gemini_key: try: genai.configure(api_key=gemini_key) try: model = genai.GenerativeModel('gemini-2.0-flash') except: model = genai.GenerativeModel('gemini-1.5-flash-latest') response = await model.generate_content_async(prompt) report_content = response.text gr.Info("Report generated via Gemini") except Exception as e: gr.Warning(f"Gemini failed: {str(e)[:100]}, trying OpenAI...") # 6. 
Fallback to OpenAI if not report_content and openai_key: try: from openai import AsyncOpenAI client = AsyncOpenAI(api_key=openai_key) response = await client.chat.completions.create( model="gpt-4o", messages=[{"role": "user", "content": prompt}] ) report_content = response.choices[0].message.content gr.Info("Report generated via GPT-4") except Exception as e: gr.Warning(f"OpenAI also failed: {str(e)[:100]}") # 6. Fallback: Rich article list (when AI quota exceeded) if not report_content: report_content = f"

📊 Research Digest: {topic}

" report_content += "
Note: AI synthesis services are currently unavailable (quota exceeded). Below is a curated list of the top articles found.
" report_content += f"

🔍 {len(articles)} Key Articles

" for idx, art in enumerate(articles, 1): title = art.get('title', 'Unknown Title') author = art.get('author', {}).get('name', 'Unknown') if isinstance(art.get('author'), dict) else 'Unknown' url = art.get('url', '#') reading_time = art.get('readingTime', 'N/A') claps = art.get('claps', 0) report_content += f"""

{idx}. {title}

✍️ {author} {'| ⏱️ ' + str(reading_time) + ' min read' if reading_time != 'N/A' else ''} {' | 👏 ' + str(claps) + ' claps' if claps > 0 else ''}

""" report_content += """

💡 To Get AI Analysis

To enable AI-powered synthesis of these articles:

  1. Gemini (Free): Get a free API key at Google AI Studio
  2. OpenAI (Paid): Add billing at OpenAI Platform
  3. Update your .env file with the new key
  4. Restart the UI
""" # Convert markdown to HTML if needed if not report_content.startswith("<"): import markdown report_content = markdown.markdown(report_content) # Wrap in digital paper final_html = f"
{report_content}
" last_report_html = final_html return final_html except Exception as e: error_msg = str(e) return f"""

⚠️ Analysis Error

The analyst encountered an issue while researching '{topic}'.

Error: {error_msg}

Troubleshooting:

""" async def export_report_pdf(): global last_report_html if not last_report_html: gr.Warning("No report generated yet") return None try: scraper, config = await get_ui_context() timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S") filepath = f"outputs/analyst_report_{timestamp}.pdf" # Apply NEW RENDERER TEMPLATE for polished PDF # We reuse RENDERER_TEMPLATE from html_renderer.py which has the "clean" CSS # We inject the content into the template styled_html = RENDERER_TEMPLATE.format(title="Analyst Report", content=last_report_html) if WEASYPRINT_AVAILABLE: try: WP_HTML(string=styled_html).write_pdf(filepath) except Exception: gr.Info("Using Robust PDF Engine...") pdf_bytes = await scraper.render_pdf(styled_html) with open(filepath, "wb") as f: f.write(pdf_bytes) else: gr.Info("Using Robust PDF Engine...") pdf_bytes = await scraper.render_pdf(styled_html) with open(filepath, "wb") as f: f.write(pdf_bytes) gr.Info(f"✅ PDF Exported: {filepath}") return filepath except Exception as e: gr.Warning(f"Export failed: {str(e)}") return None # ============================================================================ # TAB 6: SETTINGS # ============================================================================ def render_settings(): keys = { "Groq": "GROQ_API_KEY", # PRIMARY LLM "ElevenLabs": "ELEVENLABS_API_KEY", "Gemini": "GEMINI_API_KEY", # BACKUP LLM "OpenAI": "OPENAI_API_KEY" # BACKUP LLM } html = "

System Status

" for name, key in keys.items(): is_set = bool(os.environ.get(key)) cls = "active" if is_set else "missing" icon = "✅" if is_set else "❌" text = "Active & Ready" if is_set else "Missing Configuration" html += f"""
{icon}
{name}
{text}
""" return html def refresh_settings(): return render_settings() # ============================================================================ # BUILD UI # ============================================================================ with gr.Blocks(title="Project Aether") as demo: gr.HTML(f"") with gr.Tabs(): # 0. HERO - Landing page with feature overview with gr.TabItem("✦ Home"): gr.HTML('''

Project Aether

The Ultimate Medium Intelligence Platform

Extract, analyze, and transform Medium articles into actionable insights with AI-powered tools.

🔍

Discover

Search and browse Medium articles by topic or tag. Find the best content instantly.

📄

Article Reader

Extract and view any Medium article. Export to PDF, Markdown, HTML, or JSON.

📦

Bulk Archiver

Archive up to 20 articles at once. Download as a ZIP with PDFs ready to read offline.

🎧

Article to Audio

Convert articles to podcast-style audio. 43+ premium voices powered by ElevenLabs.

🧠

AI Analyst

Generate comprehensive intelligence reports. AI-powered synthesis using Groq & GPT-4.

⚙️

Configuration

Manage API keys and system status. Connect ElevenLabs, Groq, and OpenAI.

Built with 🔮 by T0X1N

''') # 1. DISCOVER - Full width like Sonic with gr.TabItem("🔍 Discover"): with gr.Column(elem_classes="hero-section"): # Centered Hero Card gr.HTML('''
🔍
Discover

Explore Medium

Search topics or browse by tag to find articles

''') # Search Input d_search = gr.Textbox(placeholder="Search topics...", show_label=False, elem_classes="omnibar") with gr.Row(): d_btn = gr.Button("🔍 Search", variant="primary", elem_classes="action-pill", scale=1) # Tag Browse Section gr.HTML('

📂 Browse by Tag

') d_tag_input = gr.Textbox(placeholder="Enter a tag (e.g., python, ai, startup)...", show_label=False, elem_classes="omnibar") with gr.Row(): d_tag_btn = gr.Button("📂 Browse Tag", variant="secondary", elem_classes="action-pill", scale=1) # Settings with gr.Row(): d_slider = gr.Slider(1, 10, value=5, step=1, label="Max Articles") # Results d_out = gr.HTML( value='
🔍 Enter a search term or tag above
', elem_classes="discover-results" ) d_btn.click(search_articles, inputs=[d_search, d_slider], outputs=d_out) d_tag_btn.click(browse_tag, inputs=[d_tag_input, d_slider], outputs=d_out) # 2. SCRAPE - Premium layout with centered hero card with gr.TabItem("📄 Scrape"): with gr.Column(elem_classes="hero-section"): # Centered Hero Card gr.HTML('''
📄
Scrape

Article Reader

Extract and view any Medium article

''') # URL Input s_url = gr.Textbox(placeholder="Paste Medium Article URL...", show_label=False, elem_classes="omnibar") with gr.Row(): s_btn = gr.Button("📄 Scrape Article", variant="primary", elem_classes="action-pill", scale=1) # Article View gr.HTML('

📖 Article View

') s_out = gr.HTML( value='
📄 Paste a URL above to view article
', ) # Export Section gr.HTML('

📦 Export Archive

') with gr.Row(): s_fmt = gr.Dropdown(["pdf", "markdown", "html", "json"], value="pdf", label="Format", scale=1) s_exp_btn = gr.Button("📥 Download", variant="primary", elem_classes="action-pill", scale=1) s_file = gr.File(label="Downloaded File", show_label=True, interactive=False) s_btn.click(scrape_and_render_combined, inputs=s_url, outputs=s_out) s_exp_btn.click(export_article, inputs=[s_url, s_fmt], outputs=s_file) # 3. BATCH - Premium layout with centered hero card with gr.TabItem("📦 Batch"): with gr.Column(elem_classes="hero-section"): # Centered Hero Card gr.HTML('''
📦
Batch

Bulk Archiver

Archive up to 20 articles at once as PDFs

''') # State to track URL list url_list = gr.State([""]) # Start with one empty slot # Dynamic URL inputs container @gr.render(inputs=url_list) def render_url_inputs(urls): for i, url in enumerate(urls): with gr.Row(elem_classes="url-input-row"): url_box = gr.Textbox( value=url, placeholder=f"URL #{i+1} - Paste Medium article link...", show_label=False, elem_classes="url-single-input", scale=6, container=False ) del_btn = gr.Button( "❌", scale=1, min_width=50, elem_classes="url-delete-btn", variant="secondary" ) def update_url(new_url, urls_list, idx=i): urls_list[idx] = new_url return urls_list url_box.change(update_url, inputs=[url_box, url_list], outputs=url_list) def delete_url(urls_list, idx=i): if len(urls_list) > 1: return urls_list[:idx] + urls_list[idx+1:] return urls_list del_btn.click(delete_url, inputs=url_list, outputs=url_list) # Action Buttons with gr.Row(): add_url_btn = gr.Button("➕ Add URL", variant="secondary", elem_classes="action-pill-secondary") b_btn = gr.Button("🚀 Process Archive", variant="primary", elem_classes="action-pill") def add_url(urls_list): if len(urls_list) < 20: return urls_list + [""] return urls_list add_url_btn.click(add_url, inputs=url_list, outputs=url_list) # Results b_html = gr.HTML( value='
📦 Add URLs above and click Process
', ) b_zip = gr.File(label="Download Archive (ZIP)") async def process_batch_from_list(urls_list): urls_text = "\n".join([u for u in urls_list if u.strip()]) return await batch_scrape(urls_text) b_btn.click(process_batch_from_list, inputs=url_list, outputs=[b_html, b_zip]) # 4. SONIC (AUDIO) - POLISHED UI with gr.TabItem("🎧 Sonic"): with gr.Column(elem_classes="hero-section"): # Compact Centered Hero Card gr.HTML('''
🎧
Sonic

Article to Audio

Convert any Medium article into podcast-style narration

''') # URL Input so_url = gr.Textbox(placeholder="Paste Medium Article URL...", show_label=False, elem_classes="omnibar") # Voice Selection (standalone like Discover's tag input) so_voice = gr.Dropdown(VOICE_CHOICES, value="george", label="🎙️ Voice") # Generate Button (in own row like Discover's search button) with gr.Row(): so_btn = gr.Button("🔊 Generate Audio", variant="primary", elem_classes="action-pill", scale=1) # Results Section so_status = gr.HTML( value='
🎵 Ready to generate
', ) so_player = gr.Audio(label=None, show_label=False, type="filepath") so_btn.click(generate_audio, inputs=[so_url, so_voice, gr.State("auto"), gr.State(250)], outputs=[so_status, so_player]) # 5. INTELLIGENCE - Premium layout matching Sonic tab style with gr.TabItem("🧠 Intelligence"): with gr.Column(elem_classes="hero-section"): # Centered Hero Card (like Sonic) gr.HTML('''
🧠
Intel

Analyst Dashboard

Generate comprehensive intelligence reports using AI-powered analysis

''') # Topic Input (like Sonic's URL input) i_topic = gr.Textbox(placeholder="Enter a topic for deep analysis...", show_label=False, elem_classes="omnibar") # Generate Row (balanced like Sonic) with gr.Row(): i_btn = gr.Button("🔍 Generate Report", variant="primary", elem_classes="action-pill", scale=1) # Output Area i_out = gr.HTML( value='
📊 Ready to analyze
', ) # Export Section - matches Scrape tab layout gr.HTML('

📦 Export Report

') with gr.Row(): i_exp_btn = gr.Button("📥 Download PDF", variant="primary", elem_classes="action-pill", scale=1) i_file = gr.File(label="Downloaded File", show_label=True, interactive=False) i_btn.click(analyst_report, inputs=i_topic, outputs=i_out) i_exp_btn.click(export_report_pdf, outputs=i_file) # 6. SETTINGS - Premium layout matching Sonic tab style with gr.TabItem("⚙️ Settings"): with gr.Column(elem_classes="hero-section"): # Centered Hero Card (like Sonic) gr.HTML('''
⚙️
Config

Configuration Status

API keys and system configuration

''') set_html = gr.HTML(render_settings()) set_refresh = gr.Button("Refresh Status", elem_classes="action-pill") set_refresh.click(refresh_settings, outputs=set_html) if __name__ == "__main__": print("--- DEPOLOYMENT_VERSION_3_FORCE_BLACK ---") demo.launch(server_name="0.0.0.0", server_port=7860, share=False)