#!/usr/bin/env python3
"""
rebuild_rag.py
Rebuilds ChromaDB from claims stored in SQLite and regenerates
the RAG explanation + analysis cache for all companies.
Run this instead of the full pipeline when only ChromaDB/RAG needs fixing.
"""

import sys
from pathlib import Path
from datetime import datetime

sys.path.insert(0, str(Path(__file__).resolve().parent))

import config
from db import init_db, get_db
from app.services.cache_service import get_ready_companies, get_claims, save_analysis
from pipelines.rag.build_vector_db import load_vector_db, build_vector_db
from pipelines.rag.rag_explainer import load_groq_client, run_rag_explanation

def _log(msg):
    print(f"[{datetime.now().strftime('%H:%M:%S')}] {msg}")

def _signal_title(sentence):
    words = sentence.split()
    return " ".join(words[:8]).rstrip(",.;") + ("…" if len(words) > 8 else "")

def _infer_verdict(pos, neg):
    if len(pos) > len(neg) * 1.5: return "Positive"
    if len(neg) > len(pos) * 1.5: return "Negative"
    return "Mixed"

def main():
    _log("Loading models...")
    init_db()
    load_vector_db(config.CHROMA_DB_PATH)
    load_groq_client()
    _log("Models ready.\n")

    companies = get_ready_companies()
    _log(f"Found {len(companies)} ready companies.\n")

    for company in companies:
        cid   = company["id"]
        name  = company["display_name"]
        folder = company["folder_name"]
        _log(f"Processing: {name}")

        # 1. Load claims from SQLite
        claims = get_claims(cid)
        if not claims:
            _log(f"  ⚠ No claims found in DB — skipping.\n")
            continue

        # 2. Build ChromaDB entries from claims + add company field
        docs_for_vector = [
            {
                "sentence": c.get("sentence", ""),
                "company":  name,
                "quarter":  c.get("quarter", ""),
                "result":   c.get("result", "UNVERIFIED"),
                "reason":   "CLAIM",
            }
            for c in claims if c.get("sentence")
        ]

        _log(f"  → Loading {len(docs_for_vector)} claims into ChromaDB...")
        status = build_vector_db(docs_for_vector, company=name)
        _log(f"  → Vector DB: {status}")

        # 3. Re-run RAG explanation
        _log(f"  → Generating RAG explanations...")
        query = f"{name} financial performance earnings guidance"

        # Load current cached analysis to get existing metrics/claims
        with get_db() as conn:
            row = conn.execute(
                "SELECT result_json FROM analysis_cache WHERE company_id=? AND mode='full'",
                (cid,)
            ).fetchone()

        import json
        existing = json.loads(row["result_json"]) if row else {}

        for mode in ("full", "earnings", "financial"):
            try:
                rag     = run_rag_explanation(query, company=name, top_k=config.RAG_TOP_K)
                pos_raw = rag.get("positive_signals", [])
                neg_raw = rag.get("negative_signals", [])

                result = {
                    **existing,               # keep metrics, claims, timeseries etc.
                    "mode":             mode,
                    "positive_signals": [
                        {"index": f"P{i+1}", "title": _signal_title(s), "body": s}
                        for i, s in enumerate(pos_raw)
                    ],
                    "negative_signals": [
                        {"index": f"N{i+1}", "title": _signal_title(s), "body": s}
                        for i, s in enumerate(neg_raw)
                    ],
                    "explanation": rag.get("explanation", ""),
                    "verdict":     _infer_verdict(pos_raw, neg_raw),
                }
                save_analysis(cid, mode, result)
            except Exception as e:
                _log(f"  ⚠ Mode '{mode}' failed: {e}")

        _log(f"  ✓ {name} done.\n")

    _log("All done.")

if __name__ == "__main__":
    main()