# yahoo_finance_scraper.py
import feedparser
from urllib.parse import quote
from datetime import datetime, timedelta, timezone
import calendar
from bs4 import BeautifulSoup 

class YahooFinanceScraper:
    """Collect recent finance news items from RSS feeds.

    Feeds are downloaded and parsed with ``feedparser``; HTML in entry
    summaries is reduced to plain text with BeautifulSoup. Every article is
    returned as a dict with the keys ``title``, ``link``, ``summary`` and
    ``published`` (an ISO-8601 UTC timestamp string).
    """

    # Entries older than this many days are ignored.
    MAX_ARTICLE_AGE_DAYS = 7
    # Summaries are truncated to this many characters.
    SUMMARY_MAX_CHARS = 300

    def __init__(self):
        # Only the User-Agent value is used; it is passed to feedparser as the
        # ``agent`` argument when fetching a feed.
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
        }

    @staticmethod
    def _feed_url_for_symbol(symbol):
        """Return the RSS URL for *symbol*, or the top-stories URL if empty.

        The symbol is stripped, upper-cased and percent-encoded so characters
        such as ``^``, ``/``, ``?`` or spaces cannot alter the URL structure.
        """
        ticker = (symbol or "").strip().upper()
        if not ticker:
            # No symbol: the general top-stories feed is the intended default.
            return "https://finance.yahoo.com/news/rssindex"
        encoded_ticker = quote(ticker, safe="=")
        return f"https://finance.yahoo.com/rss/quotes/{encoded_ticker}"

    @staticmethod
    def _entry_datetime(entry):
        """Return the entry's publication time as an aware UTC datetime.

        Returns ``None`` when ``published_parsed`` is missing or cannot be
        converted, so the caller can skip the entry.
        """
        published_struct = entry.get('published_parsed')
        if not published_struct:
            return None
        try:
            # timegm() interprets the struct as UTC (unlike time.mktime()).
            article_timestamp = calendar.timegm(published_struct)
            return datetime.fromtimestamp(article_timestamp, tz=timezone.utc)
        except (TypeError, ValueError, IndexError, OverflowError, OSError):
            return None

    @staticmethod
    def _clean_link(link):
        """Return the part of *link* after the last ``&url=`` marker.

        Links without the marker are returned unchanged; ``None`` becomes an
        empty string. Presumably this unwraps redirect-style links that carry
        the real target in a ``url`` query parameter.
        """
        return (link or '').split('&url=')[-1]

    def _build_article(self, entry, article_date_utc):
        """Convert one feed entry into the article dict returned to callers."""
        raw_summary_html = entry.get('summary', '') or ''
        clean_summary = BeautifulSoup(raw_summary_html, 'html.parser').get_text()
        return {
            'title': entry.get('title', 'No title'),
            'link': self._clean_link(entry.get('link', '')),
            'summary': clean_summary[:self.SUMMARY_MAX_CHARS],
            'published': article_date_utc.isoformat(),
        }

    def _parse_feed(self, url, max_articles=10):
        """Fetch *url* and return up to *max_articles* recent articles.

        Entries without a usable publication date, or older than
        ``MAX_ARTICLE_AGE_DAYS``, are skipped. Any error while fetching or
        parsing is printed and the articles collected so far (possibly none)
        are returned; no fallback feed is consulted here.

        A missing or non-positive *max_articles* yields an empty list.
        """
        # Without this guard the loop below would append one article before
        # checking the limit, returning an item even when none were requested.
        if not max_articles or max_articles <= 0:
            return []

        news_list = []
        try:
            cutoff_date_utc = datetime.now(timezone.utc) - timedelta(
                days=self.MAX_ARTICLE_AGE_DAYS
            )
            feed = feedparser.parse(url, agent=self.headers['User-Agent'])

            for entry in feed.entries:
                article_date_utc = self._entry_datetime(entry)
                if article_date_utc is None or article_date_utc < cutoff_date_utc:
                    continue

                news_list.append(self._build_article(entry, article_date_utc))
                if len(news_list) >= max_articles:
                    break
        except Exception as e:
            # Deliberate best-effort boundary: a broken feed must not crash
            # the caller, so report the problem and return what we have.
            print(f"Error parsing feed: {e}")

        return news_list

    def get_latest_news(self, symbol="", max_articles=10):
        """Return the latest news for *symbol*, or top stories if it is empty.

        When a symbol is given and its feed yields nothing, an empty list is
        returned on purpose: falling back to the generic feed here would mix
        unrelated news into symbol-specific results.
        """
        url = self._feed_url_for_symbol(symbol)
        return self._parse_feed(url, max_articles)

    def _get_fallback_news(self, max_articles):
        """Return top stories; on an unexpected error return a placeholder item.

        Not called by ``get_latest_news``; used by ``search_news`` only.
        """
        try:
            url = "https://finance.yahoo.com/rss/topstories"
            return self._parse_feed(url, max_articles)
        except Exception:
            # ``except Exception`` rather than a bare ``except`` so that
            # KeyboardInterrupt / SystemExit still propagate.
            return [{'title': 'Unable to fetch news', 'link': '', 'summary': 'Please try again later', 'published': 'N/A'}]

    def search_news(self, keyword, max_articles=10):
        """Search news for *keyword* through a Google News RSS query.

        The query is restricted to ``site:finance.yahoo.com``. An empty
        keyword delegates to ``get_latest_news``. If the search fails or
        returns nothing, the top-stories fallback is returned instead.
        """
        if not keyword:
            return self.get_latest_news(max_articles=max_articles)
        try:
            safe_keyword = quote(keyword)
            url = f"https://news.google.com/rss/search?q={safe_keyword}+site:finance.yahoo.com&hl=en-US&gl=US&ceid=US:en"
            news_list = self._parse_feed(url, max_articles)
        except Exception as e:
            print(f"Search error: {e}")
            return self._get_fallback_news(max_articles)
        return news_list if news_list else self._get_fallback_news(max_articles)