Spaces:

Warisamm748
/

3.11.2568

Runtime error

3.11.2568

File size: 13,218 Bytes

import streamlit as st
import requests
from bs4 import BeautifulSoup
from langchain_core.messages import HumanMessage # ใช้ langchain_core เพื่อแก้ปัญหาการ Import
from langchain_groq import ChatGroq
import json
import os
from transformers import pipeline

# --- 0. CONFIGURATION & INITIALIZATION ---

# Groq API key configuration.
# The value is read from the GROQ_API_KEY environment variable, which is
# expected to be provided as a secret of the Hugging Face Space.
GROQ_API_KEY = os.environ.get('GROQ_API_KEY')
if not GROQ_API_KEY:
    # Without a key no LLM call can succeed, so report it and halt the script.
    st.error("GROQ_API_KEY is not set. Please configure it in your Space Secrets (Settings > Repository secrets).")
    st.stop()

# Chat model instance shared by every LLM call in this file.
llm = ChatGroq(model="llama-3.1-8b-instant", api_key=GROQ_API_KEY)

# --- 1. SCRAPING FUNCTION (Yahoo Finance Only) ---

def extract_titles_and_summaries(company_name, num_articles=10):
    """Scrape headlines and summaries from the Yahoo Finance news front page.

    The page fetched is the general news stream; ``company_name`` is only
    used to label the result and does not filter the articles.

    Args:
        company_name: Label stored under ``"Company"``. A falsy value is
            replaced by ``"General Market News"``.
        num_articles: Maximum number of ``li.stream-item`` elements to read.

    Returns:
        On success, a dict ``{"Company": ..., "Articles": [...]}`` in which
        every article has the keys ``"Source"``, ``"Title"`` and
        ``"Summary"``. On any failure (non-200 status, network error,
        timeout, parsing error) the problem is reported with ``st.error``
        and an empty list is returned, so callers can test the result for
        truthiness.
    """
    url = 'https://finance.yahoo.com/news/'

    try:
        # A browser-like User-Agent reduces the chance of being blocked.
        # The timeout keeps a stalled connection from hanging the app forever
        # (requests waits indefinitely when no timeout is given).
        response = requests.get(
            url,
            headers={'User-Agent': 'Mozilla/5.0'},
            timeout=15,
        )
        if response.status_code != 200:
            st.error(f"Failed to fetch Yahoo Finance. Status code: {response.status_code}")
            return []

        soup = BeautifulSoup(response.content, "html.parser")

        # Each news entry is expected to be an <li class="stream-item">.
        articles = soup.find_all('li', class_='stream-item', limit=num_articles)

        extracted_articles = []
        for article in articles:
            # Title: text of the link inside the <h3>, or of the <h3> itself.
            title_tag = article.find('h3')
            title = "No Title Found"
            if title_tag:
                link_tag = title_tag.find('a')
                title = (link_tag or title_tag).get_text(strip=True)

            # Summary: first <p> inside the entry.
            summary_tag = article.find('p')
            summary = summary_tag.get_text(strip=True) if summary_tag else "No Summary Found"

            extracted_articles.append({
                "Source": "Yahoo Finance",
                "Title": title,
                "Summary": summary,
            })

        return {
            "Company": company_name if company_name else "General Market News",
            "Articles": extracted_articles,
        }

    except Exception as e:
        # Top-level boundary for the scraping step: surface the error in the
        # UI instead of crashing the app.
        st.error(f"An error occurred during scraping: {e}")
        return []

# --- 2. ANALYSIS FUNCTIONS ---

def perform_sentiment_analysis(news_data):
    """Label every article with a sentiment using a Hugging Face pipeline.

    Each article dict in ``news_data["Articles"]`` is updated in place with
    a ``"Sentiment"`` label (``"Positive"``, ``"Negative"``, ``"Neutral"``
    or ``"Unknown"``) and the classifier's ``"Score"``.

    Args:
        news_data: Dict with an ``"Articles"`` list whose items have
            ``"Title"`` and ``"Summary"`` keys.

    Returns:
        A tuple ``(news_data, sentiment_counts)`` where ``sentiment_counts``
        maps ``"Positive"``/``"Negative"``/``"Neutral"`` to the number of
        articles with that label. Articles labelled ``"Unknown"`` are not
        counted.
    """
    sentiment_counts = {"Positive": 0, "Negative": 0, "Neutral": 0}
    articles = news_data.get("Articles", [])
    if not articles:
        # Nothing to classify: avoid loading the model at all.
        return news_data, sentiment_counts

    # Collapse the model's labels into three buckets. Built once, outside
    # the loop, because it never changes between articles.
    sentiment_map = {
        "positive": "Positive",
        "negative": "Negative",
        "neutral": "Neutral",
        "very positive": "Positive",
        "very negative": "Negative",
    }

    # device=-1 runs the pipeline on the CPU.
    pipe = pipeline("text-classification", model="tabularisai/multilingual-sentiment-analysis", device=-1)

    for article in articles:
        content = f"{article['Title']} {article['Summary']}"
        sentiment_result = pipe(content)[0]
        sentiment = sentiment_map.get(sentiment_result["label"].lower(), "Unknown")

        article["Sentiment"] = sentiment
        article["Score"] = float(sentiment_result["score"])
        if sentiment in sentiment_counts:
            sentiment_counts[sentiment] += 1

    return news_data, sentiment_counts

def extract_topics_with_hf(news_data):
    """Attach up to three topic labels to every article.

    Args:
        news_data: Dict with an optional ``"Company"`` entry and an
            ``"Articles"`` list whose items have ``"Title"`` and
            ``"Summary"`` keys (``"Source"``, ``"Sentiment"`` and
            ``"Score"`` are copied when present).

    Returns:
        A new dict ``{"Company": ..., "Articles": [...]}``. Every article in
        it carries ``"Source"``, ``"Title"``, ``"Summary"``, ``"Sentiment"``,
        ``"Score"`` and a ``"Topics"`` list. The input dict is not modified.
    """
    structured_data = {
        "Company": news_data.get("Company", "Unknown"),
        "Articles": [],
    }
    articles = news_data.get("Articles", [])
    if not articles:
        # Nothing to classify: avoid loading the model at all.
        return structured_data

    # device=-1 runs the pipeline on the CPU.
    topic_pipe = pipeline("text-classification", model="valurank/distilroberta-topic-classification", device=-1)

    for article in articles:
        content = f"{article['Title']} {article['Summary']}"
        # top_k=3 asks the classifier for its three highest-scoring labels.
        topics_result = topic_pipe(content, top_k=3)
        topics = [topic["label"] for topic in topics_result] if topics_result else ["Unknown"]

        structured_data["Articles"].append({
            "Source": article.get("Source", "Unknown"),
            "Title": article["Title"],
            "Summary": article["Summary"],
            "Sentiment": article.get("Sentiment", "Unknown"),
            "Score": article.get("Score", 0.0),
            "Topics": topics,
        })

    return structured_data

def extract_json(response):
    """Parse a JSON object out of an LLM reply, tolerating surrounding text.

    The whole reply is tried first. If that fails and the reply is a string,
    the span from the first ``{`` to the last ``}`` is tried, which covers
    replies wrapped in Markdown code fences or framed by extra prose.

    Args:
        response: Text expected to contain a JSON object.

    Returns:
        The parsed dict, or ``{}`` when no JSON object can be recovered
        (including valid JSON that is not an object, and non-text input).
        Never raises for malformed input, so callers can use ``.get``.
    """
    try:
        parsed = json.loads(response)
    except (TypeError, ValueError):
        # TypeError: input is not str/bytes. ValueError covers JSONDecodeError.
        parsed = None
        if isinstance(response, str):
            start = response.find("{")
            end = response.rfind("}")
            if start != -1 and end > start:
                try:
                    parsed = json.loads(response[start:end + 1])
                except ValueError:
                    parsed = None

    return parsed if isinstance(parsed, dict) else {}

def generate_final_sentiment(news_data, sentiment_counts):
    """Ask the LLM for a short overall sentiment summary.

    Args:
        news_data: Dict with a ``"Company"`` entry and an ``"Articles"`` list
            whose items have a ``"Summary"`` key (and optionally ``"Source"``).
        sentiment_counts: Dict with ``"Positive"``, ``"Negative"`` and
            ``"Neutral"`` counts.

    Returns:
        The text of the LLM reply, or
        ``"Sentiment analysis summary not available."`` when the reply is
        missing or empty.
    """
    company_name = news_data["Company"]
    total_articles = sum(sentiment_counts.values())
    combined_summaries = " ".join(
        f"({article.get('Source', 'Unknown')}) {article['Summary']}"
        for article in news_data["Articles"]
    )

    # The summaries are included so the model can actually cite "key news
    # highlights"; the counts alone carry no article content.
    prompt = f"""
    Based on the analysis of {total_articles} articles about the company "{company_name}":
    - Positive articles: {sentiment_counts['Positive']}
    - Negative articles: {sentiment_counts['Negative']}
    - Neutral articles: {sentiment_counts['Neutral']}

    Article summaries:
    {combined_summaries}

    Provide a single, concise summary that integrates the overall sentiment analysis and key news highlights while maintaining a natural flow. Explain its implications for the company's reputation, stock potential, and public perception.

    Respond **ONLY** with a well-structured very concise and short paragraph in plain text, focusing on overall sentiment.
    """
    response = llm.invoke([HumanMessage(content=prompt)], max_tokens=200)

    # Read ``content`` defensively: the fallback must be returned as plain
    # text rather than dereferenced as if it were a message object.
    content = getattr(response, "content", None)
    return content if content else "Sentiment analysis summary not available."

def compare_articles(news_data, sentiment_counts):
    """Build the final comparative report with help from the LLM.

    Two LLM calls are made: one to name up to three themes shared by the
    articles, one to describe coverage differences. A third call (through
    ``generate_final_sentiment``) produces the closing summary.

    Args:
        news_data: Dict with ``"Company"`` and an ``"Articles"`` list whose
            items carry a ``"Topics"`` list.
        sentiment_counts: Dict mapping sentiment label to article count.

    Returns:
        Dict with the keys ``"Company"``, ``"Articles"``,
        ``"Comparative Sentiment Score"`` (sentiment distribution, coverage
        differences, topic overlap) and ``"Final Sentiment Analysis"``.
    """
    articles = news_data.get("Articles", [])
    all_topics = [set(article.get("Topics", [])) for article in articles]
    common_topics = set.intersection(*all_topics) if all_topics else set()

    # 1. Ask the LLM for the themes shared across articles.
    topics_prompt = f"""
    Analyze the following article topics and identify **only three** key themes that are common across multiple articles,
    even if they are phrased differently. The topics from each article are:
    {all_topics}
    Respond **ONLY** with a JSON format:
    {{"CommonTopics": ["topic1", "topic2", "topic3"]}}
    """
    response = llm.invoke([HumanMessage(content=topics_prompt)]).content
    parsed_topics = extract_json(response)
    llm_topics = parsed_topics.get("CommonTopics") if isinstance(parsed_topics, dict) else None
    if isinstance(llm_topics, list):
        # Coerce to text: the values are later joined into display strings.
        contextual_common_topics = [str(topic) for topic in llm_topics][:3]
    else:
        # Reply unusable: fall back to the literal intersection of the
        # classifier topics, sorted so the output is deterministic.
        contextual_common_topics = sorted(common_topics)[:3]

    # 2. Ask the LLM to contrast the coverage.
    total_articles = sum(sentiment_counts.values())
    comparison_prompt = f"""
    Provide a high-level summary comparing {total_articles} news articles about "{news_data['Company']}" from Yahoo Finance:
    - Sentiment distribution: {sentiment_counts}
    - Commonly discussed topics across articles: {contextual_common_topics}

    Consider the following:
    1. Notable contrasts between articles (e.g., major differences in topics and perspectives).
    2. Overall implications for the company's reputation, stock potential, and public perception.

    Respond **ONLY** with a concise and insightful summary in this JSON format:
    {{
        "Coverage Differences": [
            {{"Comparison": "Brief contrast between a positive and negative article", "Impact": "Concise impact statement"}},
            {{"Comparison": "Brief contrast between articles on different topics", "Impact": "Concise impact statement"}}
        ]
    }}
    """
    response = llm.invoke([HumanMessage(content=comparison_prompt)]).content
    parsed_comparison = extract_json(response)
    raw_differences = (
        parsed_comparison.get("Coverage Differences", [])
        if isinstance(parsed_comparison, dict)
        else []
    )
    # Keep only well-formed entries; the UI calls ``.get`` on each of them.
    coverage_differences = (
        [entry for entry in raw_differences if isinstance(entry, dict)]
        if isinstance(raw_differences, list)
        else []
    )

    final_sentiment = generate_final_sentiment(news_data, sentiment_counts)

    shared = set(contextual_common_topics)
    unique_topics = {
        f"Article {i+1} ({article.get('Source', 'Unknown')})": sorted(topics - shared)
        for i, (topics, article) in enumerate(zip(all_topics, articles))
    }

    return {
        "Company": news_data["Company"],
        "Articles": articles,
        "Comparative Sentiment Score": {
            "Sentiment Distribution": sentiment_counts,
            "Coverage Differences": coverage_differences,
            "Topic Overlap": {
                "Common Topics": contextual_common_topics,
                "Unique Topics": unique_topics,
            },
        },
        "Final Sentiment Analysis": final_sentiment,
    }

# --- 3. STREAMLIT UI IMPLEMENTATION ---

def display_articles(articles):
    """Render one heading plus summary, sentiment and topic lines per article.

    Each item of ``articles`` must provide the keys ``"Source"``,
    ``"Title"``, ``"Summary"``, ``"Sentiment"``, ``"Score"`` and ``"Topics"``.
    Articles are numbered from 1.
    """
    position = 0
    for entry in articles:
        position += 1
        heading = f"##### **Article {position} ({entry['Source']}): {entry['Title']}**"
        st.markdown(heading)

        summary_line = f"- **Summary:** {entry['Summary']}"
        st.write(summary_line)

        sentiment_line = f"- **Sentiment:** {entry['Sentiment']} | **Score:** {entry['Score']:.2f}"
        st.write(sentiment_line)

        joined_topics = ', '.join(entry['Topics'])
        st.write(f"- **Topics:** {joined_topics}")

def display_sentiment_distribution(sentiment_distribution):
    """Show the sentiment counts as a two-column table (label, count)."""
    st.markdown("#### **Sentiment Distribution:**")
    labels = list(sentiment_distribution.keys())
    counts = list(sentiment_distribution.values())
    st.table({"Sentiment": labels, "Count": counts})

def display_coverage_differences(coverage_differences):
    """List each coverage comparison with its impact; show nothing if empty."""
    if not coverage_differences:
        return

    st.markdown("#### **Coverage Differences:**")
    for entry in coverage_differences:
        headline = entry.get('Comparison', 'No Comparison Detail')
        consequence = entry.get('Impact', 'No Impact Detail')
        st.write(f"- **{headline}:** {consequence}")

def display_topic_overlap(topic_overlap):
    """Show the common topics and the per-article unique topics.

    Args:
        topic_overlap: Dict that may contain ``"Common Topics"`` (list of
            topics) and ``"Unique Topics"`` (mapping of article label to a
            list of topics). Missing or empty entries are shown as ``N/A``.
    """

    def _joined(topics):
        # A bare string would otherwise be joined character by character.
        if isinstance(topics, str):
            topics = [topics]
        # Fall back to the placeholder for missing *and* empty collections,
        # and coerce items to text so non-string topics cannot break join().
        return ', '.join(str(topic) for topic in (topics or ['N/A']))

    st.markdown("#### **Topic Overlap:**")
    st.write(f"- **Common Topics:** {_joined(topic_overlap.get('Common Topics'))}")
    st.markdown("- **Unique Topics by Article:**")
    for article, topics in (topic_overlap.get("Unique Topics") or {}).items():
        st.write(f"  - **{article}:** {_joined(topics)}")

def run_analysis(company_name):
    """Run the four-step pipeline and render the resulting report.

    Steps: scrape the news, classify sentiment, extract topics, then build
    the LLM-assisted comparison. If scraping yields nothing, a warning is
    shown and the function returns without running the later steps.
    """
    # Step 1: fetch the news.
    with st.spinner('1/4 Scraping news from Yahoo Finance...'):
        news_data = extract_titles_and_summaries(company_name)

    have_articles = bool(news_data) and bool(news_data['Articles'])
    if not have_articles:
        st.warning("Could not find any articles or scraping failed.")
        return

    # Step 2: sentiment classification.
    with st.spinner('2/4 Performing Sentiment Analysis...'):
        news_with_sentiment, sentiment_counts = perform_sentiment_analysis(news_data)

    # Step 3: topic extraction.
    with st.spinner('3/4 Extracting Topics...'):
        structured_data = extract_topics_with_hf(news_with_sentiment)

    # Step 4: LLM comparison and summary.
    with st.spinner('4/4 Generating Final Report...'):
        final_report = compare_articles(structured_data, sentiment_counts)

    # Step 5: render the report.
    st.success("Analysis Complete!")

    st.markdown("---")
    report_company = final_report.get('Company', 'Unknown')
    st.markdown(f"## **Analysis Report for: {report_company}**")

    # Per-article details.
    st.markdown("#### **Articles Analyzed:**")
    display_articles(final_report.get("Articles", []))

    # Comparative section: each sub-part is looked up and rendered in order.
    st.markdown("#### **Comparative Sentiment Score:**")
    comparative = final_report.get("Comparative Sentiment Score", {})
    sections = (
        ("Sentiment Distribution", {}, display_sentiment_distribution),
        ("Coverage Differences", [], display_coverage_differences),
        ("Topic Overlap", {}, display_topic_overlap),
    )
    for key, fallback, render in sections:
        render(comparative.get(key, fallback))

    # Closing LLM summary.
    st.markdown("#### **Final Sentiment Analysis Summary:**")
    st.markdown(final_report.get("Final Sentiment Analysis", "No sentiment analysis available."))
    st.markdown("---")

    # Raw report for inspection.
    st.json(final_report)

# --- MAIN STREAMLIT APP ---

st.set_page_config(layout="wide")
st.title("Company News Sentiment Analysis (Yahoo Finance)")

# Free-text company name / topic entered by the user.
company_name = st.text_input(
    "Enter the company name/topic:",
    placeholder="Example: Microsoft, Apple, Tesla (Note: Currently scrapes general market news from Yahoo Finance)",
)

# Run the pipeline only on an explicit click, and only with a non-empty input.
if st.button("Generate Summary"):
    if not company_name:
        st.warning("Please enter a company name or topic to proceed with the analysis!")
    else:
        run_analysis(company_name)