import json
import os

import requests
import streamlit as st
from bs4 import BeautifulSoup
# HumanMessage is imported from langchain_core to avoid import problems.
from langchain_core.messages import HumanMessage
from langchain_groq import ChatGroq
from transformers import pipeline

# --- 0. CONFIGURATION & INITIALIZATION ---

# Groq API key configuration.
# The value is read from the secret named GROQ_API_KEY in the Hugging Face Space.
GROQ_API_KEY = os.getenv('GROQ_API_KEY')
if not GROQ_API_KEY:
    st.error("GROQ_API_KEY is not set. Please configure it in your Space Secrets (Settings > Repository secrets).")
    st.stop()

# Initialize the LLM model
llm = ChatGroq(api_key=GROQ_API_KEY, model="llama-3.1-8b-instant")

# Upper bound (seconds) on the Yahoo Finance request so a stalled connection
# cannot hang the app indefinitely.
REQUEST_TIMEOUT_SECONDS = 15


# --- 1. SCRAPING FUNCTION (Yahoo Finance Only) ---
def extract_titles_and_summaries(company_name, num_articles=10):
    """Scrape headlines and summaries from the Yahoo Finance news front page.

    The page fetched is always the general news stream; ``company_name`` is
    only used to label the result and does not filter the articles.

    Args:
        company_name: Label stored under ``"Company"``. A falsy value is
            replaced by ``"General Market News"``.
        num_articles: Maximum number of articles to return.

    Returns:
        On success, a dict with ``"Company"`` and ``"Articles"`` (a list of
        dicts with ``"Source"``, ``"Title"`` and ``"Summary"``). On a
        non-200 response or any error while scraping, an empty list (the
        error is reported through ``st.error``).
    """
    url = 'https://finance.yahoo.com/news/'
    try:
        # A browser-like User-Agent avoids being blocked by the site.
        response = requests.get(
            url,
            headers={'User-Agent': 'Mozilla/5.0'},
            timeout=REQUEST_TIMEOUT_SECONDS,
        )
        if response.status_code != 200:
            st.error(f"Failed to fetch Yahoo Finance. Status code: {response.status_code}")
            return []

        soup = BeautifulSoup(response.content, "html.parser")

        extracted_articles = []
        # Selector for the news list entries.
        for article in soup.find_all('li', class_='stream-item'):
            if len(extracted_articles) >= num_articles:
                break

            # Title: the <h3>, preferring the text of the link inside it.
            title = ""
            title_tag = article.find('h3')
            if title_tag is not None:
                link_tag = title_tag.find('a')
                text_source = link_tag if link_tag is not None else title_tag
                title = text_source.get_text(strip=True)

            # Summary: the first <p> of the entry.
            summary_tag = article.find('p')
            summary = summary_tag.get_text(strip=True) if summary_tag is not None else ""

            # Entries with neither a title nor a summary carry no text to
            # analyse and would only add noise to the sentiment counts.
            if not title and not summary:
                continue

            extracted_articles.append({
                "Source": "Yahoo Finance",
                "Title": title or "No Title Found",
                "Summary": summary or "No Summary Found",
            })

        return {
            "Company": company_name if company_name else "General Market News",
            "Articles": extracted_articles,
        }
    except Exception as e:
        # UI boundary: report the failure instead of crashing the app.
        st.error(f"An error occurred during scraping: {e}")
        return []


# --- 2.
# ANALYSIS FUNCTIONS ---

# Collapses the classifier's five-level labels into the three buckets that
# are counted and displayed.
_SENTIMENT_LABELS = {
    "positive": "Positive",
    "negative": "Negative",
    "neutral": "Neutral",
    "very positive": "Positive",
    "very negative": "Negative",
}


def _article_text(article):
    """Return the title and summary of an article joined into one string."""
    return f"{article['Title']} {article['Summary']}"


def _string_list(value, fallback):
    """Return ``value`` as a list of non-empty strings, or ``fallback`` if it is not a list."""
    if not isinstance(value, list):
        return list(fallback)
    return [item.strip() for item in value if isinstance(item, str) and item.strip()]


def perform_sentiment_analysis(news_data):
    """Classify the sentiment of every article with a Hugging Face pipeline.

    Each article dict in ``news_data["Articles"]`` is updated in place with
    ``"Sentiment"`` ("Positive", "Negative", "Neutral" or "Unknown") and
    ``"Score"`` (float).

    Args:
        news_data: Dict with an ``"Articles"`` list whose items have
            ``"Title"`` and ``"Summary"``.

    Returns:
        Tuple ``(news_data, sentiment_counts)`` where ``sentiment_counts``
        maps "Positive"/"Negative"/"Neutral" to the number of articles.
        Articles labelled "Unknown" are not counted.
    """
    # device=-1 runs the model on CPU.
    pipe = pipeline(
        "text-classification",
        model="tabularisai/multilingual-sentiment-analysis",
        device=-1,
    )

    sentiment_counts = {"Positive": 0, "Negative": 0, "Neutral": 0}
    for article in news_data.get("Articles", []):
        # truncation=True keeps over-length text within the model's input limit.
        result = pipe(_article_text(article), truncation=True)[0]
        sentiment = _SENTIMENT_LABELS.get(result["label"].lower(), "Unknown")

        article["Sentiment"] = sentiment
        article["Score"] = float(result["score"])
        if sentiment in sentiment_counts:
            sentiment_counts[sentiment] += 1

    return news_data, sentiment_counts


def extract_topics_with_hf(news_data):
    """Attach the top three topic labels to every article using a Hugging Face pipeline.

    Args:
        news_data: Dict with ``"Company"`` and an ``"Articles"`` list whose
            items have ``"Title"`` and ``"Summary"`` (and optionally
            ``"Source"``, ``"Sentiment"``, ``"Score"``).

    Returns:
        A new dict with ``"Company"`` and ``"Articles"``; each article
        carries ``"Source"``, ``"Title"``, ``"Summary"``, ``"Sentiment"``,
        ``"Score"`` and ``"Topics"`` (list of labels, or ``["Unknown"]``
        when the classifier returns nothing).
    """
    structured_data = {
        "Company": news_data.get("Company", "Unknown"),
        "Articles": [],
    }
    topic_pipe = pipeline(
        "text-classification",
        model="valurank/distilroberta-topic-classification",
        device=-1,
    )

    for article in news_data.get("Articles", []):
        # truncation=True keeps over-length text within the model's input limit.
        topics_result = topic_pipe(_article_text(article), top_k=3, truncation=True)
        topics = [topic["label"] for topic in topics_result] if topics_result else ["Unknown"]

        structured_data["Articles"].append({
            "Source": article.get("Source", "Unknown"),
            "Title": article["Title"],
            "Summary": article["Summary"],
            "Sentiment": article.get("Sentiment", "Unknown"),
            "Score": article.get("Score", 0.0),
            "Topics": topics,
        })

    return structured_data


def extract_json(response):
    """Parse a JSON object out of an LLM response.

    The whole response is tried first. If that is not valid JSON, the span
    from the first ``{`` to the last ``}`` is tried, which covers replies
    wrapped in Markdown code fences or surrounded by prose.

    Args:
        response: Raw response text (``str`` or ``bytes``).

    Returns:
        The parsed dict, or ``{}`` when the input is not text, is not valid
        JSON, or decodes to something other than a JSON object.
    """
    if isinstance(response, (bytes, bytearray)):
        response = response.decode("utf-8", errors="replace")
    if not isinstance(response, str):
        return {}

    text = response.strip()
    try:
        parsed = json.loads(text)
    except json.JSONDecodeError:
        pass
    else:
        # Valid JSON that is not an object cannot satisfy callers' .get().
        return parsed if isinstance(parsed, dict) else {}

    start, end = text.find("{"), text.rfind("}")
    if 0 <= start < end:
        try:
            parsed = json.loads(text[start:end + 1])
        except json.JSONDecodeError:
            return {}
        if isinstance(parsed, dict):
            return parsed
    return {}


def generate_final_sentiment(news_data, sentiment_counts):
    """Ask the LLM for a short overall sentiment summary.

    Args:
        news_data: Dict with ``"Company"`` and ``"Articles"`` (each with
            ``"Summary"`` and optionally ``"Source"``).
        sentiment_counts: Dict with "Positive", "Negative" and "Neutral"
            counts.

    Returns:
        The summary text returned by the LLM, or a fallback message when
        the response carries no content.
    """
    company_name = news_data["Company"]
    total_articles = sum(sentiment_counts.values())
    combined_summaries = " ".join(
        f"({article.get('Source', 'Unknown')}) {article['Summary']}"
        for article in news_data["Articles"]
    )

    # The summaries are included so the model has actual highlights to draw
    # on rather than only the counts.
    prompt = f"""
    Based on the analysis of {total_articles} articles about the company "{company_name}":
    - Positive articles: {sentiment_counts['Positive']}
    - Negative articles: {sentiment_counts['Negative']}
    - Neutral articles: {sentiment_counts['Neutral']}

    Article summaries:
    {combined_summaries}

    Provide a single, concise summary that integrates the overall sentiment analysis and key news highlights while maintaining a natural flow.
    Explain its implications for the company's reputation, stock potential, and public perception.
    Respond **ONLY** with a well-structured very concise and short paragraph in plain text, focusing on overall sentiment.
    """

    response = llm.invoke([HumanMessage(content=prompt)], max_tokens=200)
    summary = getattr(response, "content", None)
    return summary if summary else "Sentiment analysis summary not available."


def compare_articles(news_data, sentiment_counts):
    """Use the LLM to compare the articles and assemble the final report.

    Args:
        news_data: Dict with ``"Company"`` and ``"Articles"``; every article
            must have ``"Topics"`` and ``"Summary"``.
        sentiment_counts: Dict with "Positive", "Negative" and "Neutral"
            counts.

    Returns:
        Dict with ``"Company"``, ``"Articles"``,
        ``"Comparative Sentiment Score"`` (sentiment distribution, coverage
        differences, topic overlap) and ``"Final Sentiment Analysis"``.
    """
    articles = news_data.get("Articles", [])
    all_topics = [set(article["Topics"]) for article in articles]
    common_topics = set.intersection(*all_topics) if all_topics else set()
    # Sorted lists keep the prompt and the report deterministic.
    topic_lists = [sorted(topics) for topics in all_topics]

    # 1. Ask the LLM for the common topics.
    topics_prompt = f"""
    Analyze the following article topics and identify **only three** key themes that are common across multiple articles, even if they are phrased differently.
    The topics from each article are: {topic_lists}
    Respond **ONLY** with a JSON format: {{"CommonTopics": ["topic1", "topic2", "topic3"]}}
    """
    response = llm.invoke([HumanMessage(content=topics_prompt)]).content
    # Fall back to the literal intersection when the reply is missing or malformed.
    contextual_common_topics = _string_list(
        extract_json(response).get("CommonTopics"),
        fallback=sorted(common_topics),
    )[:3]

    # 2. Ask the LLM to compare the coverage differences.
    total_articles = sum(sentiment_counts.values())
    comparison_prompt = f"""
    Provide a high-level summary comparing {total_articles} news articles about "{news_data['Company']}" from Yahoo Finance:
    - Sentiment distribution: {sentiment_counts}
    - Commonly discussed topics across articles: {contextual_common_topics}

    Consider the following:
    1. Notable contrasts between articles (e.g., major differences in topics and perspectives).
    2. Overall implications for the company's reputation, stock potential, and public perception.

    Respond **ONLY** with a concise and insightful summary in this JSON format:
    {{
        "Coverage Differences": [
            {{"Comparison": "Brief contrast between a positive and negative article", "Impact": "Concise impact statement"}},
            {{"Comparison": "Brief contrast between articles on different topics", "Impact": "Concise impact statement"}}
        ]
    }}
    """
    response = llm.invoke([HumanMessage(content=comparison_prompt)]).content
    raw_differences = extract_json(response).get("Coverage Differences", [])
    # Only dict entries can be rendered downstream; drop anything else.
    coverage_differences = (
        [diff for diff in raw_differences if isinstance(diff, dict)]
        if isinstance(raw_differences, list)
        else []
    )

    final_sentiment = generate_final_sentiment(news_data, sentiment_counts)

    # Case-insensitive match so a differently-cased common topic is still removed.
    shared = {topic.casefold() for topic in contextual_common_topics}
    unique_topics = {
        f"Article {i} ({article.get('Source', 'Unknown')})": [
            topic for topic in topics if topic.casefold() not in shared
        ]
        for i, (topics, article) in enumerate(zip(topic_lists, articles), start=1)
    }

    return {
        "Company": news_data["Company"],
        "Articles": articles,
        "Comparative Sentiment Score": {
            "Sentiment Distribution": sentiment_counts,
            "Coverage Differences": coverage_differences,
            "Topic Overlap": {
                "Common Topics": contextual_common_topics,
                "Unique Topics": unique_topics,
            },
        },
        "Final Sentiment Analysis": final_sentiment,
    }


# --- 3.
# STREAMLIT UI IMPLEMENTATION ---

def display_articles(articles):
    """Render title, summary, sentiment, score and topics for each article."""
    for i, article in enumerate(articles, start=1):
        st.markdown(f"##### **Article {i} ({article['Source']}): {article['Title']}**")
        st.write(f"- **Summary:** {article['Summary']}")
        st.write(f"- **Sentiment:** {article['Sentiment']} | **Score:** {article['Score']:.2f}")
        st.write(f"- **Topics:** {', '.join(article['Topics'])}")


def display_sentiment_distribution(sentiment_distribution):
    """Render the sentiment counts as a two-column table."""
    st.markdown("#### **Sentiment Distribution:**")
    sentiment_data = {
        "Sentiment": list(sentiment_distribution.keys()),
        "Count": list(sentiment_distribution.values()),
    }
    st.table(sentiment_data)


def display_coverage_differences(coverage_differences):
    """Render each comparison/impact pair; nothing is shown for an empty list."""
    if not coverage_differences:
        return

    st.markdown("#### **Coverage Differences:**")
    for diff in coverage_differences:
        # The entries come from LLM output; skip anything that is not a dict.
        if not isinstance(diff, dict):
            continue
        comparison = diff.get('Comparison', 'No Comparison Detail')
        impact = diff.get('Impact', 'No Impact Detail')
        st.write(f"- **{comparison}:** {impact}")


def display_topic_overlap(topic_overlap):
    """Render the common topics and the per-article unique topics."""
    st.markdown("#### **Topic Overlap:**")
    # `or` also covers an empty list, which would otherwise render a blank bullet.
    common_topics = topic_overlap.get('Common Topics') or ['N/A']
    st.write(f"- **Common Topics:** {', '.join(map(str, common_topics))}")

    st.markdown("- **Unique Topics by Article:**")
    for article, topics in topic_overlap.get("Unique Topics", {}).items():
        st.write(f" - **{article}:** {', '.join(map(str, topics)) or 'N/A'}")


def run_analysis(company_name):
    """Run scraping, sentiment analysis, topic extraction and LLM reporting, then render the report.

    Args:
        company_name: Label passed to the scraper and shown in the report.
    """
    # 1. Fetch the news.
    with st.spinner('1/4 Scraping news from Yahoo Finance...'):
        news_data = extract_titles_and_summaries(company_name)

    # The scraper returns an empty list on failure, hence the first check.
    if not news_data or not news_data['Articles']:
        st.warning("Could not find any articles or scraping failed.")
        return

    # 2. Sentiment analysis.
    with st.spinner('2/4 Performing Sentiment Analysis...'):
        news_with_sentiment, sentiment_counts = perform_sentiment_analysis(news_data)

    # 3. Topic extraction.
    with st.spinner('3/4 Extracting Topics...'):
        structured_data = extract_topics_with_hf(news_with_sentiment)

    # 4. Compare and summarise with the LLM.
    with st.spinner('4/4 Generating Final Report...'):
        final_report = compare_articles(structured_data, sentiment_counts)

    # 5. Display the results.
    st.success("Analysis Complete!")

    data = final_report
    st.markdown("---")
    st.markdown(f"## **Analysis Report for: {data.get('Company', 'Unknown')}**")

    # Articles
    st.markdown("#### **Articles Analyzed:**")
    display_articles(data.get("Articles", []))

    # Comparative Sentiment Score
    st.markdown("#### **Comparative Sentiment Score:**")
    comparative = data.get("Comparative Sentiment Score", {})
    display_sentiment_distribution(comparative.get("Sentiment Distribution", {}))
    display_coverage_differences(comparative.get("Coverage Differences", []))
    display_topic_overlap(comparative.get("Topic Overlap", {}))

    # Final Sentiment Analysis Result
    st.markdown("#### **Final Sentiment Analysis Summary:**")
    st.markdown(data.get("Final Sentiment Analysis", "No sentiment analysis available."))

    st.markdown("---")
    st.json(final_report)


# --- MAIN STREAMLIT APP ---
st.set_page_config(layout="wide")
st.title("Company News Sentiment Analysis (Yahoo Finance)")

# Input field for company name
company_name = st.text_input(
    "Enter the company name/topic:",
    placeholder="Example: Microsoft, Apple, Tesla (Note: Currently scrapes general market news from Yahoo Finance)"
)

if st.button("Generate Summary"):
    # Strip so whitespace-only input is treated as empty.
    requested_company = company_name.strip() if company_name else ""
    if requested_company:
        run_analysis(requested_company)
    else:
        st.warning("Please enter a company name or topic to proceed with the analysis!")