Spaces:
Runtime error
Runtime error
| import streamlit as st | |
| import requests | |
| from bs4 import BeautifulSoup | |
| from langchain_core.messages import HumanMessage # ใช้ langchain_core เพื่อแก้ปัญหาการ Import | |
| from langchain_groq import ChatGroq | |
| import json | |
| import os | |
| from transformers import pipeline | |
| # --- 0. CONFIGURATION & INITIALIZATION --- | |
# Groq API key configuration.
# The key is read from the GROQ_API_KEY environment variable (the original
# note says this is supplied by a Hugging Face Space secret of the same name).
GROQ_API_KEY = os.environ.get('GROQ_API_KEY')

if not GROQ_API_KEY:
    # Nothing below can work without a key: report it and halt the script run.
    st.error("GROQ_API_KEY is not set. Please configure it in your Space Secrets (Settings > Repository secrets).")
    st.stop()

# Module-level chat model shared by the LLM-backed analysis functions.
llm = ChatGroq(model="llama-3.1-8b-instant", api_key=GROQ_API_KEY)
| # --- 1. SCRAPING FUNCTION (Yahoo Finance Only) --- | |
def extract_titles_and_summaries(company_name, num_articles=10, timeout=10):
    """Fetch headlines and summaries from the Yahoo Finance news landing page.

    Note that ``company_name`` is NOT used to filter or search: the function
    always scrapes the general news page and only uses the name as a label in
    the returned payload.

    Args:
        company_name: Label stored under "Company"; a falsy value is replaced
            by "General Market News".
        num_articles: Maximum number of ``<li class="stream-item">`` entries
            to read from the page.
        timeout: Seconds to wait for the HTTP request before giving up.
            Without a timeout a stalled connection would block the app
            indefinitely.

    Returns:
        On success, a dict ``{"Company": str, "Articles": [ {"Source",
        "Title", "Summary"}, ... ]}``. On any failure (non-200 status,
        network error, parsing error) an error is shown via ``st.error`` and
        an empty list ``[]`` is returned, so callers must treat a falsy
        result as "no data".
    """
    url = 'https://finance.yahoo.com/news/'
    try:
        # A browser-like User-Agent is sent to reduce the chance of being blocked.
        response = requests.get(
            url,
            headers={'User-Agent': 'Mozilla/5.0'},
            timeout=timeout,
        )
        if response.status_code != 200:
            st.error(f"Failed to fetch Yahoo Finance. Status code: {response.status_code}")
            return []

        soup = BeautifulSoup(response.content, "html.parser")
        # Each news entry is expected to be an <li class="stream-item"> element.
        articles = soup.find_all('li', class_='stream-item', limit=num_articles)

        extracted_articles = []
        for article in articles:
            # Title lives in an <h3>; prefer the text of the link inside it.
            title_tag = article.find('h3')
            title = "No Title Found"
            if title_tag:
                link_tag = title_tag.find('a')
                title = link_tag.get_text(strip=True) if link_tag else title_tag.get_text(strip=True)

            # Summary is the first <p> inside the entry, when present.
            summary_tag = article.find('p')
            summary = summary_tag.get_text(strip=True) if summary_tag else "No Summary Found"

            extracted_articles.append({
                "Source": "Yahoo Finance",
                "Title": title,
                "Summary": summary,
            })

        return {
            "Company": company_name if company_name else "General Market News",
            "Articles": extracted_articles,
        }
    except Exception as e:
        # UI boundary: surface the problem to the user instead of crashing the app.
        st.error(f"An error occurred during scraping: {e}")
        return []
| # --- 2. ANALYSIS FUNCTIONS --- | |
def perform_sentiment_analysis(news_data):
    """Label every article with a sentiment using a Hugging Face pipeline.

    Each article dict in ``news_data["Articles"]`` is updated in place with a
    "Sentiment" ("Positive", "Negative", "Neutral" or "Unknown") and a float
    "Score" taken from the classifier output.

    Returns:
        A tuple ``(news_data, counts)`` where ``counts`` maps "Positive",
        "Negative" and "Neutral" to the number of articles in each bucket
        ("Unknown" results are not counted).
    """
    # device=-1 is kept from the original (its note: run on CPU / automatically).
    classifier = pipeline("text-classification", model="tabularisai/multilingual-sentiment-analysis", device=-1)

    # Collapse the classifier's labels (compared in lower case) into three buckets.
    label_to_bucket = {
        "positive": "Positive",
        "negative": "Negative",
        "neutral": "Neutral",
        "very positive": "Positive",
        "very negative": "Negative",
    }
    tally = {"Positive": 0, "Negative": 0, "Neutral": 0}

    for item in news_data.get("Articles", []):
        text = f"{item['Title']} {item['Summary']}"
        prediction = classifier(text)[0]
        bucket = label_to_bucket.get(prediction["label"].lower(), "Unknown")
        confidence = float(prediction["score"])

        item["Sentiment"] = bucket
        item["Score"] = confidence
        if bucket in tally:
            tally[bucket] += 1

    return news_data, tally
def extract_topics_with_hf(news_data):
    """Attach up to three topic labels to each article via a Hugging Face pipeline.

    Builds and returns a new dict ``{"Company": ..., "Articles": [...]}``;
    every output article carries "Source", "Title", "Summary", "Sentiment",
    "Score" and "Topics". Missing optional fields fall back to "Unknown"
    (or 0.0 for the score); "Topics" is ["Unknown"] when the classifier
    returns nothing.
    """
    company = news_data.get("Company", "Unknown")
    topic_pipe = pipeline("text-classification", model="valurank/distilroberta-topic-classification", device=-1)

    enriched = []
    for item in news_data.get("Articles", []):
        # top_k=3 asks the pipeline for the three highest-scoring labels.
        predictions = topic_pipe(f"{item['Title']} {item['Summary']}", top_k=3)
        if predictions:
            labels = [prediction["label"] for prediction in predictions]
        else:
            labels = ["Unknown"]

        enriched.append({
            "Source": item.get("Source", "Unknown"),
            "Title": item["Title"],
            "Summary": item["Summary"],
            "Sentiment": item.get("Sentiment", "Unknown"),
            "Score": item.get("Score", 0.0),
            "Topics": labels,
        })

    return {"Company": company, "Articles": enriched}
def extract_json(response):
    """Parse a JSON object out of an LLM response, tolerating extra text.

    The text is first parsed as-is. If that fails (for example because the
    model wrapped the JSON in a Markdown code fence or added commentary),
    the span from the first ``{`` to the last ``}`` is parsed instead.

    Args:
        response: Raw response text (``str``, or UTF-8 ``bytes``).

    Returns:
        The parsed ``dict``. An empty dict is returned when the input is not
        text, contains no parseable JSON object, or parses to something other
        than an object, so callers can always use ``.get`` on the result.
    """
    if isinstance(response, (bytes, bytearray)):
        response = bytes(response).decode("utf-8", errors="replace")
    elif not isinstance(response, str):
        return {}

    text = response.strip()
    try:
        parsed = json.loads(text)
    except json.JSONDecodeError:
        # Fall back to the outermost {...} span embedded in surrounding text.
        start = text.find("{")
        end = text.rfind("}")
        if start == -1 or end <= start:
            return {}
        try:
            parsed = json.loads(text[start:end + 1])
        except json.JSONDecodeError:
            return {}

    return parsed if isinstance(parsed, dict) else {}
def generate_final_sentiment(news_data, sentiment_counts):
    """Ask the LLM for a short overall sentiment summary of the articles.

    Args:
        news_data: Dict with "Company" and "Articles"; each article must have
            a "Summary" and may have a "Source".
        sentiment_counts: Dict with integer counts under "Positive",
            "Negative" and "Neutral".

    Returns:
        The summary text produced by the module-level ``llm``, or a fixed
        fallback sentence when the model returns no content.

    Raises:
        KeyError: If a required key is missing from the inputs.
    """
    fallback = "Sentiment analysis summary not available."
    company_name = news_data["Company"]
    total_articles = sum(sentiment_counts.values())
    combined_summaries = " ".join(
        f"({article.get('Source', 'Unknown')}) {article['Summary']}"
        for article in news_data["Articles"]
    )

    # The summaries are included so the model has actual news content to draw
    # its "key news highlights" from; previously they were built but not sent.
    prompt = f"""
    Based on the analysis of {total_articles} articles about the company "{company_name}":
    - Positive articles: {sentiment_counts['Positive']}
    - Negative articles: {sentiment_counts['Negative']}
    - Neutral articles: {sentiment_counts['Neutral']}
    Article summaries:
    {combined_summaries}
    Provide a single, concise summary that integrates the overall sentiment analysis and key news highlights while maintaining a natural flow. Explain its implications for the company's reputation, stock potential, and public perception.
    Respond **ONLY** with a well-structured very concise and short paragraph in plain text, focusing on overall sentiment.
    """
    response = llm.invoke([HumanMessage(content=prompt)], max_tokens=200)

    # Guard against a missing/empty reply instead of calling .content on the
    # fallback string, which would raise AttributeError.
    content = getattr(response, "content", None)
    return content if content else fallback
def _llm_json_value(raw_text, key):
    """Return ``key`` from the JSON object parsed out of ``raw_text``, else None."""
    parsed = extract_json(raw_text)
    if not isinstance(parsed, dict):
        return None
    return parsed.get(key)


def compare_articles(news_data, sentiment_counts):
    """Use the LLM to compare the articles and assemble the final report.

    Two LLM calls are made: one to name up to three themes shared across the
    articles, one to describe coverage differences. LLM output is validated
    before use because it is free-form text: common topics are kept only if
    they arrive as a list (non-string items are dropped), and coverage
    differences are kept only if they are dicts.

    Args:
        news_data: Dict with "Company" and "Articles"; each article should
            carry a "Topics" list (a missing list is treated as empty).
        sentiment_counts: Mapping of sentiment label to article count.

    Returns:
        A dict with "Company", "Articles", "Comparative Sentiment Score"
        (sentiment distribution, coverage differences, topic overlap) and
        "Final Sentiment Analysis".
    """
    articles = news_data.get("Articles", [])
    all_topics = [set(article.get("Topics", [])) for article in articles]
    # Topics literally shared by every article; used only as a fallback.
    common_topics = set.intersection(*all_topics) if all_topics else set()

    # 1. Ask the LLM for the common themes.
    topics_prompt = f"""
    Analyze the following article topics and identify **only three** key themes that are common across multiple articles,
    even if they are phrased differently. The topics from each article are:
    {all_topics}
    Respond **ONLY** with a JSON format:
    {{"CommonTopics": ["topic1", "topic2", "topic3"]}}
    """
    response = llm.invoke([HumanMessage(content=topics_prompt)]).content
    llm_topics = _llm_json_value(response, "CommonTopics")
    if isinstance(llm_topics, list):
        contextual_common_topics = [topic for topic in llm_topics if isinstance(topic, str)][:3]
    else:
        # Sorted so the fallback is deterministic (set order is not).
        contextual_common_topics = sorted(common_topics, key=str)[:3]

    # 2. Ask the LLM to contrast the coverage.
    total_articles = sum(sentiment_counts.values())
    comparison_prompt = f"""
    Provide a high-level summary comparing {total_articles} news articles about "{news_data['Company']}" from Yahoo Finance:
    - Sentiment distribution: {sentiment_counts}
    - Commonly discussed topics across articles: {contextual_common_topics}
    Consider the following:
    1. Notable contrasts between articles (e.g., major differences in topics and perspectives).
    2. Overall implications for the company's reputation, stock potential, and public perception.
    Respond **ONLY** with a concise and insightful summary in this JSON format:
    {{
    "Coverage Differences": [
    {{"Comparison": "Brief contrast between a positive and negative article", "Impact": "Concise impact statement"}},
    {{"Comparison": "Brief contrast between articles on different topics", "Impact": "Concise impact statement"}}
    ]
    }}
    """
    response = llm.invoke([HumanMessage(content=comparison_prompt)]).content
    raw_differences = _llm_json_value(response, "Coverage Differences")
    if isinstance(raw_differences, list):
        # Downstream display code calls .get on each entry, so keep dicts only.
        coverage_differences = [entry for entry in raw_differences if isinstance(entry, dict)]
    else:
        coverage_differences = []

    final_sentiment = generate_final_sentiment(news_data, sentiment_counts)

    shared = set(contextual_common_topics)
    unique_topics = {
        f"Article {i+1} ({article.get('Source', 'Unknown')})": sorted(topics - shared, key=str)
        for i, (topics, article) in enumerate(zip(all_topics, articles))
    }

    return {
        "Company": news_data["Company"],
        "Articles": articles,
        "Comparative Sentiment Score": {
            "Sentiment Distribution": sentiment_counts,
            "Coverage Differences": coverage_differences,
            "Topic Overlap": {
                "Common Topics": contextual_common_topics,
                "Unique Topics": unique_topics,
            },
        },
        "Final Sentiment Analysis": final_sentiment,
    }
| # --- 3. STREAMLIT UI IMPLEMENTATION --- | |
def display_articles(articles):
    """Render each article (heading, summary, sentiment/score, topics) in Streamlit.

    Articles are numbered from 1. Every article dict must provide "Source",
    "Title", "Summary", "Sentiment", a numeric "Score" and a "Topics" list of
    strings.
    """
    for number, entry in enumerate(articles, 1):
        heading = f"##### **Article {number} ({entry['Source']}): {entry['Title']}**"
        st.markdown(heading)

        summary_line = f"- **Summary:** {entry['Summary']}"
        st.write(summary_line)

        sentiment_line = f"- **Sentiment:** {entry['Sentiment']} | **Score:** {entry['Score']:.2f}"
        st.write(sentiment_line)

        topics_line = f"- **Topics:** {', '.join(entry['Topics'])}"
        st.write(topics_line)
def display_sentiment_distribution(sentiment_distribution):
    """Show the sentiment counts as a two-column Streamlit table.

    ``sentiment_distribution`` maps a sentiment label to its count; labels go
    in the "Sentiment" column and counts in the "Count" column.
    """
    st.markdown("#### **Sentiment Distribution:**")
    labels = list(sentiment_distribution.keys())
    counts = list(sentiment_distribution.values())
    st.table({"Sentiment": labels, "Count": counts})
def display_coverage_differences(coverage_differences):
    """List each coverage comparison and its impact; render nothing when empty.

    Each entry is a dict whose "Comparison" and "Impact" values are shown;
    missing values are replaced by placeholder text.
    """
    if not coverage_differences:
        return

    st.markdown("#### **Coverage Differences:**")
    for entry in coverage_differences:
        comparison = entry.get('Comparison', 'No Comparison Detail')
        impact = entry.get('Impact', 'No Impact Detail')
        st.write(f"- **{comparison}:** {impact}")
def display_topic_overlap(topic_overlap):
    """Render the common topics and the per-article unique topics.

    ``topic_overlap`` may contain "Common Topics" (list of strings, shown as
    "N/A" when the key is absent) and "Unique Topics" (mapping of article
    label to a list of strings).
    """
    st.markdown("#### **Topic Overlap:**")

    shared_topics = topic_overlap.get('Common Topics', ['N/A'])
    st.write(f"- **Common Topics:** {', '.join(shared_topics)}")

    st.markdown("- **Unique Topics by Article:**")
    unique_by_article = topic_overlap.get("Unique Topics", {})
    for label, own_topics in unique_by_article.items():
        joined = ', '.join(own_topics)
        st.write(f" - **{label}:** {joined}")
def run_analysis(company_name):
    """Run the scrape -> sentiment -> topics -> LLM report pipeline and render it.

    Progress is shown with a spinner per stage. If scraping yields nothing, a
    warning is displayed and the function returns early. Otherwise the report
    is rendered section by section, followed by the raw report as JSON.
    """
    # Stage 1: fetch the news.
    with st.spinner('1/4 Scraping news from Yahoo Finance...'):
        news_data = extract_titles_and_summaries(company_name)

    # The scraper returns a falsy value on failure, or a dict whose
    # "Articles" list may be empty.
    if not (news_data and news_data['Articles']):
        st.warning("Could not find any articles or scraping failed.")
        return

    # Stage 2: sentiment per article.
    with st.spinner('2/4 Performing Sentiment Analysis...'):
        news_with_sentiment, sentiment_counts = perform_sentiment_analysis(news_data)

    # Stage 3: topics per article.
    with st.spinner('3/4 Extracting Topics...'):
        structured_data = extract_topics_with_hf(news_with_sentiment)

    # Stage 4: LLM comparison and final summary.
    with st.spinner('4/4 Generating Final Report...'):
        final_report = compare_articles(structured_data, sentiment_counts)

    # Stage 5: render the report.
    st.success("Analysis Complete!")
    st.markdown("---")
    st.markdown(f"## **Analysis Report for: {final_report.get('Company', 'Unknown')}**")

    st.markdown("#### **Articles Analyzed:**")
    display_articles(final_report.get("Articles", []))

    st.markdown("#### **Comparative Sentiment Score:**")
    comparative = final_report.get("Comparative Sentiment Score", {})
    display_sentiment_distribution(comparative.get("Sentiment Distribution", {}))
    display_coverage_differences(comparative.get("Coverage Differences", []))
    display_topic_overlap(comparative.get("Topic Overlap", {}))

    st.markdown("#### **Final Sentiment Analysis Summary:**")
    st.markdown(final_report.get("Final Sentiment Analysis", "No sentiment analysis available."))

    st.markdown("---")
    st.json(final_report)
| # --- MAIN STREAMLIT APP --- | |
st.set_page_config(layout="wide")
st.title("Company News Sentiment Analysis (Yahoo Finance)")

# Free-text input; the value is passed to run_analysis as the company name.
company_name = st.text_input(
    "Enter the company name/topic:",
    placeholder="Example: Microsoft, Apple, Tesla (Note: Currently scrapes general market news from Yahoo Finance)",
)

# Run the pipeline only on click, and only when a name was entered.
generate_clicked = st.button("Generate Summary")
if generate_clicked and company_name:
    run_analysis(company_name)
elif generate_clicked:
    st.warning("Please enter a company name or topic to proceed with the analysis!")