import streamlit as st
import requests
from bs4 import BeautifulSoup
from langchain_core.messages import HumanMessage # ใช้ langchain_core เพื่อแก้ปัญหาการ Import
from langchain_groq import ChatGroq
import json
import os
from transformers import pipeline
# --- 0. CONFIGURATION & INITIALIZATION ---
# Groq API key setup.
# The value is read from the GROQ_API_KEY environment variable, which is
# expected to be provided as a secret of that name in the Hugging Face Space.
GROQ_API_KEY = os.environ.get('GROQ_API_KEY')
if not GROQ_API_KEY:
    # Without a key the LLM client cannot be built: report it and halt the script.
    st.error("GROQ_API_KEY is not set. Please configure it in your Space Secrets (Settings > Repository secrets).")
    st.stop()

# Build the chat model client shared by the LLM-based analysis steps.
llm = ChatGroq(model="llama-3.1-8b-instant", api_key=GROQ_API_KEY)
# --- 1. SCRAPING FUNCTION (Yahoo Finance Only) ---
def extract_titles_and_summaries(company_name, num_articles=10):
    """Scrape headlines and summaries from the Yahoo Finance news front page.

    NOTE: ``company_name`` is only used to label the result. The request always
    targets the general news page, so the articles are not filtered by company.

    Args:
        company_name: Label stored under "Company"; a falsy value is replaced
            by "General Market News".
        num_articles: Maximum number of news list items to read.

    Returns:
        On success, a dict ``{"Company": ..., "Articles": [...]}`` where each
        article has "Source", "Title" and "Summary" keys. On any failure
        (non-200 response, network error, timeout, parsing error) an error is
        shown in the UI and an empty list is returned.
    """
    url = 'https://finance.yahoo.com/news/'
    try:
        # A User-Agent header is sent to reduce the chance of being blocked.
        # The timeout keeps a stalled connection from hanging the app forever;
        # a timeout error is reported by the handler below.
        response = requests.get(
            url,
            headers={'User-Agent': 'Mozilla/5.0'},
            timeout=15,
        )
        if response.status_code != 200:
            st.error(f"Failed to fetch Yahoo Finance. Status code: {response.status_code}")
            return []

        soup = BeautifulSoup(response.content, "html.parser")
        # Selector for the news list entries.
        articles = soup.find_all('li', class_='stream-item', limit=num_articles)

        extracted_articles = []
        for article in articles:
            # Title: text of the link inside <h3>, or of the <h3> itself.
            title_tag = article.find('h3')
            title = "No Title Found"
            if title_tag:
                link_tag = title_tag.find('a')
                title = (link_tag or title_tag).get_text(strip=True)

            # Summary: first <p> inside the list item.
            summary_tag = article.find('p')
            summary = summary_tag.get_text(strip=True) if summary_tag else "No Summary Found"

            extracted_articles.append({
                "Source": "Yahoo Finance",
                "Title": title,
                "Summary": summary,
            })

        return {
            "Company": company_name if company_name else "General Market News",
            "Articles": extracted_articles,
        }
    except Exception as e:
        # UI boundary: surface the problem to the user instead of crashing.
        st.error(f"An error occurred during scraping: {e}")
        return []
# --- 2. ANALYSIS FUNCTIONS ---
def perform_sentiment_analysis(news_data):
    """Label every article with a sentiment using a Hugging Face pipeline.

    Each article dict in ``news_data["Articles"]`` is updated in place with a
    "Sentiment" key ("Positive", "Negative", "Neutral" or "Unknown") and a
    "Score" key (the classifier score as a float).

    Args:
        news_data: Dict with an "Articles" list; each article must have
            "Title" and "Summary" keys.

    Returns:
        Tuple ``(news_data, sentiment_counts)`` where ``sentiment_counts`` maps
        "Positive"/"Negative"/"Neutral" to the number of articles with that
        label. Articles labelled "Unknown" are not counted.
    """
    # device=-1 runs the pipeline on CPU.
    pipe = pipeline("text-classification", model="tabularisai/multilingual-sentiment-analysis", device=-1)

    # Collapse the model's five labels into three buckets. Built once here
    # rather than on every loop iteration.
    sentiment_map = {
        "positive": "Positive",
        "negative": "Negative",
        "neutral": "Neutral",
        "very positive": "Positive",
        "very negative": "Negative",
    }
    sentiment_counts = {"Positive": 0, "Negative": 0, "Neutral": 0}

    for article in news_data.get("Articles", []):
        content = f"{article['Title']} {article['Summary']}"
        # truncation=True keeps over-long text within the model's input limit
        # instead of raising inside the model.
        sentiment_result = pipe(content, truncation=True)[0]
        sentiment = sentiment_map.get(sentiment_result["label"].lower(), "Unknown")

        article["Sentiment"] = sentiment
        article["Score"] = float(sentiment_result["score"])
        if sentiment in sentiment_counts:
            sentiment_counts[sentiment] += 1

    return news_data, sentiment_counts
def extract_topics_with_hf(news_data):
    """Attach the top three topic labels to each article via a Hugging Face pipeline.

    Args:
        news_data: Dict with an optional "Company" and an "Articles" list; each
            article must have "Title" and "Summary" keys and may carry
            "Source", "Sentiment" and "Score".

    Returns:
        A new dict ``{"Company": ..., "Articles": [...]}``. Each output article
        copies "Source", "Title", "Summary", "Sentiment" and "Score" (with
        defaults for the optional ones) and adds a "Topics" list of labels.
        The input dict is not modified.
    """
    structured_data = {
        "Company": news_data.get("Company", "Unknown"),
        "Articles": [],
    }
    # device=-1 runs the pipeline on CPU.
    topic_pipe = pipeline("text-classification", model="valurank/distilroberta-topic-classification", device=-1)

    for article in news_data.get("Articles", []):
        content = f"{article['Title']} {article['Summary']}"
        # truncation=True keeps over-long text within the model's input limit
        # instead of raising inside the model.
        topics_result = topic_pipe(content, top_k=3, truncation=True)
        topics = [topic["label"] for topic in topics_result] if topics_result else ["Unknown"]

        structured_data["Articles"].append({
            "Source": article.get("Source", "Unknown"),
            "Title": article["Title"],
            "Summary": article["Summary"],
            "Sentiment": article.get("Sentiment", "Unknown"),
            "Score": article.get("Score", 0.0),
            "Topics": topics,
        })

    return structured_data
def extract_json(response):
    """Parse a JSON object out of an LLM response, tolerating surrounding text.

    The whole response is tried first. If that fails, or yields something
    other than a JSON object, the text is scanned for the first ``{`` from
    which a complete JSON object can be decoded. This copes with replies
    wrapped in Markdown code fences or padded with explanatory prose.

    Args:
        response: The raw response, normally a string.

    Returns:
        The decoded dict, or an empty dict when no JSON object can be
        recovered. The result is always a dict, so callers can use ``.get``.
    """
    try:
        parsed = json.loads(response)
    except (TypeError, ValueError):
        # TypeError: not str/bytes. ValueError covers JSONDecodeError.
        parsed = None
    if isinstance(parsed, dict):
        return parsed
    if not isinstance(response, str):
        return {}

    decoder = json.JSONDecoder()
    start = response.find("{")
    while start != -1:
        try:
            candidate, _ = decoder.raw_decode(response, start)
        except json.JSONDecodeError:
            candidate = None
        if isinstance(candidate, dict):
            return candidate
        # This brace did not open a valid object; try the next one.
        start = response.find("{", start + 1)
    return {}
def generate_final_sentiment(news_data, sentiment_counts):
    """Ask the LLM for a short overall sentiment summary of the analysed articles.

    Args:
        news_data: Dict with "Company" and "Articles"; each article must have
            a "Summary" and may have a "Source".
        sentiment_counts: Dict with "Positive", "Negative" and "Neutral" counts.

    Returns:
        The text content of the LLM reply, or the fallback message
        "Sentiment analysis summary not available." when the reply is missing
        or empty.
    """
    company_name = news_data["Company"]
    total_articles = sum(sentiment_counts.values())
    combined_summaries = " ".join(
        f"({article.get('Source', 'Unknown')}) {article['Summary']}"
        for article in news_data["Articles"]
    )

    # The summaries are included so the model can ground the "key news
    # highlights" it is asked for in the actual articles.
    prompt = f"""
Based on the analysis of {total_articles} articles about the company "{company_name}":
- Positive articles: {sentiment_counts['Positive']}
- Negative articles: {sentiment_counts['Negative']}
- Neutral articles: {sentiment_counts['Neutral']}
Article summaries (source in parentheses): {combined_summaries}
Provide a single, concise summary that integrates the overall sentiment analysis and key news highlights while maintaining a natural flow. Explain its implications for the company's reputation, stock potential, and public perception.
Respond **ONLY** with a well-structured very concise and short paragraph in plain text, focusing on overall sentiment.
"""
    response = llm.invoke([HumanMessage(content=prompt)], max_tokens=200)

    # Read the content defensively: falling back to a plain string and then
    # accessing `.content` on it would raise AttributeError.
    summary_text = getattr(response, "content", None)
    return summary_text if summary_text else "Sentiment analysis summary not available."
def compare_articles(news_data, sentiment_counts):
    """Build the comparative report: common topics, coverage differences, final summary.

    Two LLM calls are made here: one to name up to three common themes across
    the per-article topics, and one to describe coverage differences. The final
    summary comes from ``generate_final_sentiment``.

    LLM replies are validated before use, because the model is not guaranteed
    to follow the requested JSON schema. A malformed "CommonTopics" falls back
    to the literal intersection of the article topic sets, and a malformed
    "Coverage Differences" becomes an empty list.

    Args:
        news_data: Dict with "Company" and "Articles"; each article is
            expected to carry a "Topics" list and may carry "Source".
        sentiment_counts: Dict of sentiment label -> article count.

    Returns:
        Dict with "Company", "Articles", "Comparative Sentiment Score"
        (containing "Sentiment Distribution", "Coverage Differences" and
        "Topic Overlap") and "Final Sentiment Analysis".
    """
    articles = news_data.get("Articles", [])
    all_topics = [set(article.get("Topics") or []) for article in articles]
    common_topics = set.intersection(*all_topics) if all_topics else set()

    # 1. Ask the LLM for the common themes.
    topics_prompt = f"""
Analyze the following article topics and identify **only three** key themes that are common across multiple articles,
even if they are phrased differently. The topics from each article are:
{all_topics}
Respond **ONLY** with a JSON format:
{{"CommonTopics": ["topic1", "topic2", "topic3"]}}
"""
    response = llm.invoke([HumanMessage(content=topics_prompt)]).content
    parsed_topics = extract_json(response)
    llm_topics = parsed_topics.get("CommonTopics") if isinstance(parsed_topics, dict) else None
    if isinstance(llm_topics, list):
        # Keep only strings: they are later hashed into a set and joined for display.
        contextual_common_topics = [t for t in llm_topics if isinstance(t, str)][:3]
    else:
        # Sorted so the fallback is deterministic across runs.
        contextual_common_topics = sorted(common_topics)[:3]

    # 2. Ask the LLM to contrast the coverage.
    total_articles = sum(sentiment_counts.values())
    comparison_prompt = f"""
Provide a high-level summary comparing {total_articles} news articles about "{news_data['Company']}" from Yahoo Finance:
- Sentiment distribution: {sentiment_counts}
- Commonly discussed topics across articles: {contextual_common_topics}
Consider the following:
1. Notable contrasts between articles (e.g., major differences in topics and perspectives).
2. Overall implications for the company's reputation, stock potential, and public perception.
Respond **ONLY** with a concise and insightful summary in this JSON format:
{{
"Coverage Differences": [
{{"Comparison": "Brief contrast between a positive and negative article", "Impact": "Concise impact statement"}},
{{"Comparison": "Brief contrast between articles on different topics", "Impact": "Concise impact statement"}}
]
}}
"""
    response = llm.invoke([HumanMessage(content=comparison_prompt)]).content
    parsed_coverage = extract_json(response)
    raw_differences = (
        parsed_coverage.get("Coverage Differences", [])
        if isinstance(parsed_coverage, dict)
        else []
    )
    # The display code calls `.get` on every entry, so keep only dict entries.
    coverage_differences = (
        [entry for entry in raw_differences if isinstance(entry, dict)]
        if isinstance(raw_differences, list)
        else []
    )

    final_sentiment = generate_final_sentiment(news_data, sentiment_counts)

    shared = set(contextual_common_topics)
    return {
        "Company": news_data["Company"],
        "Articles": articles,
        "Comparative Sentiment Score": {
            "Sentiment Distribution": sentiment_counts,
            "Coverage Differences": coverage_differences,
            "Topic Overlap": {
                "Common Topics": contextual_common_topics,
                "Unique Topics": {
                    f"Article {i+1} ({article.get('Source', 'Unknown')})": sorted(topics - shared)
                    for i, (topics, article) in enumerate(zip(all_topics, articles))
                },
            },
        },
        "Final Sentiment Analysis": final_sentiment,
    }
# --- 3. STREAMLIT UI IMPLEMENTATION ---
def display_articles(articles):
    """Render each article: numbered heading, then summary, sentiment and topics.

    Every article dict must provide "Source", "Title", "Summary", "Sentiment",
    "Score" (numeric) and "Topics" (iterable of strings).
    """
    position = 0
    for entry in articles:
        position += 1
        heading = f"##### **Article {position} ({entry['Source']}): {entry['Title']}**"
        st.markdown(heading)

        summary_line = f"- **Summary:** {entry['Summary']}"
        st.write(summary_line)

        sentiment_line = f"- **Sentiment:** {entry['Sentiment']} | **Score:** {entry['Score']:.2f}"
        st.write(sentiment_line)

        joined_topics = ', '.join(entry['Topics'])
        st.write(f"- **Topics:** {joined_topics}")
def display_sentiment_distribution(sentiment_distribution):
st.markdown("#### **Sentiment Distribution:**")
sentiment_data = {
"Sentiment": list(sentiment_distribution.keys()),
"Count": list(sentiment_distribution.values())
}
st.table(sentiment_data)
def display_coverage_differences(coverage_differences):
if coverage_differences:
st.markdown("#### **Coverage Differences:**")
for diff in coverage_differences:
comparison = diff.get('Comparison', 'No Comparison Detail')
impact = diff.get('Impact', 'No Impact Detail')
st.write(f"- **{comparison}:** {impact}")
def display_topic_overlap(topic_overlap):
st.markdown("#### **Topic Overlap:**")
st.write(f"- **Common Topics:** {', '.join(topic_overlap.get('Common Topics', ['N/A']))}")
st.markdown("- **Unique Topics by Article:**")
for article, topics in topic_overlap.get("Unique Topics", {}).items():
st.write(f" - **{article}:** {', '.join(topics)}")
def run_analysis(company_name):
# 1. ดึงข่าว
with st.spinner('1/4 Scraping news from Yahoo Finance...'):
news_data = extract_titles_and_summaries(company_name)
if not news_data or not news_data['Articles']:
st.warning("Could not find any articles or scraping failed.")
return
# 2. วิเคราะห์ Sentiment
with st.spinner('2/4 Performing Sentiment Analysis...'):
news_with_sentiment, sentiment_counts = perform_sentiment_analysis(news_data)
# 3. สกัด Topics
with st.spinner('3/4 Extracting Topics...'):
structured_data = extract_topics_with_hf(news_with_sentiment)
# 4. เปรียบเทียบและสรุปผลด้วย LLM
with st.spinner('4/4 Generating Final Report...'):
final_report = compare_articles(structured_data, sentiment_counts)
# 5. แสดงผลลัพธ์
st.success("Analysis Complete!")
data = final_report
st.markdown("---")
st.markdown(f"## **Analysis Report for: {data.get('Company', 'Unknown')}**")
# Articles
st.markdown("#### **Articles Analyzed:**")
display_articles(data.get("Articles", []))
# Comparative Sentiment Score
st.markdown("#### **Comparative Sentiment Score:**")
sentiment_distribution = data.get("Comparative Sentiment Score", {}).get("Sentiment Distribution", {})
display_sentiment_distribution(sentiment_distribution)
coverage_differences = data.get("Comparative Sentiment Score", {}).get("Coverage Differences", [])
display_coverage_differences(coverage_differences)
topic_overlap = data.get("Comparative Sentiment Score", {}).get("Topic Overlap", {})
display_topic_overlap(topic_overlap)
# Final Sentiment Analysis Result
st.markdown("#### **Final Sentiment Analysis Summary:**")
st.markdown(data.get("Final Sentiment Analysis", "No sentiment analysis available."))
st.markdown("---")
st.json(final_report)
# --- MAIN STREAMLIT APP ---
st.set_page_config(layout="wide")
st.title("Company News Sentiment Analysis (Yahoo Finance)")

# Input field for the company name.
company_name = st.text_input(
    "Enter the company name/topic:",
    placeholder="Example: Microsoft, Apple, Tesla (Note: Currently scrapes general market news from Yahoo Finance)",
)

if st.button("Generate Summary"):
    # Strip first so whitespace-only input counts as empty and does not
    # trigger a full scrape and model run.
    cleaned_name = company_name.strip() if company_name else ""
    if cleaned_name:
        run_analysis(cleaned_name)
    else:
        st.warning("Please enter a company name or topic to proceed with the analysis!")