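# Page header captured from the Hugging Face file viewer (not part of the program):
#   Space / file: 3.11.2568 / app.py
#   Committer avatar text: Warisamm748's picture
#   Commit message: Update app.py
#   Commit: a112641 (verified)
#   Viewer links: Raw | History | Blame
#   File size: 13.2 kB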
import streamlit as st
import requests
from bs4 import BeautifulSoup
from langchain_core.messages import HumanMessage # ใช้ langchain_core เพื่อแก้ปัญหาการ Import
from langchain_groq import ChatGroq
import json
import os
from transformers import pipeline
# --- 0. CONFIGURATION & INITIALIZATION ---
# Groq API key setup.
# The key is read from the GROQ_API_KEY environment variable (on a Hugging Face
# Space this is populated from the secret of the same name).
GROQ_API_KEY = os.environ.get('GROQ_API_KEY')
if not GROQ_API_KEY:
    # No key means the LLM client cannot be created: report it and halt the script.
    st.error("GROQ_API_KEY is not set. Please configure it in your Space Secrets (Settings > Repository secrets).")
    st.stop()

# Initialize the LLM model
llm = ChatGroq(model="llama-3.1-8b-instant", api_key=GROQ_API_KEY)
# --- 1. SCRAPING FUNCTION (Yahoo Finance Only) ---
def extract_titles_and_summaries(company_name, num_articles=10):
    """Scrape article titles and summaries from the Yahoo Finance news page.

    The page fetched is the general news stream; ``company_name`` is only used
    to label the result and does not filter the articles.

    Args:
        company_name: Label stored under "Company". A falsy value is replaced
            by "General Market News".
        num_articles: Maximum number of stream items to read from the page.

    Returns:
        On success, a dict ``{"Company": ..., "Articles": [...]}`` where each
        article is a dict with "Source", "Title" and "Summary" keys.
        On any failure (non-200 status, network error, timeout, parsing error)
        an empty list, after reporting the problem through ``st.error``.
    """
    url = 'https://finance.yahoo.com/news/'
    try:
        # A User-Agent header is sent to reduce the chance of being blocked.
        # The timeout keeps a stalled connection from hanging the app forever;
        # a timeout raises and is reported by the handler below.
        response = requests.get(
            url,
            headers={'User-Agent': 'Mozilla/5.0'},
            timeout=15,
        )
        if response.status_code != 200:
            st.error(f"Failed to fetch Yahoo Finance. Status code: {response.status_code}")
            return []

        soup = BeautifulSoup(response.content, "html.parser")
        # Selector for the news stream entries.
        articles = soup.find_all('li', class_='stream-item', limit=num_articles)

        extracted_articles = []
        for article in articles:
            # Title: text of the link inside the <h3>, or of the <h3> itself.
            title_tag = article.find('h3')
            title = "No Title Found"
            if title_tag:
                link_tag = title_tag.find('a')
                title = link_tag.get_text(strip=True) if link_tag else title_tag.get_text(strip=True)

            # Summary: text of the first <p> in the entry.
            summary_tag = article.find('p')
            summary = summary_tag.get_text(strip=True) if summary_tag else "No Summary Found"

            extracted_articles.append({
                "Source": "Yahoo Finance",
                "Title": title,
                "Summary": summary,
            })

        return {
            "Company": company_name if company_name else "General Market News",
            "Articles": extracted_articles,
        }
    except Exception as e:
        # UI boundary: surface the failure to the user instead of crashing the app.
        st.error(f"An error occurred during scraping: {e}")
        return []
# --- 2. ANALYSIS FUNCTIONS ---
def perform_sentiment_analysis(news_data):
    """Classify the sentiment of every article with a Hugging Face pipeline.

    Each article dict in ``news_data["Articles"]`` is updated in place with a
    "Sentiment" label ("Positive", "Negative", "Neutral" or "Unknown") and a
    float "Score" taken from the classifier output.

    Args:
        news_data: Dict with an optional "Articles" list; every article must
            have "Title" and "Summary" keys.

    Returns:
        A tuple ``(news_data, sentiment_counts)`` where ``sentiment_counts``
        maps "Positive"/"Negative"/"Neutral" to the number of articles with
        that label. Articles labelled "Unknown" are not counted.
    """
    sentiment_counts = {"Positive": 0, "Negative": 0, "Neutral": 0}
    articles = news_data.get("Articles", [])
    if not articles:
        # Nothing to classify: skip the (expensive) model load entirely.
        return news_data, sentiment_counts

    # Collapse the classifier labels into three buckets; built once, not per article.
    sentiment_map = {
        "positive": "Positive",
        "negative": "Negative",
        "neutral": "Neutral",
        "very positive": "Positive",
        "very negative": "Negative",
    }

    # device=-1 runs the model on CPU.
    pipe = pipeline("text-classification", model="tabularisai/multilingual-sentiment-analysis", device=-1)

    for article in articles:
        content = f"{article['Title']} {article['Summary']}"
        # truncation=True keeps over-long text within the model's maximum input length.
        sentiment_result = pipe(content, truncation=True)[0]
        sentiment = sentiment_map.get(sentiment_result["label"].lower(), "Unknown")
        article["Sentiment"] = sentiment
        article["Score"] = float(sentiment_result["score"])
        if sentiment in sentiment_counts:
            sentiment_counts[sentiment] += 1

    return news_data, sentiment_counts
def extract_topics_with_hf(news_data):
"""ใช้ Hugging Face Pipeline สกัดหัวข้อ"""
structured_data = {
"Company": news_data.get("Company", "Unknown"),
"Articles": []
}
topic_pipe = pipeline("text-classification", model="valurank/distilroberta-topic-classification", device=-1)
articles = news_data.get("Articles", [])
for article in articles:
content = f"{article['Title']} {article['Summary']}"
topics_result = topic_pipe(content, top_k=3)
topics = [topic["label"] for topic in topics_result] if topics_result else ["Unknown"]
structured_data["Articles"].append({
"Source": article.get("Source", "Unknown"),
"Title": article["Title"],
"Summary": article["Summary"],
"Sentiment": article.get("Sentiment", "Unknown"),
"Score": article.get("Score", 0.0),
"Topics": topics
})
return structured_data
def extract_json(response):
    """Extract the first JSON object found in an LLM response.

    Model output is often wrapped in markdown fences or surrounded by prose,
    so instead of parsing the whole string this scans for the first ``{`` from
    which a complete JSON object can be decoded.

    Args:
        response: The raw response text (``str``; UTF-8 ``bytes`` are decoded).

    Returns:
        The decoded object as a ``dict``. An empty dict is returned when the
        input is not text or contains no decodable JSON object, so callers can
        always use ``.get`` on the result.
    """
    if isinstance(response, (bytes, bytearray)):
        response = response.decode("utf-8", errors="replace")
    if not isinstance(response, str):
        return {}

    decoder = json.JSONDecoder()
    start = response.find("{")
    while start != -1:
        try:
            # raw_decode parses one value starting at `start` and ignores trailing text.
            parsed, _ = decoder.raw_decode(response, start)
        except json.JSONDecodeError:
            pass
        else:
            if isinstance(parsed, dict):
                return parsed
        start = response.find("{", start + 1)
    return {}
def generate_final_sentiment(news_data, sentiment_counts):
    """Ask the LLM for a short overall sentiment summary.

    Args:
        news_data: Dict with a "Company" name and an "Articles" list; every
            article must have a "Summary" key ("Source" is optional).
        sentiment_counts: Dict with "Positive", "Negative" and "Neutral" counts.

    Returns:
        The text content of the LLM reply, or a fixed fallback sentence when
        the reply is missing or has no content.
    """
    company_name = news_data["Company"]
    total_articles = sum(sentiment_counts.values())
    combined_summaries = " ".join(
        f"({article.get('Source', 'Unknown')}) {article['Summary']}"
        for article in news_data["Articles"]
    )
    # The summaries are included so the model has actual news content to draw
    # the requested "key news highlights" from, not only the label counts.
    prompt = f"""
Based on the analysis of {total_articles} articles about the company "{company_name}":
- Positive articles: {sentiment_counts['Positive']}
- Negative articles: {sentiment_counts['Negative']}
- Neutral articles: {sentiment_counts['Neutral']}
Article summaries: {combined_summaries}
Provide a single, concise summary that integrates the overall sentiment analysis and key news highlights while maintaining a natural flow. Explain its implications for the company's reputation, stock potential, and public perception.
Respond **ONLY** with a well-structured very concise and short paragraph in plain text, focusing on overall sentiment.
"""
    response = llm.invoke([HumanMessage(content=prompt)], max_tokens=200)
    # Guard the attribute access: the fallback string has no `.content`.
    content = getattr(response, "content", None)
    return content if content else "Sentiment analysis summary not available."
def compare_articles(news_data, sentiment_counts):
    """Build the comparative report: common topics, coverage differences, summary.

    Two LLM calls are made (common themes, then coverage differences), and
    ``generate_final_sentiment`` supplies the closing summary. LLM replies are
    validated before use so a malformed reply degrades to defaults instead of
    raising.

    Args:
        news_data: Dict with "Company" and an "Articles" list whose entries
            may carry a "Topics" list and a "Source".
        sentiment_counts: Dict of sentiment label -> article count.

    Returns:
        A dict with "Company", "Articles", "Comparative Sentiment Score"
        (sentiment distribution, coverage differences, topic overlap) and
        "Final Sentiment Analysis".
    """
    articles = news_data.get("Articles", [])
    all_topics = [set(article.get("Topics", [])) for article in articles]
    common_topics = set.intersection(*all_topics) if all_topics else set()

    # 1. Ask the LLM for the themes shared across articles (Common Topics).
    topics_prompt = f"""
Analyze the following article topics and identify **only three** key themes that are common across multiple articles,
even if they are phrased differently. The topics from each article are:
{all_topics}
Respond **ONLY** with a JSON format:
{{"CommonTopics": ["topic1", "topic2", "topic3"]}}
"""
    response = llm.invoke([HumanMessage(content=topics_prompt)]).content
    parsed_topics = extract_json(response)
    llm_topics = parsed_topics.get("CommonTopics") if isinstance(parsed_topics, dict) else None
    if isinstance(llm_topics, list):
        # Coerce to str so the values can be joined for display and used in sets.
        contextual_common_topics = [str(topic) for topic in llm_topics][:3]
    else:
        # Fall back to the literal intersection; sorted for a stable order.
        contextual_common_topics = sorted(common_topics)[:3]

    # 2. Ask the LLM to contrast the articles (Coverage Differences).
    total_articles = sum(sentiment_counts.values())
    comparison_prompt = f"""
Provide a high-level summary comparing {total_articles} news articles about "{news_data['Company']}" from Yahoo Finance:
- Sentiment distribution: {sentiment_counts}
- Commonly discussed topics across articles: {contextual_common_topics}
Consider the following:
1. Notable contrasts between articles (e.g., major differences in topics and perspectives).
2. Overall implications for the company's reputation, stock potential, and public perception.
Respond **ONLY** with a concise and insightful summary in this JSON format:
{{
"Coverage Differences": [
{{"Comparison": "Brief contrast between a positive and negative article", "Impact": "Concise impact statement"}},
{{"Comparison": "Brief contrast between articles on different topics", "Impact": "Concise impact statement"}}
]
}}
"""
    response = llm.invoke([HumanMessage(content=comparison_prompt)]).content
    parsed_coverage = extract_json(response)
    raw_differences = parsed_coverage.get("Coverage Differences", []) if isinstance(parsed_coverage, dict) else []
    # Keep only well-formed entries: the renderer calls `.get` on each one.
    coverage_differences = (
        [entry for entry in raw_differences if isinstance(entry, dict)]
        if isinstance(raw_differences, list)
        else []
    )

    final_sentiment = generate_final_sentiment(news_data, sentiment_counts)

    shared = set(contextual_common_topics)
    unique_topics = {
        f"Article {i+1} ({article.get('Source', 'Unknown')})": sorted(topics - shared)
        for i, (topics, article) in enumerate(zip(all_topics, articles))
    }

    return {
        "Company": news_data["Company"],
        "Articles": articles,
        "Comparative Sentiment Score": {
            "Sentiment Distribution": sentiment_counts,
            "Coverage Differences": coverage_differences,
            "Topic Overlap": {
                "Common Topics": contextual_common_topics,
                "Unique Topics": unique_topics,
            },
        },
        "Final Sentiment Analysis": final_sentiment,
    }
# --- 3. STREAMLIT UI IMPLEMENTATION ---
def display_articles(articles):
    """Render each article: numbered heading, summary, sentiment with score, topics."""
    position = 0
    for entry in articles:
        position += 1
        st.markdown(f"##### **Article {position} ({entry['Source']}): {entry['Title']}**")
        st.write(f"- **Summary:** {entry['Summary']}")
        st.write(f"- **Sentiment:** {entry['Sentiment']} | **Score:** {entry['Score']:.2f}")
        topic_text = ', '.join(entry['Topics'])
        st.write(f"- **Topics:** {topic_text}")
def display_sentiment_distribution(sentiment_distribution):
    """Show the sentiment label counts as a two-column table."""
    st.markdown("#### **Sentiment Distribution:**")
    labels = list(sentiment_distribution.keys())
    counts = list(sentiment_distribution.values())
    st.table({"Sentiment": labels, "Count": counts})
def display_coverage_differences(coverage_differences):
    """List each comparison with its impact; render nothing when there are none."""
    if not coverage_differences:
        return

    st.markdown("#### **Coverage Differences:**")
    for entry in coverage_differences:
        comparison = entry.get('Comparison', 'No Comparison Detail')
        impact = entry.get('Impact', 'No Impact Detail')
        st.write(f"- **{comparison}:** {impact}")
def display_topic_overlap(topic_overlap):
    """Show the shared topics, then the topics unique to each article."""
    st.markdown("#### **Topic Overlap:**")
    shared_topics = topic_overlap.get('Common Topics', ['N/A'])
    st.write(f"- **Common Topics:** {', '.join(shared_topics)}")

    st.markdown("- **Unique Topics by Article:**")
    per_article = topic_overlap.get("Unique Topics", {})
    for label, unique in per_article.items():
        st.write(f" - **{label}:** {', '.join(unique)}")
def run_analysis(company_name):
    """Run scrape -> sentiment -> topics -> LLM report and render the result.

    Stops early with a warning when scraping yields no articles.
    """
    # 1. Fetch the news.
    with st.spinner('1/4 Scraping news from Yahoo Finance...'):
        news_data = extract_titles_and_summaries(company_name)
    if not (news_data and news_data['Articles']):
        st.warning("Could not find any articles or scraping failed.")
        return

    # 2. Sentiment analysis.
    with st.spinner('2/4 Performing Sentiment Analysis...'):
        news_with_sentiment, sentiment_counts = perform_sentiment_analysis(news_data)

    # 3. Topic extraction.
    with st.spinner('3/4 Extracting Topics...'):
        structured_data = extract_topics_with_hf(news_with_sentiment)

    # 4. Comparison and final summary via the LLM.
    with st.spinner('4/4 Generating Final Report...'):
        final_report = compare_articles(structured_data, sentiment_counts)

    # 5. Render the report.
    st.success("Analysis Complete!")
    st.markdown("---")
    st.markdown(f"## **Analysis Report for: {final_report.get('Company', 'Unknown')}**")

    # Articles
    st.markdown("#### **Articles Analyzed:**")
    display_articles(final_report.get("Articles", []))

    # Comparative Sentiment Score
    st.markdown("#### **Comparative Sentiment Score:**")
    comparative = final_report.get("Comparative Sentiment Score", {})
    display_sentiment_distribution(comparative.get("Sentiment Distribution", {}))
    display_coverage_differences(comparative.get("Coverage Differences", []))
    display_topic_overlap(comparative.get("Topic Overlap", {}))

    # Final Sentiment Analysis Result
    st.markdown("#### **Final Sentiment Analysis Summary:**")
    st.markdown(final_report.get("Final Sentiment Analysis", "No sentiment analysis available."))

    st.markdown("---")
    st.json(final_report)
# --- MAIN STREAMLIT APP ---
st.set_page_config(layout="wide")
st.title("Company News Sentiment Analysis (Yahoo Finance)")

# Text box for the company name / topic to label the report with.
company_name = st.text_input(
    "Enter the company name/topic:",
    placeholder="Example: Microsoft, Apple, Tesla (Note: Currently scrapes general market news from Yahoo Finance)",
)

# Run the pipeline only on a button click, and only with a non-empty name.
generate_clicked = st.button("Generate Summary")
if generate_clicked and not company_name:
    st.warning("Please enter a company name or topic to proceed with the analysis!")
elif generate_clicked:
    run_analysis(company_name)