Spaces:

Warisamm748
/

3.11.2568

Runtime error

App Files Files Community

3.11.2568 / app.py

Warisamm748

Update app.py

a112641 verified 8 months ago

Raw

History Blame

13.2 kB

	import streamlit as st
	import requests
	from bs4 import BeautifulSoup
	from langchain_core.messages import HumanMessage # ใช้ langchain_core เพื่อแก้ปัญหาการ Import
	from langchain_groq import ChatGroq
	import json
	import os
	from transformers import pipeline

	# --- 0. CONFIGURATION & INITIALIZATION ---

	# Groq API key configuration.
	# The value is read from the GROQ_API_KEY environment variable (on a Hugging
	# Face Space this is the Secret of the same name).
	GROQ_API_KEY = os.environ.get('GROQ_API_KEY')
	if GROQ_API_KEY in (None, ""):
	    # Without a key the LLM client cannot be built: report and halt the script.
	    st.error("GROQ_API_KEY is not set. Please configure it in your Space Secrets (Settings > Repository secrets).")
	    st.stop()

	# Shared chat-model client used by the LLM helpers below.
	llm = ChatGroq(model="llama-3.1-8b-instant", api_key=GROQ_API_KEY)

	# --- 1. SCRAPING FUNCTION (Yahoo Finance Only) ---

	def extract_titles_and_summaries(company_name, num_articles=10, timeout=10):
	    """Scrape headlines and summaries from the Yahoo Finance news front page.

	    The page fetched is always the general news page; ``company_name`` is not
	    used to filter articles, only to label the returned payload.

	    Args:
	        company_name: Label stored under "Company"; a falsy value is replaced
	            by "General Market News".
	        num_articles: Maximum number of ``li.stream-item`` elements to read.
	        timeout: Seconds to wait for the HTTP request before giving up, so a
	            stalled connection cannot hang the app indefinitely.

	    Returns:
	        On success, a dict ``{"Company": str, "Articles": [ ... ]}`` where each
	        article is a dict with "Source", "Title" and "Summary". On a non-200
	        response or any exception, an empty list (after reporting the problem
	        with ``st.error``); callers must treat a falsy result as failure.
	    """
	    url = 'https://finance.yahoo.com/news/'

	    try:
	        # A browser-like User-Agent is sent to avoid being blocked.
	        response = requests.get(
	            url,
	            headers={'User-Agent': 'Mozilla/5.0'},
	            timeout=timeout,
	        )
	        if response.status_code != 200:
	            st.error(f"Failed to fetch Yahoo Finance. Status code: {response.status_code}")
	            return []

	        soup = BeautifulSoup(response.content, "html.parser")

	        # Selector for the news list entries.
	        articles = soup.find_all('li', class_='stream-item', limit=num_articles)

	        extracted_articles = []
	        for article in articles:
	            # Title lives in an <h3>; prefer the text of a nested <a> if present.
	            title_tag = article.find('h3')
	            title = "No Title Found"
	            if title_tag:
	                link_tag = title_tag.find('a')
	                title = (link_tag or title_tag).get_text(strip=True)

	            # Summary is the first <p> of the entry.
	            summary_tag = article.find('p')
	            summary = summary_tag.get_text(strip=True) if summary_tag else "No Summary Found"

	            extracted_articles.append({
	                "Source": "Yahoo Finance",
	                "Title": title,
	                "Summary": summary,
	            })

	        return {
	            "Company": company_name if company_name else "General Market News",
	            "Articles": extracted_articles,
	        }

	    except Exception as e:
	        # Deliberate best-effort boundary: surface the problem in the UI
	        # (including request timeouts) instead of crashing the app.
	        st.error(f"An error occurred during scraping: {e}")
	        return []

	# --- 2. ANALYSIS FUNCTIONS ---

	@st.cache_resource
	def _load_sentiment_pipeline():
	    """Build the sentiment classifier once and reuse it across Streamlit reruns."""
	    # device=-1 selects the CPU.
	    return pipeline(
	        "text-classification",
	        model="tabularisai/multilingual-sentiment-analysis",
	        device=-1,
	    )

	def perform_sentiment_analysis(news_data):
	    """Classify each article's sentiment with a Hugging Face pipeline.

	    Each article dict in ``news_data["Articles"]`` is updated in place with a
	    "Sentiment" label ("Positive", "Negative", "Neutral" or "Unknown") and a
	    float "Score" taken from the classifier output.

	    Args:
	        news_data: Dict with an "Articles" list; every article must provide
	            "Title" and "Summary".

	    Returns:
	        A tuple ``(news_data, sentiment_counts)`` where ``sentiment_counts``
	        maps "Positive"/"Negative"/"Neutral" to the number of articles with
	        that label. Articles labelled "Unknown" are not counted.
	    """
	    # Loading the model is expensive; the cached loader avoids rebuilding it
	    # on every call.
	    pipe = _load_sentiment_pipeline()

	    # Collapse the model's five labels into three buckets (hoisted out of the
	    # loop because it never changes).
	    sentiment_map = {
	        "positive": "Positive",
	        "negative": "Negative",
	        "neutral": "Neutral",
	        "very positive": "Positive",
	        "very negative": "Negative",
	    }

	    articles = news_data.get("Articles", [])
	    sentiment_counts = {"Positive": 0, "Negative": 0, "Neutral": 0}

	    for article in articles:
	        content = f"{article['Title']} {article['Summary']}"
	        sentiment_result = pipe(content)[0]
	        sentiment = sentiment_map.get(sentiment_result["label"].lower(), "Unknown")

	        article["Sentiment"] = sentiment
	        article["Score"] = float(sentiment_result["score"])
	        if sentiment in sentiment_counts:
	            sentiment_counts[sentiment] += 1

	    return news_data, sentiment_counts

	@st.cache_resource
	def _load_topic_pipeline():
	    """Build the topic classifier once and reuse it across Streamlit reruns."""
	    # device=-1 selects the CPU.
	    return pipeline(
	        "text-classification",
	        model="valurank/distilroberta-topic-classification",
	        device=-1,
	    )

	def extract_topics_with_hf(news_data):
	    """Attach up to three topic labels to each article using a Hugging Face pipeline.

	    Args:
	        news_data: Dict with optional "Company" and "Articles" entries; every
	            article must provide "Title" and "Summary".

	    Returns:
	        A new dict ``{"Company": ..., "Articles": [...]}``. Each output article
	        carries "Source", "Title", "Summary", "Sentiment", "Score" (defaults
	        "Unknown"/"Unknown"/0.0 where the input lacks them) and a "Topics"
	        list, which is ``["Unknown"]`` when the classifier returns nothing.
	    """
	    structured_data = {
	        "Company": news_data.get("Company", "Unknown"),
	        "Articles": [],
	    }
	    # Loading the model is expensive; the cached loader avoids rebuilding it
	    # on every call.
	    topic_pipe = _load_topic_pipeline()

	    for article in news_data.get("Articles", []):
	        content = f"{article['Title']} {article['Summary']}"
	        topics_result = topic_pipe(content, top_k=3)
	        topics = [topic["label"] for topic in topics_result] if topics_result else ["Unknown"]

	        structured_data["Articles"].append({
	            "Source": article.get("Source", "Unknown"),
	            "Title": article["Title"],
	            "Summary": article["Summary"],
	            "Sentiment": article.get("Sentiment", "Unknown"),
	            "Score": article.get("Score", 0.0),
	            "Topics": topics,
	        })

	    return structured_data

	def extract_json(response):
	    """Parse a JSON object out of an LLM reply, tolerating surrounding text.

	    The whole reply is tried first. If that is not a JSON object, the span
	    from the first "{" to the last "}" is tried, which covers replies wrapped
	    in Markdown code fences or short explanatory prose.

	    Args:
	        response: Reply text (``str``, or UTF-8 ``bytes``/``bytearray``).

	    Returns:
	        The decoded dict, or ``{}`` when no JSON object can be recovered
	        (invalid JSON, non-object JSON such as a bare list, or a non-text
	        input). Callers can therefore always call ``.get`` on the result.
	    """
	    if isinstance(response, (bytes, bytearray)):
	        response = response.decode("utf-8", errors="replace")
	    elif not isinstance(response, str):
	        return {}

	    candidates = [response]
	    start, end = response.find("{"), response.rfind("}")
	    if 0 <= start < end:
	        candidates.append(response[start:end + 1])

	    for candidate in candidates:
	        try:
	            parsed = json.loads(candidate)
	        except json.JSONDecodeError:
	            continue
	        if isinstance(parsed, dict):
	            return parsed
	    return {}

	def generate_final_sentiment(news_data, sentiment_counts):
	    """Ask the LLM for a short overall sentiment summary of the analysed articles.

	    Args:
	        news_data: Dict with "Company" and an "Articles" list.
	        sentiment_counts: Dict with "Positive", "Negative" and "Neutral"
	            article counts.

	    Returns:
	        The text content of the LLM reply, or the fallback string
	        "Sentiment analysis summary not available." when the reply is empty
	        or has no content.
	    """
	    company_name = news_data["Company"]
	    total_articles = sum(sentiment_counts.values())
	    # The summaries are the only news content the model sees; without them it
	    # cannot produce the "key news highlights" the prompt asks for.
	    combined_summaries = " ".join(
	        f"({article.get('Source', 'Unknown')}) {article.get('Summary', '')}"
	        for article in news_data["Articles"]
	    )

	    prompt = f"""
	    Based on the analysis of {total_articles} articles about the company "{company_name}":
	    - Positive articles: {sentiment_counts['Positive']}
	    - Negative articles: {sentiment_counts['Negative']}
	    - Neutral articles: {sentiment_counts['Neutral']}

	    Article summaries:
	    {combined_summaries}

	    Provide a single, concise summary that integrates the overall sentiment analysis and key news highlights while maintaining a natural flow. Explain its implications for the company's reputation, stock potential, and public perception.

	    Respond ONLY with a well-structured very concise and short paragraph in plain text, focusing on overall sentiment.
	    """
	    response = llm.invoke([HumanMessage(content=prompt)], max_tokens=200)
	    # Read .content defensively: the previous fallback string was itself
	    # dereferenced with .content, which raised AttributeError.
	    content = getattr(response, "content", None)
	    return content if content else "Sentiment analysis summary not available."

	def compare_articles(news_data, sentiment_counts):
	    """Use the LLM to compare the articles and assemble the final report.

	    Two LLM calls are made: one to name up to three common themes, one to
	    describe coverage differences. Both replies are expected to be JSON and
	    are validated before use, so a malformed reply degrades to a fallback
	    instead of raising.

	    Args:
	        news_data: Dict with "Company" and an "Articles" list whose entries
	            may carry a "Topics" list.
	        sentiment_counts: Dict of sentiment label to article count.

	    Returns:
	        A dict with "Company", "Articles", "Comparative Sentiment Score"
	        (holding "Sentiment Distribution", "Coverage Differences" and
	        "Topic Overlap") and "Final Sentiment Analysis".
	    """
	    articles = news_data.get("Articles", [])
	    all_topics = [set(article.get("Topics", [])) for article in articles]
	    common_topics = set.intersection(*all_topics) if all_topics else set()

	    # 1. Ask the LLM for the common topics.
	    topics_prompt = f"""
	    Analyze the following article topics and identify only three key themes that are common across multiple articles,
	    even if they are phrased differently. The topics from each article are:
	    {all_topics}
	    Respond ONLY with a JSON format:
	    {{"CommonTopics": ["topic1", "topic2", "topic3"]}}
	    """
	    response = llm.invoke([HumanMessage(content=topics_prompt)]).content
	    raw_common = extract_json(response).get("CommonTopics")
	    if not isinstance(raw_common, list):
	        # Missing or malformed: fall back to the literal intersection
	        # (sorted so the report is deterministic).
	        raw_common = sorted(common_topics)
	    # Keep only strings so the set arithmetic and joins below cannot fail.
	    contextual_common_topics = [topic for topic in raw_common if isinstance(topic, str)][:3]

	    # 2. Ask the LLM to contrast the coverage.
	    total_articles = sum(sentiment_counts.values())
	    comparison_prompt = f"""
	    Provide a high-level summary comparing {total_articles} news articles about "{news_data['Company']}" from Yahoo Finance:
	    - Sentiment distribution: {sentiment_counts}
	    - Commonly discussed topics across articles: {contextual_common_topics}

	    Consider the following:
	    1. Notable contrasts between articles (e.g., major differences in topics and perspectives).
	    2. Overall implications for the company's reputation, stock potential, and public perception.

	    Respond ONLY with a concise and insightful summary in this JSON format:
	    {{
	    "Coverage Differences": [
	    {{"Comparison": "Brief contrast between a positive and negative article", "Impact": "Concise impact statement"}},
	    {{"Comparison": "Brief contrast between articles on different topics", "Impact": "Concise impact statement"}}
	    ]
	    }}
	    """
	    response = llm.invoke([HumanMessage(content=comparison_prompt)]).content
	    raw_differences = extract_json(response).get("Coverage Differences", [])
	    # Only dict entries are kept; the display code calls .get on each entry.
	    coverage_differences = (
	        [entry for entry in raw_differences if isinstance(entry, dict)]
	        if isinstance(raw_differences, list)
	        else []
	    )
	    final_sentiment = generate_final_sentiment(news_data, sentiment_counts)

	    shared = set(contextual_common_topics)
	    unique_topics = {
	        f"Article {i+1} ({article.get('Source', 'Unknown')})": list(topics - shared)
	        for i, (topics, article) in enumerate(zip(all_topics, articles))
	    }

	    return {
	        "Company": news_data["Company"],
	        "Articles": articles,
	        "Comparative Sentiment Score": {
	            "Sentiment Distribution": sentiment_counts,
	            "Coverage Differences": coverage_differences,
	            "Topic Overlap": {
	                "Common Topics": contextual_common_topics,
	                "Unique Topics": unique_topics,
	            },
	        },
	        "Final Sentiment Analysis": final_sentiment,
	    }

	# --- 3. STREAMLIT UI IMPLEMENTATION ---

	def display_articles(articles):
	    """Render each article's title, summary, sentiment, score and topics.

	    Args:
	        articles: Iterable of dicts providing "Source", "Title", "Summary",
	            "Sentiment", a numeric "Score" and a "Topics" list of strings.
	    """
	    for i, article in enumerate(articles, start=1):
	        st.markdown(f"##### Article {i} ({article['Source']}): {article['Title']}")
	        st.write(f"- Summary: {article['Summary']}")
	        # "\\|" yields the same backslash-pipe text as before, without relying
	        # on the invalid escape sequence "\|".
	        st.write(f"- Sentiment: {article['Sentiment']} \\| Score: {article['Score']:.2f}")
	        st.write(f"- Topics: {', '.join(article['Topics'])}")

	def display_sentiment_distribution(sentiment_distribution):
	    """Show the sentiment label/count pairs as a two-column table.

	    Args:
	        sentiment_distribution: Mapping of sentiment label to article count.
	    """
	    st.markdown("#### Sentiment Distribution:")
	    labels = list(sentiment_distribution.keys())
	    counts = list(sentiment_distribution.values())
	    st.table({"Sentiment": labels, "Count": counts})

	def display_coverage_differences(coverage_differences):
	    """List each coverage comparison with its impact; render nothing when empty.

	    Args:
	        coverage_differences: Sequence of dicts with optional "Comparison"
	            and "Impact" entries.
	    """
	    if not coverage_differences:
	        return

	    st.markdown("#### Coverage Differences:")
	    for entry in coverage_differences:
	        comparison_text = entry.get('Comparison', 'No Comparison Detail')
	        impact_text = entry.get('Impact', 'No Impact Detail')
	        st.write(f"- {comparison_text}: {impact_text}")

	def display_topic_overlap(topic_overlap):
	    """Render the common topics and the per-article unique topics.

	    Args:
	        topic_overlap: Dict with optional "Common Topics" (list) and
	            "Unique Topics" (dict of article label to topic list).
	    """
	    st.markdown("#### Topic Overlap:")
	    # Show "N/A" when the key is missing and also when the list is empty;
	    # previously an empty list rendered as a blank value.
	    common = topic_overlap.get('Common Topics') or ['N/A']
	    # str() guards the join against non-string topic values.
	    st.write(f"- Common Topics: {', '.join(str(topic) for topic in common)}")
	    st.markdown("- Unique Topics by Article:")
	    for article, topics in topic_overlap.get("Unique Topics", {}).items():
	        st.write(f" - {article}: {', '.join(str(topic) for topic in topics)}")

	def run_analysis(company_name):
	    """Run the scrape -> sentiment -> topics -> LLM report pipeline and render it.

	    Args:
	        company_name: Company/topic label passed to the scraper and shown in
	            the report heading.

	    Returns:
	        None. Output goes to the Streamlit page; the function returns early
	        with a warning when no articles were obtained.
	    """
	    # Step 1: fetch the news.
	    with st.spinner('1/4 Scraping news from Yahoo Finance...'):
	        news_data = extract_titles_and_summaries(company_name)

	    # The scraper returns a falsy value on failure, so test that first.
	    if not (news_data and news_data['Articles']):
	        st.warning("Could not find any articles or scraping failed.")
	        return

	    # Step 2: sentiment analysis.
	    with st.spinner('2/4 Performing Sentiment Analysis...'):
	        news_with_sentiment, sentiment_counts = perform_sentiment_analysis(news_data)

	    # Step 3: topic extraction.
	    with st.spinner('3/4 Extracting Topics...'):
	        structured_data = extract_topics_with_hf(news_with_sentiment)

	    # Step 4: LLM comparison and summary.
	    with st.spinner('4/4 Generating Final Report...'):
	        final_report = compare_articles(structured_data, sentiment_counts)

	    # Step 5: render the results.
	    st.success("Analysis Complete!")

	    st.markdown("---")
	    st.markdown(f"## Analysis Report for: {final_report.get('Company', 'Unknown')}")

	    # Articles
	    st.markdown("#### Articles Analyzed:")
	    display_articles(final_report.get("Articles", []))

	    # Comparative Sentiment Score (looked up once, shared by the three sections)
	    st.markdown("#### Comparative Sentiment Score:")
	    comparative = final_report.get("Comparative Sentiment Score", {})
	    display_sentiment_distribution(comparative.get("Sentiment Distribution", {}))
	    display_coverage_differences(comparative.get("Coverage Differences", []))
	    display_topic_overlap(comparative.get("Topic Overlap", {}))

	    # Final sentiment summary
	    st.markdown("#### Final Sentiment Analysis Summary:")
	    st.markdown(final_report.get("Final Sentiment Analysis", "No sentiment analysis available."))
	    st.markdown("---")

	    # Raw report for inspection.
	    st.json(final_report)

	# --- MAIN STREAMLIT APP ---

	st.set_page_config(layout="wide")
	st.title("Company News Sentiment Analysis (Yahoo Finance)")

	# Free-text company/topic input.
	company_name = st.text_input(
	    "Enter the company name/topic:",
	    placeholder="Example: Microsoft, Apple, Tesla (Note: Currently scrapes general market news from Yahoo Finance)",
	)

	# Run the pipeline only on an explicit click, and only with a non-empty input.
	if st.button("Generate Summary"):
	    if not company_name:
	        st.warning("Please enter a company name or topic to proceed with the analysis!")
	    else:
	        run_analysis(company_name)