File size: 13,218 Bytes
a1e8780
 
6f622fa
a112641
6f622fa
 
 
 
a1e8780
6f622fa
a1e8780
6f622fa
a112641
 
6f622fa
a112641
6f622fa
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a1e8780
6f622fa
 
a1e8780
 
6f622fa
a1e8780
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6f622fa
 
 
a1e8780
 
 
6f622fa
a1e8780
6f622fa
a1e8780
 
6f622fa
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a112641
6f622fa
 
 
 
 
 
 
 
 
 
 
 
a1e8780
 
6f622fa
a1e8780
6f622fa
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
import streamlit as st
import requests
from bs4 import BeautifulSoup
from langchain_core.messages import HumanMessage # ใช้ langchain_core เพื่อแก้ปัญหาการ Import
from langchain_groq import ChatGroq
import json
import os
from transformers import pipeline

# --- 0. CONFIGURATION & INITIALIZATION ---

# Groq API key configuration.
# The key is read from the GROQ_API_KEY environment variable (the error text
# below points users at the Hugging Face Space secret of the same name).
GROQ_API_KEY = os.environ.get('GROQ_API_KEY')
if not GROQ_API_KEY:
    # Without a key the app cannot talk to the LLM, so halt the script run.
    st.error("GROQ_API_KEY is not set. Please configure it in your Space Secrets (Settings > Repository secrets).")
    st.stop()

# Shared chat-model client used by the LLM-backed analysis functions.
llm = ChatGroq(model="llama-3.1-8b-instant", api_key=GROQ_API_KEY)

# --- 1. SCRAPING FUNCTION (Yahoo Finance Only) ---

def extract_titles_and_summaries(company_name, num_articles=10):
    """Scrape headline/summary pairs from the Yahoo Finance news landing page.

    The page fetched is always the general news page; ``company_name`` is only
    used to label the result and does not filter the articles.

    Args:
        company_name: Label stored under ``"Company"``. A falsy value is
            replaced with ``"General Market News"``.
        num_articles: Maximum number of ``li.stream-item`` elements to read.

    Returns:
        On success, a dict ``{"Company": str, "Articles": list[dict]}`` where
        every article has ``"Source"``, ``"Title"`` and ``"Summary"`` keys.
        On failure (non-200 response or any exception) the error is reported
        through ``st.error`` and an empty list is returned, so callers should
        test the result for truthiness before indexing it.
    """
    url = 'https://finance.yahoo.com/news/'

    try:
        # A browser-like User-Agent reduces the chance of being blocked; the
        # timeout keeps a stalled connection from hanging the app forever.
        response = requests.get(
            url,
            headers={'User-Agent': 'Mozilla/5.0'},
            timeout=10,
        )
        if response.status_code != 200:
            st.error(f"Failed to fetch Yahoo Finance. Status code: {response.status_code}")
            return []

        soup = BeautifulSoup(response.content, "html.parser")

        # Each news entry is expected to be an <li class="stream-item">.
        articles = soup.find_all('li', class_='stream-item', limit=num_articles)

        extracted_articles = []
        for article in articles:
            title_tag = article.find('h3')
            summary_tag = article.find('p')

            # An item with neither a headline nor a summary carries no text to
            # analyse; skip it instead of emitting two placeholder strings.
            if title_tag is None and summary_tag is None:
                continue

            title = "No Title Found"
            if title_tag:
                # Prefer the anchor text inside the heading when present.
                link_tag = title_tag.find('a')
                title = link_tag.get_text(strip=True) if link_tag else title_tag.get_text(strip=True)

            summary = summary_tag.get_text(strip=True) if summary_tag else "No Summary Found"

            extracted_articles.append({
                "Source": "Yahoo Finance",
                "Title": title,
                "Summary": summary,
            })

        return {
            "Company": company_name if company_name else "General Market News",
            "Articles": extracted_articles,
        }

    except Exception as e:
        # Top-level UI boundary: surface the problem to the user rather than crash.
        st.error(f"An error occurred during scraping: {e}")
        return []

# --- 2. ANALYSIS FUNCTIONS ---

def perform_sentiment_analysis(news_data):
    """Classify the sentiment of every article with a Hugging Face pipeline.

    Each article dict in ``news_data["Articles"]`` is updated in place with a
    ``"Sentiment"`` label (``"Positive"``, ``"Negative"``, ``"Neutral"`` or
    ``"Unknown"``) and a float ``"Score"``.

    Args:
        news_data: Dict with an optional ``"Articles"`` list; each article
            must provide ``"Title"`` and ``"Summary"``.

    Returns:
        A tuple ``(news_data, sentiment_counts)`` where ``news_data`` is the
        same (mutated) object and ``sentiment_counts`` maps ``"Positive"``,
        ``"Negative"`` and ``"Neutral"`` to the number of articles with that
        label. Articles labelled ``"Unknown"`` are not counted.
    """
    sentiment_counts = {"Positive": 0, "Negative": 0, "Neutral": 0}
    articles = news_data.get("Articles", [])
    if not articles:
        # Nothing to classify: skip loading the model entirely.
        return news_data, sentiment_counts

    # Collapse the model's five labels into three buckets (built once, not per article).
    sentiment_map = {
        "positive": "Positive",
        "negative": "Negative",
        "neutral": "Neutral",
        "very positive": "Positive",
        "very negative": "Negative",
    }

    # device=-1 selects the CPU.
    pipe = pipeline("text-classification", model="tabularisai/multilingual-sentiment-analysis", device=-1)

    for article in articles:
        content = f"{article['Title']} {article['Summary']}"
        # truncation=True keeps over-long text within the model's input limit.
        sentiment_result = pipe(content, truncation=True)[0]
        sentiment = sentiment_map.get(sentiment_result["label"].lower(), "Unknown")

        article["Sentiment"] = sentiment
        article["Score"] = float(sentiment_result["score"])
        if sentiment in sentiment_counts:
            sentiment_counts[sentiment] += 1

    return news_data, sentiment_counts

def extract_topics_with_hf(news_data):
    """Attach up to three topic labels to every article via a Hugging Face pipeline.

    Args:
        news_data: Dict with optional ``"Company"`` and ``"Articles"`` keys;
            each article must provide ``"Title"`` and ``"Summary"`` and may
            carry ``"Source"``, ``"Sentiment"`` and ``"Score"``.

    Returns:
        A new dict ``{"Company": ..., "Articles": [...]}`` whose articles copy
        the source, title, summary, sentiment and score from the input and add
        a ``"Topics"`` list of label strings. The input is not modified.
    """
    structured_data = {
        "Company": news_data.get("Company", "Unknown"),
        "Articles": [],
    }
    articles = news_data.get("Articles", [])
    if not articles:
        # Nothing to classify: skip loading the model entirely.
        return structured_data

    # device=-1 selects the CPU.
    topic_pipe = pipeline("text-classification", model="valurank/distilroberta-topic-classification", device=-1)

    for article in articles:
        content = f"{article['Title']} {article['Summary']}"
        # top_k=3 asks for the three best labels; truncation=True keeps
        # over-long text within the model's input limit.
        topics_result = topic_pipe(content, top_k=3, truncation=True)
        topics = [topic["label"] for topic in topics_result] if topics_result else ["Unknown"]

        structured_data["Articles"].append({
            "Source": article.get("Source", "Unknown"),
            "Title": article["Title"],
            "Summary": article["Summary"],
            "Sentiment": article.get("Sentiment", "Unknown"),
            "Score": article.get("Score", 0.0),
            "Topics": topics,
        })

    return structured_data

def extract_json(response):
    """Parse a JSON object out of an LLM response, tolerating surrounding text.

    The whole string is tried first. If that fails (or does not yield an
    object), the span from the first ``{`` to the last ``}`` is tried, which
    covers replies wrapped in Markdown code fences or explanatory prose.

    Args:
        response: The raw text returned by the model.

    Returns:
        The decoded ``dict``, or an empty dict when ``response`` is not a
        string, contains no valid JSON, or decodes to something other than an
        object. The result is always safe to call ``.get`` on.
    """
    if not isinstance(response, str):
        return {}

    candidates = [response]
    start = response.find("{")
    end = response.rfind("}")
    if 0 <= start < end:
        candidates.append(response[start:end + 1])

    for candidate in candidates:
        try:
            parsed = json.loads(candidate)
        except json.JSONDecodeError:
            continue
        if isinstance(parsed, dict):
            return parsed

    return {}

def generate_final_sentiment(news_data, sentiment_counts):
    """Ask the LLM for a short overall sentiment summary.

    Args:
        news_data: Dict with a ``"Company"`` name and an ``"Articles"`` list
            whose items provide ``"Summary"`` and optionally ``"Source"``.
        sentiment_counts: Dict with ``"Positive"``, ``"Negative"`` and
            ``"Neutral"`` counts.

    Returns:
        The text content of the model's reply, or the string
        ``"Sentiment analysis summary not available."`` when the reply is
        missing or empty.
    """
    company_name = news_data["Company"]
    total_articles = sum(sentiment_counts.values())
    combined_summaries = " ".join(
        f"({article.get('Source', 'Unknown')}) {article['Summary']}"
        for article in news_data["Articles"]
    )

    # The summaries are included so the model can actually cite the
    # "key news highlights" the instructions ask for.
    prompt = f"""
    Based on the analysis of {total_articles} articles about the company "{company_name}":
    - Positive articles: {sentiment_counts['Positive']}
    - Negative articles: {sentiment_counts['Negative']}
    - Neutral articles: {sentiment_counts['Neutral']}

    Article summaries:
    {combined_summaries}

    Provide a single, concise summary that integrates the overall sentiment analysis and key news highlights while maintaining a natural flow. Explain its implications for the company's reputation, stock potential, and public perception.

    Respond **ONLY** with a well-structured very concise and short paragraph in plain text, focusing on overall sentiment.
    """
    response = llm.invoke([HumanMessage(content=prompt)], max_tokens=200)

    # Fall back to a fixed message instead of dereferencing a missing reply.
    content = getattr(response, "content", None)
    return content if content else "Sentiment analysis summary not available."

def compare_articles(news_data, sentiment_counts):
    """Build the final comparative report using two LLM calls plus a summary.

    Args:
        news_data: Dict with ``"Company"`` and an ``"Articles"`` list; every
            article must carry a ``"Topics"`` list and may carry ``"Source"``.
        sentiment_counts: Dict of sentiment label -> article count.

    Returns:
        A dict with ``"Company"``, ``"Articles"``, ``"Comparative Sentiment
        Score"`` (sentiment distribution, coverage differences, topic overlap)
        and ``"Final Sentiment Analysis"`` (text summary).
    """
    articles = news_data.get("Articles", [])
    all_topics = [set(article["Topics"]) for article in articles]
    common_topics = set.intersection(*all_topics) if all_topics else set()

    # 1. Ask the LLM for themes shared across articles.
    topics_prompt = f"""
    Analyze the following article topics and identify **only three** key themes that are common across multiple articles,     
    even if they are phrased differently. The topics from each article are:
    {all_topics}
    Respond **ONLY** with a JSON format:
    {{"CommonTopics": ["topic1", "topic2", "topic3"]}}
    """
    response = llm.invoke([HumanMessage(content=topics_prompt)]).content
    parsed_topics = extract_json(response)
    llm_topics = parsed_topics.get("CommonTopics") if isinstance(parsed_topics, dict) else None
    if isinstance(llm_topics, list):
        # Keep only string topics: they are later joined for display and
        # placed in a set, so other JSON values would raise.
        contextual_common_topics = [topic for topic in llm_topics if isinstance(topic, str)][:3]
    else:
        # Model output unusable: fall back to the exact label intersection,
        # sorted so the result is deterministic.
        contextual_common_topics = sorted(common_topics)[:3]

    # 2. Ask the LLM to contrast the coverage.
    total_articles = sum(sentiment_counts.values())
    comparison_prompt = f"""
    Provide a high-level summary comparing {total_articles} news articles about "{news_data['Company']}" from Yahoo Finance:
    - Sentiment distribution: {sentiment_counts}
    - Commonly discussed topics across articles: {contextual_common_topics}
    
    Consider the following:
    1. Notable contrasts between articles (e.g., major differences in topics and perspectives).
    2. Overall implications for the company's reputation, stock potential, and public perception.
    
    Respond **ONLY** with a concise and insightful summary in this JSON format:
    {{
        "Coverage Differences": [
            {{"Comparison": "Brief contrast between a positive and negative article", "Impact": "Concise impact statement"}},
            {{"Comparison": "Brief contrast between articles on different topics", "Impact": "Concise impact statement"}}
        ]
    }}
    """
    response = llm.invoke([HumanMessage(content=comparison_prompt)]).content
    parsed_comparison = extract_json(response)
    raw_differences = (
        parsed_comparison.get("Coverage Differences", [])
        if isinstance(parsed_comparison, dict)
        else []
    )
    # The display code calls .get() on every entry, so keep only dict entries.
    coverage_differences = (
        [entry for entry in raw_differences if isinstance(entry, dict)]
        if isinstance(raw_differences, list)
        else []
    )

    final_sentiment = generate_final_sentiment(news_data, sentiment_counts)

    shared = set(contextual_common_topics)
    unique_topics = {
        f"Article {i+1} ({article.get('Source', 'Unknown')})": list(topics - shared)
        for i, (topics, article) in enumerate(zip(all_topics, articles))
    }

    return {
        "Company": news_data["Company"],
        "Articles": articles,
        "Comparative Sentiment Score": {
            "Sentiment Distribution": sentiment_counts,
            "Coverage Differences": coverage_differences,
            "Topic Overlap": {
                "Common Topics": contextual_common_topics,
                "Unique Topics": unique_topics,
            },
        },
        "Final Sentiment Analysis": final_sentiment,
    }

# --- 3. STREAMLIT UI IMPLEMENTATION ---

def display_articles(articles):
    """Render each analysed article (heading, summary, sentiment, topics)."""
    position = 0
    for entry in articles:
        position += 1  # articles are numbered from 1 in the UI
        st.markdown(f"##### **Article {position} ({entry['Source']}): {entry['Title']}**")
        st.write(f"- **Summary:** {entry['Summary']}")
        st.write(f"- **Sentiment:** {entry['Sentiment']} | **Score:** {entry['Score']:.2f}")
        st.write(f"- **Topics:** {', '.join(entry['Topics'])}")

def display_sentiment_distribution(sentiment_distribution):
    """Show the sentiment label -> count mapping as a two-column table."""
    st.markdown("#### **Sentiment Distribution:**")
    labels = list(sentiment_distribution.keys())
    counts = list(sentiment_distribution.values())
    st.table({"Sentiment": labels, "Count": counts})

def display_coverage_differences(coverage_differences):
    """List each coverage comparison with its impact; render nothing when empty."""
    if not coverage_differences:
        return

    st.markdown("#### **Coverage Differences:**")
    for entry in coverage_differences:
        comparison_text = entry.get('Comparison', 'No Comparison Detail')
        impact_text = entry.get('Impact', 'No Impact Detail')
        st.write(f"- **{comparison_text}:** {impact_text}")

def display_topic_overlap(topic_overlap):
    """Show the shared topics, then the per-article unique topics."""
    st.markdown("#### **Topic Overlap:**")

    shared_topics = topic_overlap.get('Common Topics', ['N/A'])
    st.write(f"- **Common Topics:** {', '.join(shared_topics)}")

    st.markdown("- **Unique Topics by Article:**")
    unique_by_article = topic_overlap.get("Unique Topics", {})
    for label, distinct_topics in unique_by_article.items():
        st.write(f"  - **{label}:** {', '.join(distinct_topics)}")

def run_analysis(company_name):
    """Run the scrape -> sentiment -> topics -> LLM report pipeline and render it."""
    # Step 1: fetch the news.
    with st.spinner('1/4 Scraping news from Yahoo Finance...'):
        scraped = extract_titles_and_summaries(company_name)

    # The scraper yields a falsy value on failure; an empty article list is
    # treated the same way.
    if not (scraped and scraped['Articles']):
        st.warning("Could not find any articles or scraping failed.")
        return

    # Step 2: sentiment classification.
    with st.spinner('2/4 Performing Sentiment Analysis...'):
        scored, sentiment_counts = perform_sentiment_analysis(scraped)

    # Step 3: topic extraction.
    with st.spinner('3/4 Extracting Topics...'):
        with_topics = extract_topics_with_hf(scored)

    # Step 4: LLM comparison and summary.
    with st.spinner('4/4 Generating Final Report...'):
        final_report = compare_articles(with_topics, sentiment_counts)

    # Step 5: render the results.
    st.success("Analysis Complete!")

    st.markdown("---")
    st.markdown(f"## **Analysis Report for: {final_report.get('Company', 'Unknown')}**")

    # Articles
    st.markdown("#### **Articles Analyzed:**")
    display_articles(final_report.get("Articles", []))

    # Comparative Sentiment Score
    st.markdown("#### **Comparative Sentiment Score:**")
    comparative = final_report.get("Comparative Sentiment Score", {})
    display_sentiment_distribution(comparative.get("Sentiment Distribution", {}))
    display_coverage_differences(comparative.get("Coverage Differences", []))
    display_topic_overlap(comparative.get("Topic Overlap", {}))

    # Final Sentiment Analysis Result
    st.markdown("#### **Final Sentiment Analysis Summary:**")
    st.markdown(final_report.get("Final Sentiment Analysis", "No sentiment analysis available."))
    st.markdown("---")

    # Raw report for inspection.
    st.json(final_report)

# --- MAIN STREAMLIT APP ---

st.set_page_config(layout="wide")
st.title("Company News Sentiment Analysis (Yahoo Finance)")

# Free-text field for the company or topic to label the report with.
company_name = st.text_input(
    "Enter the company name/topic:",
    placeholder="Example: Microsoft, Apple, Tesla (Note: Currently scrapes general market news from Yahoo Finance)",
)

# Run the pipeline only on an explicit click, and only with non-empty input.
if st.button("Generate Summary"):
    if not company_name:
        st.warning("Please enter a company name or topic to proceed with the analysis!")
    else:
        run_analysis(company_name)