Spaces:
Sleeping
Sleeping
| from smolagents import CodeAgent, LiteLLMModel, DuckDuckGoSearchTool, PythonInterpreterTool, FinalAnswerTool, VisitWebpageTool, tool | |
| import os | |
| import wikipediaapi | |
| from youtubesearchpython import VideosSearch | |
| from youtube_transcript_api import YouTubeTranscriptApi, NoTranscriptFound, TranscriptsDisabled | |
| import pandas as pd | |
# Module-level English-Wikipedia client shared by get_wikipedia_summary;
# it identifies itself to the API with a custom user agent string.
wiki_api = wikipediaapi.Wikipedia(
    user_agent="MyAgent/1.0 (contact@example.com)",
    language="en",
)
def search_youtube_video(query: str) -> str:
    """
    Look up a query on YouTube and report the best-ranked video.

    Args:
        query: Text to search for on YouTube.

    Returns:
        The top video's title and watch URL, or an error message when the search is empty or fails.
    """
    print(f"--- Executing Youtube with query: '{query}' ---")
    try:
        hits = VideosSearch(query, limit=1).result()['result']
        first_hit = hits[0]
        # 'id' is read before 'title' so a malformed hit reports the same
        # missing key as before.
        identifier, title = first_hit['id'], first_hit['title']
        return f"Title: {title}\nURL: https://www.youtube.com/watch?v={identifier}"
    except IndexError:
        # An empty result list means nothing matched the query.
        message = "Error: No YouTube videos found for that query."
    except Exception as e:
        # Any other failure is reported as text rather than raised.
        message = f"An unknown error occurred during Youtube: {e}"
    return message
def _extract_youtube_video_id(video_url: str) -> str:
    """Return the video ID embedded in a YouTube URL, or "" when none can be found."""
    # Imported locally so this block works without touching the file header.
    from urllib.parse import parse_qs, urlparse

    candidate = video_url.strip() if isinstance(video_url, str) else ""
    if not candidate:
        return ""
    # urlparse only recognises the host when a scheme is present.
    if candidate.startswith("//"):
        candidate = "https:" + candidate
    elif "://" not in candidate:
        candidate = "https://" + candidate

    parts = urlparse(candidate)
    host = (parts.hostname or "").lower()
    if host.startswith("www."):
        host = host[4:]
    segments = [piece for piece in parts.path.split("/") if piece]

    # Short links: https://youtu.be/<id>?t=5
    if host == "youtu.be":
        return segments[0] if segments else ""

    # Watch links on any host: the ID is the `v` query parameter, wherever it
    # sits in the query string; fragments are already split off by urlparse.
    if segments and segments[-1] == "watch":
        watch_ids = parse_qs(parts.query).get("v")
        if watch_ids:
            return watch_ids[0]

    # Path-style links: /embed/<id>, /shorts/<id>, /live/<id>, /v/<id>
    is_youtube_host = (
        host in ("youtube.com", "youtube-nocookie.com")
        or host.endswith(".youtube.com")
    )
    if is_youtube_host and len(segments) >= 2 and segments[0] in ("embed", "shorts", "live", "v"):
        return segments[1]

    return ""


def get_youtube_transcript(video_url: str) -> str:
    """
    Extracts and returns the full transcript of a YouTube video.

    Args:
        video_url: The URL of the YouTube video (watch, youtu.be, shorts, embed or live link).

    Returns:
        The transcript as one block of text, or an error message if it is unavailable.
    """
    print(f"--- Executing YouTube Transcript Tool for URL: '{video_url}' ---")
    try:
        video_id = _extract_youtube_video_id(video_url)
        if not video_id:
            return "Error: Invalid YouTube URL format."

        if hasattr(YouTubeTranscriptApi, "get_transcript"):
            # Older youtube-transcript-api: static call returning dicts.
            segments = YouTubeTranscriptApi.get_transcript(video_id)
            texts = [item['text'] for item in segments]
        else:
            # Newer releases replaced the static helper with an instance
            # fetch() whose snippets expose `.text`.
            fetched = YouTubeTranscriptApi().fetch(video_id)
            texts = [snippet.text for snippet in fetched]

        # Combine transcript segments into a single block of text.
        return " ".join(texts)
    except NoTranscriptFound:
        return "Error: No transcript could be found for this video."
    except TranscriptsDisabled:
        return "Error: Transcripts are disabled for this video."
    except Exception as e:
        return f"An unknown error occurred while fetching the transcript: {e}"
def get_wikipedia_summary(query: str) -> str:
    """
    Fetches and returns the summary of a Wikipedia article.

    Args:
        query: The title or topic of the Wikipedia article to look up.

    Returns:
        The article title and summary text, or an error message if the page is missing or the lookup fails.
    """
    print(f"--- Executing Wikipedia Tool with query: '{query}' ---")

    # A blank title can never resolve to a page, so skip the lookup.
    title = query.strip() if isinstance(query, str) else ""
    if not title:
        return "Error: A non-empty Wikipedia query is required."

    try:
        page = wiki_api.page(title)
        if not page.exists():
            return f"Error: The Wikipedia page for '{query}' could not be found."
        return f"Title: {page.title}\n\nSummary:\n{page.summary}"
    except Exception as e:
        # Report failures as text, like the other tools in this file, instead
        # of letting the exception escape.
        return f"An unknown error occurred while fetching the Wikipedia summary: {e}"
def analyze_excel_file(file_path: str, query: str) -> str:
    """
    Load an Excel file with pandas and report its size, columns and summary statistics.

    Args:
        file_path: The path to the Excel file.
        query: Question about the data. It is accepted but not used to tailor the report.

    Returns:
        A text report with row and column counts, column names and describe() statistics, or an error message.
    """
    try:
        df = pd.read_excel(file_path)

        # Header labels can be numbers or dates; str.join only accepts
        # strings, so convert each label explicitly.
        column_names = ", ".join(str(label) for label in df.columns)

        report_parts = [
            f"Excel file loaded with {len(df)} rows and {len(df.columns)} columns.\n",
            f"Columns: {column_names}\n\n",
            "Summary statistics:\n",
            str(df.describe()),
        ]
        return "".join(report_parts)
    except Exception as e:
        # Report any read or analysis failure as text instead of raising.
        return f"Error analyzing Excel file: {str(e)}"
class BasicAgent:
    """Callable wrapper around a smolagents CodeAgent that answers a question with a plain string."""

    def __init__(self):
        """Build the LiteLLM-backed CodeAgent with the built-in and custom tools."""
        # Local import: the file header imports the `tool` decorator but not
        # the Tool base class needed for the isinstance check below.
        from smolagents import Tool

        model = LiteLLMModel(model_id="gpt-4.1-2025-04-14")

        # The agent only accepts Tool instances. Wrap any plain function with
        # `tool`, and leave anything that is already a Tool untouched.
        custom_functions = (
            search_youtube_video,
            get_youtube_transcript,
            get_wikipedia_summary,
            analyze_excel_file,
        )
        custom_tools = [
            candidate if isinstance(candidate, Tool) else tool(candidate)
            for candidate in custom_functions
        ]

        self.agent = CodeAgent(
            model=model,
            tools=[
                DuckDuckGoSearchTool(),
                PythonInterpreterTool(),
                FinalAnswerTool(),
                VisitWebpageTool(),
                *custom_tools,
            ],
            additional_authorized_imports=['numpy', 'csv', 'xlrd', 'openpyxl', 'pandas', 'markdownify', 'requests'],
            add_base_tools=False,
            max_steps=10,
        )

    def __call__(self, question: str) -> str:
        """
        Run the agent on a question and return its answer as stripped text.

        Args:
            question: The question to hand to the agent.

        Returns:
            The agent's result converted to a string with surrounding whitespace removed.
        """
        # Constraint block prepended to every question.
        custom_prompt = (
            "\n"
            "__CONSTRAINTS__\n"
            "- DO NOT start with an intro or include an outro.\n"
        )
        print(f"Agent received question (first 50 chars): {question[:50]}...")
        result = self.agent.run(custom_prompt + question)
        print("Raw result:", result)

        # The result may be a dict with an "output" key, an object with an
        # `.output` attribute, or the bare answer itself.
        if isinstance(result, dict) and "output" in result:
            answer = result["output"]
        elif hasattr(result, "output"):
            answer = result.output
        else:
            answer = result
        return str(answer).strip()