import os
import re
from typing import Optional

import pandas as pd
import wikipediaapi
from smolagents import (
    CodeAgent,
    LiteLLMModel,
    DuckDuckGoSearchTool,
    PythonInterpreterTool,
    FinalAnswerTool,
    VisitWebpageTool,
    tool,
)
from youtube_transcript_api import (
    YouTubeTranscriptApi,
    NoTranscriptFound,
    TranscriptsDisabled,
)
from youtubesearchpython import VideosSearch

# Shared Wikipedia client used by get_wikipedia_summary.
wiki_api = wikipediaapi.Wikipedia(
    language='en',
    user_agent="MyAgent/1.0 (contact@example.com)"
)

# Matches the video id in the common YouTube URL shapes: a `v=` query
# parameter in any position, youtu.be short links, and /shorts/, /embed/ and
# /live/ paths.
_VIDEO_ID_RE = re.compile(
    r"(?:[?&]v=|youtu\.be/|/(?:shorts|embed|live)/)([\w-]+)"
)


def _extract_video_id(video_url: str) -> Optional[str]:
    """Return the video id embedded in *video_url*, or None if none is found."""
    match = _VIDEO_ID_RE.search(video_url)
    return match.group(1) if match else None


def _fetch_transcript_text(video_id: str) -> str:
    """Return the transcript of *video_id* joined into one block of text."""
    # Older youtube-transcript-api releases expose a static `get_transcript`
    # returning a list of dicts; newer releases replace it with an instance
    # `fetch` method yielding snippet objects. Support both.
    if hasattr(YouTubeTranscriptApi, "get_transcript"):
        segments = YouTubeTranscriptApi.get_transcript(video_id)
        return " ".join(segment['text'] for segment in segments)
    fetched = YouTubeTranscriptApi().fetch(video_id)
    return " ".join(snippet.text for snippet in fetched)


@tool
def search_youtube_video(query: str) -> str:
    """
    Searches YouTube and returns the title and URL of the top result.

    Args:
        query (str): The search term to look up on YouTube.

    Returns:
        str: The title and URL of the top video result.
    """
    print(f"--- Executing Youtube with query: '{query}' ---")
    try:
        top_result = VideosSearch(query, limit=1).result()['result'][0]
        video_url = f"https://www.youtube.com/watch?v={top_result['id']}"
        return f"Title: {top_result['title']}\nURL: {video_url}"
    except IndexError:
        # An empty result list means the search matched nothing.
        return "Error: No YouTube videos found for that query."
    except Exception as e:
        # Tools report failures as text so the agent can react to them.
        return f"An unknown error occurred during Youtube: {e}"


@tool
def get_youtube_transcript(video_url: str) -> str:
    """
    Extracts and returns the full transcript of a YouTube video.

    Args:
        video_url (str): The full URL of the YouTube video.

    Returns:
        str: The transcript text, or an error message if unavailable.
    """
    print(f"--- Executing YouTube Transcript Tool for URL: '{video_url}' ---")
    video_id = _extract_video_id(video_url)
    if not video_id:
        return "Error: Invalid YouTube URL format."

    try:
        return _fetch_transcript_text(video_id)
    except NoTranscriptFound:
        return "Error: No transcript could be found for this video."
    except TranscriptsDisabled:
        return "Error: Transcripts are disabled for this video."
    except Exception as e:
        return f"An unknown error occurred while fetching the transcript: {e}"


@tool
def get_wikipedia_summary(query: str) -> str:
    """
    Fetches and returns the summary of a Wikipedia article.

    Args:
        query (str): The title or topic of the Wikipedia article to search.

    Returns:
        str: The summary text of the article, or an error message if not found.
    """
    print(f"--- Executing Wikipedia Tool with query: '{query}' ---")
    try:
        page = wiki_api.page(query)
        if not page.exists():
            return f"Error: The Wikipedia page for '{query}' could not be found."
        return f"Title: {page.title}\n\nSummary:\n{page.summary}"
    except Exception as e:
        # Like the other tools, report lookup failures as text instead of
        # letting the exception escape into the agent run.
        return f"An unknown error occurred while fetching the Wikipedia page: {e}"


@tool
def analyze_excel_file(file_path: str, query: str) -> str:
    """
    Analyze an Excel file using pandas and answer a question about it.

    Args:
        file_path (str): the path to the Excel file.
        query (str): Question about the data

    Returns:
        str: Row and column counts, column names and summary statistics, or an error message.
    """
    # NOTE: `query` is accepted for interface compatibility but is not used;
    # the same overview is returned regardless of the question asked.
    try:
        df = pd.read_excel(file_path)

        # Column labels may be non-strings (e.g. integer headers), so convert
        # them before joining.
        column_names = ', '.join(map(str, df.columns))

        return (
            f"Excel file loaded with {len(df)} rows and {len(df.columns)} columns.\n"
            f"Columns: {column_names}\n\n"
            "Summary statistics:\n"
            f"{df.describe()}"
        )
    except Exception as e:
        return f"Error analyzing Excel file: {str(e)}"


class BasicAgent:
    """Callable wrapper around a smolagents CodeAgent with search, web,
    YouTube, Wikipedia and Excel tools attached."""

    def __init__(self, model_id: str = "gpt-4.1-2025-04-14"):
        """
        Build the underlying CodeAgent.

        Args:
            model_id: LiteLLM model identifier to run the agent with.
        """
        model = LiteLLMModel(model_id=model_id)
        self.agent = CodeAgent(
            model=model,
            tools=[
                DuckDuckGoSearchTool(),
                PythonInterpreterTool(),
                FinalAnswerTool(),
                VisitWebpageTool(),
                search_youtube_video,
                get_youtube_transcript,
                get_wikipedia_summary,
                analyze_excel_file,
            ],
            additional_authorized_imports=[
                'numpy', 'csv', 'xlrd', 'openpyxl', 'pandas', 'markdownify', 'requests',
            ],
            add_base_tools=False,
            max_steps=10,
        )

    def __call__(self, question: str) -> str:
        """
        Run the agent on *question* and return its answer as a stripped string.

        Args:
            question: The question text; it is appended to a fixed
                constraints prefix before being passed to the agent.

        Returns:
            The agent's output converted to ``str`` and stripped.
        """
        custom_prompt = ("""
        __CONSTRAINTS__
        - DO NOT start with an intro or include an outro.
        """)
        print(f"Agent received question (first 50 chars): {question[:50]}...")

        result = self.agent.run(custom_prompt + question)
        print("Raw result:", result)

        # The run result may be a dict, an object carrying `.output`, or a
        # plain value; normalise all three to a string.
        if isinstance(result, dict) and "output" in result:
            answer = result["output"]
        elif hasattr(result, "output"):
            answer = result.output
        else:
            answer = result
        return str(answer).strip()