janjibDEV committed on
Commit
bd1c4e8
·
1 Parent(s): 81917a3

50% on GAIA benchmark

Browse files
Files changed (4) hide show
  1. .gitignore +4 -0
  2. agent.py +161 -0
  3. app.py +12 -12
  4. requirements.txt +159 -2
.gitignore ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ .env
2
+ GAIA_result.txt
3
+ __pycache__/
4
+ venv/
agent.py ADDED
@@ -0,0 +1,161 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from smolagents import CodeAgent, LiteLLMModel, DuckDuckGoSearchTool, PythonInterpreterTool, FinalAnswerTool, VisitWebpageTool, tool
2
+ import os
3
+ import wikipediaapi
4
+ from youtubesearchpython import VideosSearch
5
+ from youtube_transcript_api import YouTubeTranscriptApi, NoTranscriptFound, TranscriptsDisabled
6
+ import pandas as pd
7
+
8
# Shared Wikipedia client for the English edition; used by
# get_wikipedia_summary below.
# NOTE(review): the user agent carries a placeholder contact address —
# presumably it should identify the real operator; confirm before deploying.
wiki_api = wikipediaapi.Wikipedia(
    language='en',
    user_agent="MyAgent/1.0 (contact@example.com)"
)
12
+
13
@tool
def search_youtube_video(query: str) -> str:
    """
    Searches YouTube and returns the title and URL of the top result.

    Args:
        query (str): The search term to look up on YouTube.

    Returns:
        str: The title and URL of the top video result.

    """
    # NOTE(review): the docstring text is kept as-is because the @tool
    # decorator presumably derives the tool description from it — confirm.
    print(f"--- Executing Youtube with query: '{query}' ---")
    try:
        # Only the single best hit is requested from the search backend.
        hits = VideosSearch(query, limit=1).result()['result']
        best = hits[0]
        url = f"https://www.youtube.com/watch?v={best['id']}"
        title = best['title']
    except IndexError:
        # An empty result list means the search matched nothing.
        return "Error: No YouTube videos found for that query."
    except Exception as e:
        return f"An unknown error occurred during Youtube: {e}"
    return f"Title: {title}\nURL: {url}"
37
+
38
@tool
def get_youtube_transcript(video_url: str) -> str:
    """
    Extracts and returns the full transcript of a YouTube video.

    Args:
        video_url (str): The full URL of the YouTube video. Accepts watch URLs (youtube.com/watch?v=ID, with the v parameter in any position), short links (youtu.be/ID) and /shorts/, /embed/ or /live/ URLs.

    Returns:
        str: The transcript text, or an error message if unavailable.
    """
    # Imported locally so the block needs no change to the file's import header.
    from urllib.parse import parse_qs, urlparse

    print(f"--- Executing YouTube Transcript Tool for URL: '{video_url}' ---")
    try:
        # Parse the URL properly instead of splitting on substrings, so that
        # extra query parameters, fragments and alternative path layouts
        # do not leak into (or hide) the video ID.
        url = video_url.strip()
        if "://" not in url:
            # Scheme-less input such as "youtu.be/ID" would otherwise be
            # parsed as a bare path with no host.
            url = "https://" + url
        parsed = urlparse(url)
        host = (parsed.hostname or "").lower()
        segments = [part for part in parsed.path.split("/") if part]

        video_id = ""
        v_values = parse_qs(parsed.query).get("v")
        if v_values:
            # Standard watch URL; "v" may appear anywhere in the query string.
            video_id = v_values[0]
        elif host == "youtu.be" or host.endswith(".youtu.be"):
            video_id = segments[0] if segments else ""
        elif (
            "youtube" in host
            and len(segments) >= 2
            and segments[0] in ("shorts", "embed", "live", "v")
        ):
            video_id = segments[1]

        if not video_id:
            return "Error: Invalid YouTube URL format."

        # Fetch the transcript.
        # NOTE(review): get_transcript is the class-level API; newer releases
        # of youtube-transcript-api reportedly replace it with an instance
        # fetch() method — confirm against the pinned version before upgrading.
        transcript_list = YouTubeTranscriptApi.get_transcript(video_id)

        # Combine transcript segments into a single block of text
        return " ".join(item['text'] for item in transcript_list)
    except NoTranscriptFound:
        return "Error: No transcript could be found for this video."
    except TranscriptsDisabled:
        return "Error: Transcripts are disabled for this video."
    except Exception as e:
        return f"An unknown error occurred while fetching the transcript: {e}"
72
+
73
@tool
def get_wikipedia_summary(query: str) -> str:
    """
    Fetches and returns the summary of a Wikipedia article.

    Args:
        query (str): The title or topic of the Wikipedia article to search.

    Returns:
        str: The summary text of the article, or an error message if not found.
    """
    print(f"--- Executing Wikipedia Tool with query: '{query}' ---")
    try:
        page = wiki_api.page(query)
        if not page.exists():
            return f"Error: The Wikipedia page for '{query}' could not be found."
        return f"Title: {page.title}\n\nSummary:\n{page.summary}"
    except Exception as e:
        # Match the other tools in this module: report failures (e.g. network
        # or API errors) as a string the agent can read instead of raising.
        return f"An unknown error occurred while fetching the Wikipedia page: {e}"
89
+
90
@tool
def analyze_excel_file(file_path: str, query: str) -> str:
    """
    Load an Excel file with pandas and return its size, column names and summary statistics.

    Args:
        file_path (str): the path to the Excel file.
        query (str): Question about the data (currently not used by the analysis; kept for interface compatibility).

    Returns:
        str: A text report about the file, or an error message if it could not be analyzed.
    """
    try:
        # Read the Excel file
        df = pd.read_excel(file_path)

        result = (
            f"Excel file loaded with {len(df)} rows and {len(df.columns)} columns.\n"
        )
        # Column labels are not guaranteed to be strings (numeric or datetime
        # headers are common in spreadsheets); str.join would raise TypeError
        # on them, so convert each label explicitly.
        column_names = ', '.join(str(column) for column in df.columns)
        result += f"Columns: {column_names}\n\n"

        # Add summary statistics
        result += "Summary statistics:\n"
        result += str(df.describe())

        return result

    except Exception as e:
        return f"Error analyzing Excel file: {str(e)}"
116
+
117
class BasicAgent:
    """Callable wrapper around a CodeAgent configured with search, web, YouTube, Wikipedia and Excel tools."""

    def __init__(self):
        """Build the underlying model and the tool-equipped CodeAgent."""
        llm = LiteLLMModel(model_id="gpt-4.1-2025-04-14")

        toolbox = [
            DuckDuckGoSearchTool(),
            PythonInterpreterTool(),
            FinalAnswerTool(),
            VisitWebpageTool(),
            search_youtube_video,
            get_youtube_transcript,
            get_wikipedia_summary,
            analyze_excel_file,
        ]
        allowed_imports = ['numpy', 'csv', 'xlrd', 'openpyxl', 'pandas', 'markdownify', 'requests']

        self.agent = CodeAgent(
            model=llm,
            tools=toolbox,
            additional_authorized_imports=allowed_imports,
            add_base_tools=False,
            max_steps=10,
        )

    def __call__(self, question: str) -> str:
        """
        Run the agent on a question and return its answer as stripped text.

        Args:
            question (str): The question to answer.

        Returns:
            str: The agent's output converted to a string with surrounding whitespace removed.
        """
        # NOTE(review): any indentation inside the original triple-quoted
        # prompt literal is not visible in this view; confirm exact whitespace.
        preamble = (
            "\n"
            "__CONSTRAINTS__\n"
            "- DO NOT start with an intro or include an outro.\n"
        )

        print(f"Agent received question (first 50 chars): {question[:50]}...")
        answer = self.agent.run(preamble + question)

        print("Raw result:", answer)

        # The run result may be a mapping with an "output" key, an object with
        # an "output" attribute, or the answer itself.
        if isinstance(answer, dict) and "output" in answer:
            payload = answer["output"]
        elif hasattr(answer, "output"):
            payload = answer.output
        else:
            payload = answer

        return str(payload).strip()
app.py CHANGED
@@ -3,22 +3,12 @@ import gradio as gr
3
  import requests
4
  import inspect
5
  import pandas as pd
 
6
 
7
  # (Keep Constants as is)
8
  # --- Constants ---
9
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
10
 
11
- # --- Basic Agent Definition ---
12
- # ----- THIS IS WERE YOU CAN BUILD WHAT YOU WANT ------
13
- class BasicAgent:
14
- def __init__(self):
15
- print("BasicAgent initialized.")
16
- def __call__(self, question: str) -> str:
17
- print(f"Agent received question (first 50 chars): {question[:50]}...")
18
- fixed_answer = "This is a default answer."
19
- print(f"Agent returning fixed answer: {fixed_answer}")
20
- return fixed_answer
21
-
22
  def run_and_submit_all( profile: gr.OAuthProfile | None):
23
  """
24
  Fetches all questions, runs the BasicAgent on them, submits all answers,
@@ -72,6 +62,7 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
72
  # 3. Run your Agent
73
  results_log = []
74
  answers_payload = []
 
75
  print(f"Running agent on {len(questions_data)} questions...")
76
  for item in questions_data:
77
  task_id = item.get("task_id")
@@ -83,10 +74,19 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
83
  submitted_answer = agent(question_text)
84
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
85
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
 
 
 
 
 
 
 
 
 
86
  except Exception as e:
87
  print(f"Error running agent on task {task_id}: {e}")
88
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
89
-
90
  if not answers_payload:
91
  print("Agent did not produce any answers to submit.")
92
  return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
 
3
  import requests
4
  import inspect
5
  import pandas as pd
6
+ from agent import BasicAgent
7
 
8
  # (Keep Constants as is)
9
  # --- Constants ---
10
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
11
 
 
 
 
 
 
 
 
 
 
 
 
12
  def run_and_submit_all( profile: gr.OAuthProfile | None):
13
  """
14
  Fetches all questions, runs the BasicAgent on them, submits all answers,
 
62
  # 3. Run your Agent
63
  results_log = []
64
  answers_payload = []
65
+ f = open("GAIA_result.txt", "w")
66
  print(f"Running agent on {len(questions_data)} questions...")
67
  for item in questions_data:
68
  task_id = item.get("task_id")
 
74
  submitted_answer = agent(question_text)
75
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
76
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
77
+ f.write(f'''
78
+ ------------------------------------------------------------------------------------------
79
+ - ID
80
+ {task_id}
81
+ - Question
82
+ {question_text}
83
+ - Answer
84
+ {submitted_answer}
85
+ ''')
86
  except Exception as e:
87
  print(f"Error running agent on task {task_id}: {e}")
88
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
89
+ f.close()
90
  if not answers_payload:
91
  print("Agent did not produce any answers to submit.")
92
  return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
requirements.txt CHANGED
@@ -1,2 +1,159 @@
1
- gradio
2
- requests
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ aiofiles==24.1.0
2
+ aiohappyeyeballs==2.6.1
3
+ aiohttp==3.12.13
4
+ aiosignal==1.3.2
5
+ aiosqlite==0.21.0
6
+ annotated-types==0.7.0
7
+ anyio==4.9.0
8
+ asttokens==3.0.0
9
+ attrs==25.3.0
10
+ Authlib==1.6.0
11
+ banks==2.1.3
12
+ beautifulsoup4==4.13.4
13
+ certifi==2025.6.15
14
+ cffi==1.17.1
15
+ charset-normalizer==3.4.2
16
+ click==8.2.1
17
+ colorama==0.4.6
18
+ comm==0.2.2
19
+ cryptography==45.0.4
20
+ dataclasses-json==0.6.7
21
+ debugpy==1.8.14
22
+ decorator==5.2.1
23
+ defusedxml==0.7.1
24
+ Deprecated==1.2.18
25
+ dirtyjson==1.0.8
26
+ distro==1.9.0
27
+ duckduckgo_search==8.0.4
28
+ et_xmlfile==2.0.0
29
+ executing==2.2.0
30
+ fastapi==0.115.13
31
+ ffmpy==0.6.0
32
+ filelock==3.18.0
33
+ filetype==1.2.0
34
+ frozenlist==1.7.0
35
+ fsspec==2025.5.1
36
+ gradio==5.34.2
37
+ gradio_client==1.10.3
38
+ greenlet==3.2.3
39
+ griffe==1.7.3
40
+ groovy==0.1.2
41
+ h11==0.16.0
42
+ hf-xet==1.1.5
43
+ httpcore==1.0.9
44
+ httpx==0.28.1
45
+ huggingface-hub==0.33.0
46
+ idna==3.10
47
+ importlib_metadata==8.7.0
48
+ ipykernel==6.29.5
49
+ ipython==9.3.0
50
+ ipython_pygments_lexers==1.1.1
51
+ itsdangerous==2.2.0
52
+ jedi==0.19.2
53
+ Jinja2==3.1.6
54
+ jiter==0.10.0
55
+ joblib==1.5.1
56
+ jsonschema==4.24.0
57
+ jsonschema-specifications==2025.4.1
58
+ jupyter_client==8.6.3
59
+ jupyter_core==5.8.1
60
+ litellm==1.72.9
61
+ llama-cloud==0.1.26
62
+ llama-cloud-services==0.6.34
63
+ llama-index==0.12.44
64
+ llama-index-agent-openai==0.4.11
65
+ llama-index-cli==0.4.3
66
+ llama-index-core==0.12.44
67
+ llama-index-embeddings-openai==0.3.1
68
+ llama-index-indices-managed-llama-cloud==0.7.7
69
+ llama-index-instrumentation==0.2.0
70
+ llama-index-llms-openai==0.4.7
71
+ llama-index-multi-modal-llms-openai==0.5.1
72
+ llama-index-program-openai==0.3.2
73
+ llama-index-question-gen-openai==0.3.1
74
+ llama-index-readers-file==0.4.9
75
+ llama-index-readers-llama-parse==0.4.0
76
+ llama-index-workflows==1.0.1
77
+ llama-parse==0.6.34
78
+ lxml==5.4.0
79
+ markdown-it-py==3.0.0
80
+ markdownify==1.1.0
81
+ MarkupSafe==3.0.2
82
+ marshmallow==3.26.1
83
+ matplotlib-inline==0.1.7
84
+ mdurl==0.1.2
85
+ multidict==6.5.0
86
+ mypy_extensions==1.1.0
87
+ nest-asyncio==1.6.0
88
+ networkx==3.5
89
+ nltk==3.9.1
90
+ numpy==2.3.1
91
+ openai==1.90.0
92
+ openpyxl==3.1.5
93
+ orjson==3.10.18
94
+ packaging==25.0
95
+ pandas==2.2.3
96
+ parso==0.8.4
97
+ pexpect==4.9.0
98
+ pillow==11.2.1
99
+ platformdirs==4.3.8
100
+ primp==0.15.0
101
+ prompt_toolkit==3.0.51
102
+ propcache==0.3.2
103
+ psutil==7.0.0
104
+ ptyprocess==0.7.0
105
+ pure_eval==0.2.3
106
+ pycparser==2.22
107
+ pydantic==2.11.7
108
+ pydantic_core==2.33.2
109
+ pydub==0.25.1
110
+ Pygments==2.19.2
111
+ pypdf==5.6.1
112
+ python-dateutil==2.9.0.post0
113
+ python-dotenv==1.1.0
114
+ python-multipart==0.0.20
115
+ pytz==2025.2
116
+ PyYAML==6.0.2
117
+ pyzmq==27.0.0
118
+ referencing==0.36.2
119
+ regex==2024.11.6
120
+ requests==2.32.4
121
+ rich==14.0.0
122
+ rpds-py==0.25.1
123
+ ruff==0.12.0
124
+ safehttpx==0.1.6
125
+ semantic-version==2.10.0
126
+ setuptools==80.9.0
127
+ shellingham==1.5.4
128
+ six==1.17.0
129
+ smolagents==1.18.0
130
+ sniffio==1.3.1
131
+ soupsieve==2.7
132
+ SQLAlchemy==2.0.41
133
+ stack-data==0.6.3
134
+ starlette==0.46.2
135
+ striprtf==0.0.26
136
+ tenacity==9.1.2
137
+ tiktoken==0.9.0
138
+ tokenizers==0.21.1
139
+ tomlkit==0.13.3
140
+ tornado==6.5.1
141
+ tqdm==4.67.1
142
+ traitlets==5.14.3
143
+ typer==0.16.0
144
+ typing-inspect==0.9.0
145
+ typing-inspection==0.4.1
146
+ typing_extensions==4.14.0
147
+ tzdata==2025.2
148
+ urllib3==2.5.0
149
+ uvicorn==0.34.3
150
+ wcwidth==0.2.13
151
+ websockets==15.0.1
152
+ Wikipedia-API==0.8.1
153
+ wrapt==1.17.2
154
+ xlrd==2.0.2
155
+ yarl==1.20.1
156
+ youtube-python==1.0.13
157
+ youtube-search-python==1.6.6
158
+ youtube-transcript-api==1.1.0
159
+ zipp==3.23.0