File size: 4,725 Bytes
"""Utility functions for leaderboard"""
import pandas as pd
from pathlib import Path
from typing import Dict, List, Optional
import json
# Column layout of an empty leaderboard; shared by every "no data" return path
# so the two cannot drift apart.
_LEADERBOARD_COLUMNS = [
    'submission_id', 'model_name', 'model_provider', 'ocr_type',
    'submitter', 'submission_date', 'km_exact_match', 'km_tolerance_match',
    'kcat_exact_match', 'kcat_tolerance_match', 'km_kcat_exact_match',
    'km_kcat_tolerance_match', 'overall_exact_match', 'overall_tolerance_match',
    'total_papers', 'total_entries', 'notes', 'verified',
]


def _empty_leaderboard() -> pd.DataFrame:
    """Return a zero-row DataFrame carrying the default leaderboard columns."""
    return pd.DataFrame(columns=list(_LEADERBOARD_COLUMNS))


def _read_submission_files(directory: Path) -> List:
    """Parse every ``*.json`` file directly inside ``directory``.

    Files that cannot be read or decoded are reported on stdout and skipped,
    so one broken submission does not hide the others.
    """
    records = []
    # glob() order depends on the filesystem; sort for a reproducible load order.
    for json_file in sorted(directory.glob("*.json")):
        try:
            # JSON is UTF-8 by specification; do not rely on the locale default.
            with open(json_file, 'r', encoding='utf-8') as f:
                records.append(json.load(f))
        except (OSError, ValueError) as e:
            # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
            print(f"Error loading {json_file}: {e}")
    return records


def load_leaderboard_data(data_dir: str = "leaderboard/data") -> pd.DataFrame:
    """
    Load all leaderboard data from JSON files.

    Reads ``*.json`` from ``data_dir`` and from its ``submissions/``
    subdirectory (auto-eval submissions), one record per file.

    Args:
        data_dir: Directory containing submission JSON files.

    Returns:
        DataFrame with all submissions, sorted by ``overall_exact_match``
        in descending order when that column is present. If the directory is
        missing or yields no readable records, an empty DataFrame with the
        default leaderboard columns is returned.
    """
    data_path = Path(data_dir)
    if not data_path.is_dir():
        return _empty_leaderboard()

    # Load from the main data directory, then from submissions/.
    all_data = _read_submission_files(data_path)
    submissions_dir = data_path / "submissions"
    if submissions_dir.is_dir():
        all_data.extend(_read_submission_files(submissions_dir))

    if not all_data:
        return _empty_leaderboard()

    df = pd.DataFrame(all_data)

    # Convert date strings to datetime
    if 'submission_date' in df.columns:
        df['submission_date'] = pd.to_datetime(df['submission_date'])

    # Records are free-form JSON: if none of them carries the score there is
    # nothing to rank by, so return the rows unsorted instead of raising.
    if 'overall_exact_match' not in df.columns:
        return df

    # A stable sort keeps tied scores in (sorted) load order.
    return df.sort_values('overall_exact_match', ascending=False, kind='mergesort')
def format_metrics(value: float, as_percentage: bool = True) -> str:
    """Render a metric value as display text.

    Args:
        value: Numeric metric to format.
        as_percentage: When true (the default), ``value`` is multiplied by
            100 and shown with two decimals and a trailing ``%``; otherwise
            it is shown as-is with four decimals.

    Returns:
        The formatted string.
    """
    if not as_percentage:
        return f"{value:.4f}"
    scaled = value * 100
    return f"{scaled:.2f}%"
def get_leaderboard_summary(df: pd.DataFrame) -> Dict:
    """Get summary statistics from leaderboard.

    Args:
        df: Leaderboard submissions, one row per submission.

    Returns:
        Dict with the keys ``total_submissions``, ``unique_models``,
        ``best_score``, ``avg_score`` and ``verified_submissions``. The same
        keys are present for an empty leaderboard (all zero). Scores are the
        max / mean of ``overall_exact_match`` multiplied by 100. All values
        are plain Python ``int`` / ``float`` so the dict can be serialized
        with ``json`` directly.
    """
    summary = {
        'total_submissions': 0,
        'unique_models': 0,
        'best_score': 0.0,
        'avg_score': 0.0,
        'verified_submissions': 0,
    }
    if df.empty:
        return summary

    summary['total_submissions'] = len(df)

    if 'model_name' in df.columns:
        summary['unique_models'] = int(df['model_name'].nunique())

    if 'overall_exact_match' in df.columns:
        # Drop missing scores first so an all-NaN column reports 0.0, not NaN.
        scores = df['overall_exact_match'].dropna()
        if not scores.empty:
            summary['best_score'] = float(scores.max()) * 100
            summary['avg_score'] = float(scores.mean()) * 100

    if 'verified' in df.columns:
        # Element-wise comparison: missing flags count as "not verified".
        summary['verified_submissions'] = int(df['verified'].eq(True).sum())

    return summary
def filter_leaderboard(
    df: pd.DataFrame,
    model_provider: Optional[str] = None,
    ocr_type: Optional[str] = None,
    verified_only: bool = False
) -> pd.DataFrame:
    """Return the rows of ``df`` that satisfy the selected criteria.

    Args:
        df: Leaderboard submissions; it is copied, never modified.
        model_provider: Keep only rows whose ``model_provider`` equals this
            value. ``None``, an empty string, or ``"All"`` disables the filter.
        ocr_type: Keep only rows whose ``ocr_type`` equals this value.
            ``None``, an empty string, or ``"All"`` disables the filter.
        verified_only: When true, keep only rows whose ``verified`` value
            equals ``True``; ignored if the frame has no ``verified`` column.

    Returns:
        A new DataFrame containing the matching rows.
    """
    result = df.copy()

    # Both equality filters share the same "unset or 'All' means no filter" rule.
    equality_filters = (
        ('model_provider', model_provider),
        ('ocr_type', ocr_type),
    )
    for column, wanted in equality_filters:
        if not wanted or wanted == "All":
            continue
        result = result.loc[result[column] == wanted]

    if verified_only and 'verified' in result.columns:
        # Element-wise comparison on a Series; `is True` would not work here.
        result = result.loc[result['verified'] == True]

    return result
def get_top_n(df: pd.DataFrame, n: int = 10) -> pd.DataFrame:
    """Return the first ``n`` rows of ``df``.

    No sorting happens here: the rows are "top" only in the order the
    caller already established.

    Args:
        df: Leaderboard submissions, in the desired ranking order.
        n: Number of leading rows to keep (default 10).

    Returns:
        DataFrame with at most ``n`` rows.
    """
    leading_rows = df.head(n=n)
    return leading_rows
def create_comparison_data(df: pd.DataFrame) -> Dict:
    """Aggregate leaderboard scores for the comparison charts.

    Args:
        df: Leaderboard submissions.

    Returns:
        An empty dict when ``df`` is empty. Otherwise a dict with two entries,
        each the ``to_dict()`` form of a grouped aggregate rounded to four
        decimals:

        * ``'by_provider'``: per ``model_provider``, the mean / max / count of
          ``overall_exact_match`` and the mean of ``overall_tolerance_match``.
        * ``'by_ocr'``: per ``ocr_type``, the mean / max / count of
          ``overall_exact_match``.
    """
    if df.empty:
        return {}

    # Aggregations requested for each grouping.
    provider_spec = {
        'overall_exact_match': ['mean', 'max', 'count'],
        'overall_tolerance_match': 'mean',
    }
    ocr_spec = {
        'overall_exact_match': ['mean', 'max', 'count'],
    }

    # Group by model provider
    per_provider = df.groupby('model_provider').agg(provider_spec)
    per_provider = per_provider.round(4)

    # Group by OCR type
    per_ocr = df.groupby('ocr_type').agg(ocr_spec)
    per_ocr = per_ocr.round(4)

    comparison = {}
    comparison['by_provider'] = per_provider.to_dict()
    comparison['by_ocr'] = per_ocr.to_dict()
    return comparison
|