Spaces:

jackkuo
/

llm-enzyme-kinetics-leaderboard

Sleeping

File size: 4,725 Bytes

"""Utility functions for leaderboard"""
import pandas as pd
from pathlib import Path
from typing import Dict, List, Optional
import json


# Column schema of an empty leaderboard, so callers can rely on these columns
# being present even when no submission could be loaded.
_LEADERBOARD_COLUMNS = [
    'submission_id', 'model_name', 'model_provider', 'ocr_type',
    'submitter', 'submission_date', 'km_exact_match', 'km_tolerance_match',
    'kcat_exact_match', 'kcat_tolerance_match', 'km_kcat_exact_match',
    'km_kcat_tolerance_match', 'overall_exact_match', 'overall_tolerance_match',
    'total_papers', 'total_entries', 'notes', 'verified',
]


def _read_submission_files(directory: Path) -> List[dict]:
    """Read every ``*.json`` file located directly inside ``directory``.

    Files that cannot be read or parsed, or whose top-level JSON value is not
    an object, are reported on stdout and skipped.
    """
    records = []
    # Sorted so the load order does not depend on the filesystem listing order.
    for json_file in sorted(directory.glob("*.json")):
        try:
            # JSON text is UTF-8; do not depend on the platform's locale encoding.
            with open(json_file, 'r', encoding='utf-8') as f:
                data = json.load(f)
        except Exception as e:
            # Deliberately best-effort: one unreadable file must not prevent
            # the remaining submissions from being loaded.
            print(f"Error loading {json_file}: {e}")
            continue
        if not isinstance(data, dict):
            # Each file is expected to hold one submission record; a list or
            # scalar would otherwise become a malformed row in the DataFrame.
            print(
                f"Error loading {json_file}: expected a JSON object, "
                f"got {type(data).__name__}"
            )
            continue
        records.append(data)
    return records


def load_leaderboard_data(data_dir: str = "leaderboard/data") -> pd.DataFrame:
    """Load all leaderboard submissions from JSON files.

    Reads ``*.json`` files from ``data_dir`` and, when it exists, from its
    ``submissions/`` subdirectory. Files that fail to load are reported on
    stdout and skipped.

    Args:
        data_dir: Directory containing submission JSON files.

    Returns:
        DataFrame with one row per loaded submission. When the directory does
        not exist or no submission could be loaded, an empty DataFrame with
        the default leaderboard columns is returned. ``submission_date`` (when
        present) is converted to datetime. Rows are sorted by
        ``overall_exact_match`` in descending order when that column is
        present; otherwise they are returned in load order.
    """
    data_path = Path(data_dir)
    if not data_path.exists():
        return pd.DataFrame(columns=_LEADERBOARD_COLUMNS)

    # Main data directory first, then the submissions/ subdirectory
    # (auto-eval submissions).
    all_data = _read_submission_files(data_path)
    submissions_dir = data_path / "submissions"
    if submissions_dir.exists():
        all_data.extend(_read_submission_files(submissions_dir))

    if not all_data:
        return pd.DataFrame(columns=_LEADERBOARD_COLUMNS)

    df = pd.DataFrame(all_data)

    # Convert date strings to datetime
    if 'submission_date' in df.columns:
        df['submission_date'] = pd.to_datetime(df['submission_date'])

    # Submissions are free-form JSON; if none of them carries the score column
    # there is nothing to rank by, so return the rows unsorted instead of
    # raising KeyError.
    if 'overall_exact_match' not in df.columns:
        return df

    return df.sort_values('overall_exact_match', ascending=False)


def format_metrics(value: float, as_percentage: bool = True) -> str:
    """Render a metric value as display text.

    Args:
        value: Metric value to format.
        as_percentage: When true, the value is multiplied by 100 and shown
            with two decimals and a trailing ``%``; otherwise it is shown
            unscaled with four decimals.

    Returns:
        The formatted string.
    """
    if not as_percentage:
        return "{:.4f}".format(value)

    scaled = value * 100
    return "{:.2f}%".format(scaled)


def get_leaderboard_summary(df: pd.DataFrame) -> Dict:
    """Get summary statistics from the leaderboard.

    Args:
        df: Leaderboard DataFrame. When non-empty it must contain the
            ``model_name`` and ``overall_exact_match`` columns; ``verified``
            is optional.

    Returns:
        Dict with the same keys for empty and non-empty input:
            total_submissions: Number of rows.
            unique_models: Number of distinct ``model_name`` values.
            best_score: Maximum ``overall_exact_match`` multiplied by 100.
            avg_score: Mean ``overall_exact_match`` multiplied by 100.
            verified_submissions: Sum of the ``verified`` column, or 0 when
                the column is absent.
    """
    if df.empty:
        # Same keys as the non-empty result so callers can index the summary
        # without checking whether any submissions exist.
        return {
            'total_submissions': 0,
            'unique_models': 0,
            'best_score': 0.0,
            'avg_score': 0.0,
            'verified_submissions': 0
        }

    return {
        'total_submissions': len(df),
        'unique_models': df['model_name'].nunique(),
        # Scaled by 100 for display; presumably scores are stored as
        # fractions in [0, 1] — confirm against the submission format.
        'best_score': df['overall_exact_match'].max() * 100,
        'avg_score': df['overall_exact_match'].mean() * 100,
        'verified_submissions': df['verified'].sum() if 'verified' in df.columns else 0
    }


def filter_leaderboard(
    df: pd.DataFrame,
    model_provider: Optional[str] = None,
    ocr_type: Optional[str] = None,
    verified_only: bool = False
) -> pd.DataFrame:
    """Return the leaderboard rows that match the given criteria.

    Args:
        df: Leaderboard DataFrame; it is not modified.
        model_provider: Keep only rows whose ``model_provider`` equals this
            value. An empty value, ``None`` or ``"All"`` disables the filter.
        ocr_type: Keep only rows whose ``ocr_type`` equals this value. An
            empty value, ``None`` or ``"All"`` disables the filter.
        verified_only: Keep only rows whose ``verified`` value equals
            ``True``. Ignored when the frame has no ``verified`` column.

    Returns:
        A new DataFrame holding the rows that pass every active filter.
    """
    result = df.copy()

    # Exact-match filters, applied in order: (column name, requested value).
    equality_filters = (
        ('model_provider', model_provider),
        ('ocr_type', ocr_type),
    )
    for column, wanted in equality_filters:
        if not wanted or wanted == "All":
            continue
        result = result[result[column] == wanted]

    if verified_only and 'verified' in result.columns:
        # Explicit comparison with True rather than using the column as a
        # mask, so non-boolean or missing entries simply do not match.
        result = result[result['verified'].eq(True)]

    return result


def get_top_n(df: pd.DataFrame, n: int = 10) -> pd.DataFrame:
    """Return the first ``n`` rows of ``df``.

    Rows are taken in their existing order; no sorting is performed here, so
    the frame must already be ranked for the result to be the "top" entries.
    """
    leading_rows = df.iloc[:n]
    return leading_rows


def create_comparison_data(df: pd.DataFrame) -> Dict:
    """Build aggregated statistics for the comparison charts.

    Args:
        df: Leaderboard DataFrame. When non-empty it must contain the
            ``model_provider``, ``ocr_type``, ``overall_exact_match`` and
            ``overall_tolerance_match`` columns.

    Returns:
        An empty dict for an empty frame. Otherwise a dict with two entries,
        each the ``DataFrame.to_dict()`` form of a grouped aggregation
        rounded to four decimals:
            by_provider: per ``model_provider`` mean/max/count of
                ``overall_exact_match`` and mean of
                ``overall_tolerance_match``.
            by_ocr: per ``ocr_type`` mean/max/count of
                ``overall_exact_match``.
    """
    if df.empty:
        return {}

    # (result key, grouping column, aggregation spec) for each chart.
    chart_specs = (
        (
            'by_provider',
            'model_provider',
            {
                'overall_exact_match': ['mean', 'max', 'count'],
                'overall_tolerance_match': 'mean',
            },
        ),
        (
            'by_ocr',
            'ocr_type',
            {
                'overall_exact_match': ['mean', 'max', 'count'],
            },
        ),
    )

    comparison = {}
    for result_key, group_column, aggregations in chart_specs:
        grouped_stats = df.groupby(group_column).agg(aggregations)
        comparison[result_key] = grouped_stats.round(4).to_dict()
    return comparison