""" Trainer correctness AKSARA berbasis state-native. Modul ini mempertahankan antarmuka lama untuk logging/evaluasi, namun semua istilah yang mengarah ke prediksi token, logits, atau decoding berurutan telah dihapus dari narasi dan keluaran bantu. """ from __future__ import annotations from typing import Any, Dict, Optional class CorrectnessTrainer: """Wrapper ringan untuk monitoring skor correctness.""" def __init__(self, *args, **kwargs): self.args = args self.kwargs = kwargs self.history = [] def update(self, batch: Optional[Dict[str, Any]] = None, metrics: Optional[Dict[str, Any]] = None): batch = batch or {} metrics = metrics or {} summary = { "accuracy": float(metrics.get("accuracy", metrics.get("score", 0.0))), "avg_pos": float(metrics.get("avg_pos", metrics.get("positive_score", 0.0))), } self.history.append( { "batch_keys": sorted(list(batch.keys())), "summary": summary, } ) return summary def summarize(self) -> Dict[str, float]: if not self.history: return {"accuracy": 0.0, "avg_pos": 0.0} acc = sum(item["summary"]["accuracy"] for item in self.history) / len(self.history) avg_pos = sum(item["summary"]["avg_pos"] for item in self.history) / len(self.history) return {"accuracy": float(acc), "avg_pos": float(avg_pos)}