{ "root": "/kaggle/input/datasets/josephayanda/curated-agent-coding-dataset-sft-v1", "used": [ { "file": "targeted_sft_2000.jsonl", "available": 2000, "selected": 2000, "purpose": "targeted failure families" }, { "file": "eval_maxxing_3500.jsonl", "available": 3500, "selected": 3200, "purpose": "hidden-test style coding" }, { "file": "coding_sft_5000.jsonl", "available": 5000, "selected": 3200, "purpose": "broad language coding" }, { "file": "real_world_coding_3500.jsonl", "available": 3500, "selected": 1800, "purpose": "production coding tasks" }, { "file": "heavy_real_world_agentic_5000.jsonl", "available": 5000, "selected": 120, "purpose": "long repo-agent examples" } ], "skipped_preference_pairs": [ "orpo_gold_1000.jsonl", "reasoning_preference_1000.jsonl" ], "external": [ { "dataset": "ise-uiuc/Magicoder-Evol-Instruct-110K", "selected": 96 }, { "dataset": "m-a-p/CodeFeedback-Filtered-Instruction", "selected": 96 }, { "dataset": "HuggingFaceH4/CodeAlpaca_20K", "selected": 96 }, { "dataset": "glaiveai/glaive-code-assistant-v3", "selected": 96 } ] }