| """Generate markdown resource views and search index from catalog JSON. |
| |
| Usage: |
| python scripts/generate_resource_views.py |
| """ |
|
|
| from __future__ import annotations |
|
|
| import json |
| from pathlib import Path |
| from typing import Any |
|
|
|
|
# Catalog category key -> (path of the generated README, heading used on that page).
# Each row below is (category key, README path, heading); row order is kept
# in the resulting mapping.
CATEGORY_CONFIG = {
    category: (readme_path, heading)
    for category, readme_path, heading in (
        ("dataset", "resources/datasets/README.md", "Datasets"),
        ("model", "resources/models/README.md", "Models"),
        ("benchmark", "resources/benchmarks/README.md", "Benchmarks"),
        ("tool", "resources/tools/README.md", "Tools"),
        ("paper", "resources/papers/README.md", "Papers"),
        ("project", "resources/projects/README.md", "Projects"),
        ("code", "resources/codes/README.md", "Code"),
    )
}
|
|
|
|
| def _load_catalog(path: Path) -> dict[str, Any]: |
| return json.loads(path.read_text(encoding="utf-8")) |
|
|
|
|
| def _escape_cell(value: str) -> str: |
| return value.replace("|", "\\|").strip() |
|
|
|
|
| def _marker_text(markers: list[str]) -> str: |
| return ", ".join(f"`{marker}`" for marker in markers) |
|
|
|
|
def _resource_row(resource: dict[str, Any]) -> str:
    """Render one catalog entry as a four-column markdown table row.

    The columns are, in order: the title, a link labelled with the source
    name pointing at the resource URL, a link labelled with the evidence
    text (followed by the markers in parentheses when there are any)
    pointing at the evidence URL, and the primary use.

    Every free-text cell goes through ``_escape_cell`` so a ``|`` in the
    catalog data cannot introduce an extra column.

    Raises:
        KeyError: if ``title``, ``source``, ``url``, ``primary_use`` or
            ``pashto_evidence`` (with its ``evidence_text``, ``markers``
            and ``evidence_url`` keys) is missing from *resource*.
    """
    evidence = resource["pashto_evidence"]
    evidence_text = _escape_cell(evidence["evidence_text"])
    markers = _marker_text(evidence["markers"])
    if markers:
        # Markers come from the catalog too, so escape them like the text.
        evidence_text = f"{evidence_text} ({_escape_cell(markers)})"
    # The source label was previously inserted raw, unlike the other cells.
    source_label = _escape_cell(resource["source"])
    return (
        f"| {_escape_cell(resource['title'])} | "
        f"[{source_label}]({resource['url']}) | "
        f"[{evidence_text}]({evidence['evidence_url']}) | "
        f"{_escape_cell(resource['primary_use'])} |"
    )
|
|
|
|
| def _write_markdown_table(path: Path, title: str, resources: list[dict[str, Any]]) -> None: |
| lines = [ |
| f"# {title}", |
| "", |
| "## Verified Pashto Resources", |
| "", |
| "| Resource | Link | Pashto Evidence | Primary Use |", |
| "|---|---|---|---|", |
| ] |
|
|
| if resources: |
| lines.extend(_resource_row(resource) for resource in resources) |
| else: |
| lines.append("| _None yet_ | - | - | - |") |
|
|
| lines.extend( |
| [ |
| "", |
| "## Maintenance", |
| "- Source of truth: [../catalog/resources.json](../catalog/resources.json)", |
| "- Validation: [../../scripts/validate_resource_catalog.py](../../scripts/validate_resource_catalog.py)", |
| "- Generated by: [../../scripts/generate_resource_views.py](../../scripts/generate_resource_views.py)", |
| "", |
| ] |
| ) |
| path.write_text("\n".join(lines), encoding="utf-8") |
|
|
|
|
| def _write_resources_home(path: Path, counts: dict[str, int], total_verified: int) -> None: |
| lines = [ |
| "# Resources", |
| "", |
| "Structured, Pashto-focused resource tracking lives in this folder.", |
| "", |
| "## Sections", |
| f"- Datasets ({counts.get('dataset', 0)}): [datasets/README.md](datasets/README.md)", |
| f"- Models ({counts.get('model', 0)}): [models/README.md](models/README.md)", |
| f"- Benchmarks ({counts.get('benchmark', 0)}): [benchmarks/README.md](benchmarks/README.md)", |
| f"- Tools ({counts.get('tool', 0)}): [tools/README.md](tools/README.md)", |
| f"- Papers ({counts.get('paper', 0)}): [papers/README.md](papers/README.md)", |
| f"- Projects ({counts.get('project', 0)}): [projects/README.md](projects/README.md)", |
| f"- Code ({counts.get('code', 0)}): [codes/README.md](codes/README.md)", |
| "", |
| "## Machine-Readable Catalog", |
| "- Canonical catalog: [catalog/resources.json](catalog/resources.json)", |
| "- Candidate feed: [catalog/pending_candidates.json](catalog/pending_candidates.json)", |
| "- Schema: [schema/resource.schema.json](schema/resource.schema.json)", |
| "", |
| "## Update Rule", |
| "- Add only validated resources with explicit Pashto relevance.", |
| "- Keep every external reference clickable using markdown links.", |
| "- Run `python scripts/validate_resource_catalog.py` before opening a PR.", |
| "- Run `python scripts/generate_resource_views.py` after catalog changes.", |
| "", |
| f"Verified resource count: `{total_verified}`", |
| "", |
| ] |
| path.write_text("\n".join(lines), encoding="utf-8") |
|
|
|
|
| def _build_search_payload(resources: list[dict[str, Any]], updated_on: str) -> dict[str, Any]: |
| search_items: list[dict[str, Any]] = [] |
| for resource in resources: |
| evidence = resource["pashto_evidence"] |
| search_items.append( |
| { |
| "id": resource["id"], |
| "title": resource["title"], |
| "url": resource["url"], |
| "category": resource["category"], |
| "source": resource["source"], |
| "status": resource["status"], |
| "summary": resource["summary"], |
| "primary_use": resource["primary_use"], |
| "tasks": resource.get("tasks", []), |
| "tags": resource["tags"], |
| "evidence_text": evidence["evidence_text"], |
| "evidence_url": evidence["evidence_url"], |
| "markers": evidence["markers"], |
| } |
| ) |
|
|
| return { |
| "generated_on": f"{updated_on}T00:00:00Z", |
| "count": len(search_items), |
| "resources": search_items, |
| } |
|
|
|
|
def main() -> int:
    """Regenerate every derived view from the resource catalog.

    Reads ``resources/catalog/resources.json`` and then writes, in order:
    one README per entry in ``CATEGORY_CONFIG`` listing the verified
    resources of that category sorted by case-insensitive title, the
    ``resources/README.md`` overview, and ``docs/search/resources.json``.
    The search index covers every catalog resource, not only verified
    ones. All paths are relative to the current working directory.

    Returns:
        ``0`` once all files are written and the summary line is printed.
    """
    catalog = _load_catalog(Path("resources/catalog/resources.json"))
    resources: list[dict[str, Any]] = catalog.get("resources", [])
    updated_on = catalog.get("updated_on", "1970-01-01")

    # Start with an empty bucket per known category so that categories
    # without entries still get a README and a zero count.
    grouped: dict[str, list[dict[str, Any]]] = {}
    for known_category in CATEGORY_CONFIG:
        grouped[known_category] = []

    verified: list[dict[str, Any]] = []
    for entry in resources:
        if entry.get("status") != "verified":
            continue
        verified.append(entry)
        entry_category = entry.get("category")
        # Verified entries in an unknown category count toward the total
        # but do not appear in any category page.
        if entry_category in grouped:
            grouped[entry_category].append(entry)

    def by_title(item: dict[str, Any]) -> str:
        return item["title"].lower()

    for known_category, (file_path, title) in CATEGORY_CONFIG.items():
        readme_path = Path(file_path)
        readme_path.parent.mkdir(parents=True, exist_ok=True)
        _write_markdown_table(readme_path, title, sorted(grouped[known_category], key=by_title))

    counts = {name: len(bucket) for name, bucket in grouped.items()}
    _write_resources_home(Path("resources/README.md"), counts, len(verified))

    index_path = Path("docs/search/resources.json")
    index_path.parent.mkdir(parents=True, exist_ok=True)
    index_text = json.dumps(
        _build_search_payload(resources, updated_on),
        ensure_ascii=False,
        indent=2,
    )
    index_path.write_text(index_text + "\n", encoding="utf-8")

    print(
        "Generated resources markdown and search index: "
        f"{len(verified)} verified resources, {len(resources)} total resources"
    )
    return 0
|
|
|
|
if __name__ == "__main__":
    # Run only when executed as a script; main()'s return value becomes the
    # process exit status.
    exit_status = main()
    raise SystemExit(exit_status)
|
|