Spaces:

ChambreAgriculturePaysLoire
/

routeur_ia_api

Running

File size: 26,620 Bytes

227f51c

{
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "# Extraction des Prestations de Service - Catalogue CAPL\n",
        "\n",
        "Ce notebook extrait les données structurées des prestations de service depuis le catalogue PDF.\n",
        "\n",
        "**Étapes:**\n",
        "1. Chargement et upload du PDF\n",
        "2. OCR avec Mistral\n",
        "3. Conversion en Markdown\n",
        "4. Extraction structurée par lots avec JSON Schema\n",
        "5. Exploration des résultats et export CSV\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 1,
      "metadata": {},
      "outputs": [
        {
          "data": {
            "text/plain": [
              "True"
            ]
          },
          "execution_count": 1,
          "metadata": {},
          "output_type": "execute_result"
        }
      ],
      "source": [
        "from dotenv import load_dotenv\n",
        "load_dotenv()\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 2,
      "metadata": {},
      "outputs": [],
      "source": [
        "import os, sys, json\n",
        "from pathlib import Path\n",
        "\n",
        "# Ensure project root on path (notebook is under 'knowledge/')\n",
        "project_root = Path('..').resolve()\n",
        "if str(project_root) not in sys.path:\n",
        "    sys.path.append(str(project_root))\n",
        "\n",
        "from services import mistral_service\n",
        "from services.batch_extractor_service import BatchExtractor, BatchConfig, load_ocr_result\n"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "## 1. Chargement et upload du PDF\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 3,
      "metadata": {},
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "Resolved catalog path: C:\\Users\\cd\\Documents\\CAPL\\ROUTEUR\\dev\\routeur_ia_api\\knowledge\\docs\\CAPL Catalogue SERVICE 2024_2025.pdf\n",
            "PDF size (bytes): 31633158\n",
            "Signed URL prefix: https://mistralaifilesapiprodswe.blob.core.windows.net/fine-...\n",
            "{'type': 'document_url', 'document_url': '<signed URL redacted>'}\n"
          ]
        }
      ],
      "source": [
        "# Locate the catalog PDF, upload it, and build the document source used by the OCR step.\n",
        "catalog_filename = 'CAPL Catalogue SERVICE 2024_2025.pdf'\n",
        "\n",
        "# Candidate locations: relative to the notebook dir (knowledge/docs) first,\n",
        "# then relative to the project root as a fallback.\n",
        "catalog_path = Path('docs') / catalog_filename\n",
        "alt_path = Path('knowledge') / 'docs' / catalog_filename\n",
        "if not catalog_path.exists():\n",
        "    if not alt_path.exists():\n",
        "        raise FileNotFoundError(f\"Catalog not found at {catalog_path} or {alt_path}\")\n",
        "    catalog_path = alt_path\n",
        "\n",
        "print(\"Resolved catalog path:\", catalog_path.resolve())\n",
        "\n",
        "# Read PDF bytes\n",
        "pdf_bytes = catalog_path.read_bytes()\n",
        "print(\"PDF size (bytes):\", len(pdf_bytes))\n",
        "\n",
        "# Upload the PDF (REST-based). The returned string is handled either as a data URI\n",
        "# (it starts with \"data:\") or as a signed URL.\n",
        "source_or_url = mistral_service.upload_pdf(pdf_bytes, catalog_path.name)\n",
        "if source_or_url.startswith(\"data:\"):\n",
        "    print(\"Using data URI as document source (length):\", len(source_or_url))\n",
        "else:\n",
        "    # Only a short prefix is shown: the query string of a signed URL carries the signature.\n",
        "    print(\"Signed URL prefix:\", str(source_or_url)[:60] + \"...\")\n",
        "\n",
        "# Both cases are wrapped the same way for the OCR/completion calls.\n",
        "document_source = mistral_service.build_document_url(source_or_url)\n",
        "\n",
        "# Never print document_source in full: it embeds either the signed URL (a credential,\n",
        "# which would be saved in the notebook output) or the whole PDF as a data URI.\n",
        "if isinstance(document_source, dict):\n",
        "    source_kind = document_source.get('type')\n",
        "else:\n",
        "    source_kind = type(document_source).__name__\n",
        "print(\"Document source type:\", source_kind)\n"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "## 2. Traitement OCR avec Mistral\n",
        "\n",
        "Utilisation de l'OCR de Mistral pour extraire le contenu structuré du PDF.\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 4,
      "metadata": {},
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "Exécution de l'OCR avec Mistral...\n",
            "Résultat OCR sauvegardé dans prestations_ocr_result.json\n",
            "Nombre de pages extraites : 52\n"
          ]
        }
      ],
      "source": [
        "# File used to cache the raw OCR result\n",
        "ocr_json_path = Path('prestations_ocr_result.json')\n",
        "\n",
        "if not ocr_json_path.exists():\n",
        "    # No cached result yet: run the OCR once and persist the complete structure\n",
        "    print(\"Exécution de l'OCR avec Mistral...\")\n",
        "    ocr_result = mistral_service.process_ocr(\n",
        "        document_source=document_source,\n",
        "        include_image_base64=False\n",
        "    )\n",
        "\n",
        "    with open(ocr_json_path, 'w', encoding='utf-8') as cache_file:\n",
        "        json.dump(ocr_result, cache_file, ensure_ascii=False, indent=2)\n",
        "    print(f\"Résultat OCR sauvegardé dans {ocr_json_path}\")\n",
        "else:\n",
        "    # The OCR was already done: reload it instead of reprocessing the document\n",
        "    print(f\"OCR déjà effectué. Chargement depuis {ocr_json_path}\")\n",
        "    ocr_result = json.loads(ocr_json_path.read_text(encoding='utf-8'))\n",
        "\n",
        "# Show some information about the result\n",
        "num_pages = len(ocr_result.get('pages', []))\n",
        "print(f\"Nombre de pages extraites : {num_pages}\")\n"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "## 3. Conversion OCR → Markdown\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 5,
      "metadata": {},
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "Contenu Markdown sauvegardé dans prestations_catalog_ocr.md\n",
            "Taille du markdown : 70778 caractères\n",
            "\n",
            "Premiers 500 caractères :\n",
            "# CAPL Catalogue SERVICE 2024_2025.pdf\n",
            "\n",
            "**Document traité par OCR Mistral**  \n",
            "**Nombre de pages :** 52  \n",
            "**Date de traitement :** 1768924910.856373\n",
            "\n",
            "---\n",
            "\n",
            "\n",
            "\n",
            "---\n",
            "\n",
            "# Page 1\n",
            "\n",
            "# PROZGRI \n",
            "\n",
            "POUR VOUS. AUJOURD'HUI. ET DEMAIN\n",
            "![img-0.jpeg](img-0.jpeg)\n",
            "\n",
            "OFFRE DE SERVICES AUX AGRICULTEURS\n",
            "\n",
            "## Des <br> solutions <br> pour réussir vos projets\n",
            "\n",
            "\n",
            "---\n",
            "\n",
            "# Page 2\n",
            "\n",
            "# Le réseau Chambre d'agriculture \n",
            "\n",
            "des Pays de la Loire\n",
            "\n",
            "## Les Chambres d'agriculture\n",
            "\n",
            "$1^{\\text {ER }}$ RÉSEAU de conseil agricole et territorial ce...\n"
          ]
        }
      ],
      "source": [
        "from datetime import datetime, timezone\n",
        "\n",
        "# Convert the OCR result to Markdown\n",
        "markdown_content = mistral_service.ocr_response_to_markdown(ocr_result)\n",
        "\n",
        "# Processing date = last modification time of the cached OCR file, rendered as a\n",
        "# readable ISO 8601 UTC timestamp (st_mtime alone is a raw epoch float).\n",
        "if ocr_json_path.exists():\n",
        "    processed_at = datetime.fromtimestamp(\n",
        "        ocr_json_path.stat().st_mtime, tz=timezone.utc\n",
        "    ).isoformat(timespec='seconds')\n",
        "else:\n",
        "    processed_at = 'N/A'\n",
        "\n",
        "# Metadata header prepended to the converted content.\n",
        "# The two trailing spaces on the metadata lines are Markdown hard line breaks.\n",
        "header = f\"\"\"# {catalog_filename}\n",
        "\n",
        "**Document traité par OCR Mistral**  \n",
        "**Nombre de pages :** {num_pages}  \n",
        "**Date de traitement :** {processed_at}\n",
        "\n",
        "---\n",
        "\n",
        "\"\"\"\n",
        "\n",
        "full_markdown = header + markdown_content\n",
        "\n",
        "# Save to a Markdown file\n",
        "md_output_path = Path('prestations_catalog_ocr.md')\n",
        "with open(md_output_path, 'w', encoding='utf-8') as f:\n",
        "    f.write(full_markdown)\n",
        "\n",
        "print(f\"Contenu Markdown sauvegardé dans {md_output_path}\")\n",
        "print(f\"Taille du markdown : {len(full_markdown)} caractères\")\n",
        "print(f\"\\nPremiers 500 caractères :\\n{full_markdown[:500]}...\")\n"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "## 4. Extraction structurée avec JSON Schema\n",
        "\n",
        "Utilisation du service BatchExtractor pour extraire les prestations de manière structurée.\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 6,
      "metadata": {},
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "✅ Prompts et schéma JSON définis\n",
            "\n",
            "📋 Champs à extraire:\n",
            "   - title\n",
            "   - content\n",
            "   - domain\n",
            "   - target_audience\n",
            "   - associated_tools\n",
            "   - label\n",
            "   - service\n",
            "   - contact\n"
          ]
        }
      ],
      "source": [
        "# =====================================================\n",
        "# PROMPT AND SCHEMA CONFIGURATION FOR PRESTATIONS\n",
        "# =====================================================\n",
        "\n",
        "# System prompt sent with every extraction request (adjacent literals are concatenated).\n",
        "SYSTEM_PROMPT = (\n",
        "    \"You are an expert data extractor. From the provided French service catalog (Catalogue des prestations), \"\n",
        "    \"extract a clean list of services (prestations). For each prestation, extract the following fields \"\n",
        "    \"when available: title (titre/nom), content (contenu/description), domain (domaine), \"\n",
        "    \"target_audience (cible), associated_tools (outils associés), label, service (service proposant), \"\n",
        "    \"and contact. \"\n",
        "    \"Note: Not all fields will be available for every prestation - use null for missing data. \"\n",
        "    \"Keep French text as-is. Return only strict JSON following the schema.\"\n",
        ")\n",
        "\n",
        "\n",
        "def _nullable_text(description):\n",
        "    \"\"\"Return the schema of a field that is either a string or null.\"\"\"\n",
        "    return {\"type\": [\"string\", \"null\"], \"description\": description}\n",
        "\n",
        "\n",
        "# Optional fields of a prestation, in schema order: field name -> description\n",
        "OPTIONAL_FIELDS = {\n",
        "    \"content\": \"Contenu / Description de la prestation\",\n",
        "    \"domain\": \"Domaine de la prestation\",\n",
        "    \"target_audience\": \"Cible / Public visé\",\n",
        "    \"associated_tools\": \"Outils associés\",\n",
        "    \"label\": \"Label de la prestation\",\n",
        "    \"service\": \"Service proposant la prestation\",\n",
        "    \"contact\": \"Contact pour la prestation\",\n",
        "}\n",
        "\n",
        "# 'title' is the only mandatory field; every other field may be null.\n",
        "PRESTATION_PROPERTIES = {\n",
        "    \"title\": {\"type\": \"string\", \"description\": \"Titre / Nom de la prestation\"},\n",
        "    **{name: _nullable_text(text) for name, text in OPTIONAL_FIELDS.items()},\n",
        "}\n",
        "\n",
        "JSON_SCHEMA = {\n",
        "    \"name\": \"CatalogPrestations\",\n",
        "    \"schema_definition\": {\n",
        "        \"title\": \"CatalogPrestations\",\n",
        "        \"type\": \"object\",\n",
        "        \"properties\": {\n",
        "            \"prestations\": {\n",
        "                \"type\": \"array\",\n",
        "                \"items\": {\n",
        "                    \"type\": \"object\",\n",
        "                    \"properties\": PRESTATION_PROPERTIES,\n",
        "                    \"required\": [\"title\"],\n",
        "                },\n",
        "            },\n",
        "        },\n",
        "        \"required\": [\"prestations\"],\n",
        "        \"additionalProperties\": False,\n",
        "    },\n",
        "    \"description\": \"Schema for extracting service catalog data\",\n",
        "    \"strict\": True,\n",
        "}\n",
        "\n",
        "print(\"✅ Prompts et schéma JSON définis\")\n",
        "print(f\"\\n📋 Champs à extraire:\")\n",
        "item_schema = JSON_SCHEMA['schema_definition']['properties']['prestations']['items']\n",
        "for field_name in item_schema['properties']:\n",
        "    print(f\"   - {field_name}\")\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 7,
      "metadata": {},
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "📂 Chargement de prestations_ocr_result.json\n",
            "📄 Pages à traiter: 6 à 44 (39 pages sur 52 total)\n",
            "🔄 Traitement en 4 batchs de 10 pages max...\n",
            "⏱️ Pause de 2.0s entre chaque batch\n",
            "\n",
            "📦 Batch 1/4 - Pages 6 à 15... ✅ 65 éléments extraits\n",
            "📦 Batch 2/4 - Pages 16 à 25... ✅ 85 éléments extraits\n",
            "📦 Batch 3/4 - Pages 26 à 35... ✅ 32 éléments extraits\n",
            "📦 Batch 4/4 - Pages 36 à 44... ✅ 39 éléments extraits\n",
            "\n",
            "============================================================\n",
            "📊 RÉSULTAT FINAL\n",
            "============================================================\n",
            "✅ Total éléments extraits: 221\n",
            "\n",
            "💾 Résultat sauvegardé dans: prestations_2425.json\n",
            "\n",
            "📝 Aperçu des 3 premières prestations:\n",
            "  1. Point accueil installation (PAI)\n",
            "  2. Trouver son site d'exploitation\n",
            "  3. Renforcer ses compétences de chef d'entreprise\n"
          ]
        }
      ],
      "source": [
        "# =====================================================\n",
        "# BATCH EXTRACTION\n",
        "# =====================================================\n",
        "\n",
        "# Load the OCR result\n",
        "ocr_json_path = Path('prestations_ocr_result.json')\n",
        "ocr_data = load_ocr_result(ocr_json_path)\n",
        "print(f\"📂 Chargement de {ocr_json_path}\")\n",
        "\n",
        "# Tunable settings\n",
        "# ⚠️ To process only a range of pages, set start_page and end_page\n",
        "#    Page numbers are 1-indexed (page 1 = first page of the PDF)\n",
        "#    Examples:\n",
        "#      - Pages 1 to 20:   start_page=1, end_page=20\n",
        "#      - Pages 50 to 80:  start_page=50, end_page=80\n",
        "#      - All pages:       start_page=None, end_page=None (default)\n",
        "batch_settings = {\n",
        "    'batch_size': 10,      # 10 pages per batch (adjust if timeouts persist)\n",
        "    'pause_seconds': 2.0,  # 2 seconds between batches\n",
        "    'max_retries': 3,      # at most 3 attempts per batch\n",
        "    'retry_delay': 5.0,    # 5 seconds between attempts\n",
        "    'start_page': 6,       # 👈 first page (1-indexed), or None to start at the beginning\n",
        "    'end_page': 44,        # 👈 last page (1-indexed, inclusive), or None to go to the end\n",
        "}\n",
        "config = BatchConfig(**batch_settings)\n",
        "\n",
        "# Key holding the extracted items in the JSON response\n",
        "ITEMS_KEY = \"prestations\"\n",
        "\n",
        "# Build the extractor from the reusable service\n",
        "extractor = BatchExtractor(\n",
        "    ocr_result=ocr_data,\n",
        "    system_prompt=SYSTEM_PROMPT,\n",
        "    json_schema=JSON_SCHEMA,\n",
        "    items_key=ITEMS_KEY,\n",
        "    user_prompt=\"Extrait les prestations de service de ces pages du catalogue.\",\n",
        "    config=config,\n",
        ")\n",
        "\n",
        "# Run the extraction and save the result\n",
        "output_path = Path('prestations_2425.json')\n",
        "result = extractor.extract_to_file(output_path, items_key=ITEMS_KEY)\n",
        "\n",
        "# Preview of the results\n",
        "print(f\"\\n📝 Aperçu des 3 premières prestations:\")\n",
        "for rank, item in enumerate(result.items[:3], start=1):\n",
        "    title = item.get('title', 'Sans titre')\n",
        "    print(f\"  {rank}. {title}\")\n"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "## 5. Exploration des résultats\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 8,
      "metadata": {},
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "📊 Statistiques d'extraction:\n",
            "   - total_pages_in_document: 52\n",
            "   - pages_processed: 39\n",
            "   - page_range: 6-44\n",
            "   - total_batches: 4\n",
            "   - successful_batches: 4\n",
            "   - failed_batches: 0\n",
            "   - total_items: 221\n",
            "\n",
            "📋 Nombre total de prestations: 221\n",
            "\n",
            "🏷️ Répartition par domaine:\n",
            "   - Environnement et réglementation: 10\n",
            "   - Installation: 8\n",
            "   - Élevage et traite: 8\n",
            "   - PRODUCTIONS ANIMALES: 8\n",
            "   - Transformation et qualité: 7\n",
            "   - Numérique et productions végétales: 7\n",
            "   - ÉNERGIE: 7\n",
            "   - Bâtiments et infrastructures: 6\n",
            "   - Productions végétales: 6\n",
            "   - Productions végétales (Viticulture): 6\n",
            "   - Climat: 6\n",
            "   - AGRONOMIE: 6\n",
            "   - Transmission: 5\n",
            "   - Démarches administratives et réglementaires: 5\n",
            "   - Numérique et productions animales: 5\n",
            "   - ÉCONOMIE CIRCULAIRE: 5\n",
            "   - FORMATION: 5\n",
            "   - Relations humaines, Association: 4\n",
            "   - Réglementation et environnement: 4\n",
            "   - Échanges parcellaires: 4\n",
            "   - Arbre et biodiversité: 4\n",
            "   - DIFFUSION DE CONNAISSANCES: 4\n",
            "   - Transmission, Association: 3\n",
            "   - Démarches administratives et réglementaires, Élevage: 3\n",
            "   - Management/Ressources humaines: 3\n",
            "   - Commercialisation: 3\n",
            "   - Commercialisation et aménagement: 3\n",
            "   - Export et international: 3\n",
            "   - Énergie et environnement: 3\n",
            "   - Conseil stratégique phytosanitaire: 3\n",
            "   - Maraîchage: 3\n",
            "   - Irrigation: 3\n",
            "   - Projets et stratégie d'entreprise: 2\n",
            "   - Organisation du travail: 2\n",
            "   - Management/Ressources humaines, Formation: 2\n",
            "   - Agriculture biologique, Conversion: 2\n",
            "   - Commercialisation et stratégie: 2\n",
            "   - Financement et aides: 2\n",
            "   - Communication commerciale: 2\n",
            "   - Élevage et performance économique: 2\n",
            "   - Élevage et fourrages: 2\n",
            "   - Élevage et performance: 2\n",
            "   - Environnement et certification: 2\n",
            "   - Non spécifié: 2\n",
            "   - AGROÉCOLOGIE: 2\n",
            "   - Installation, Formation: 1\n",
            "   - Installation, Transmission: 1\n",
            "   - Agriculture biologique, Installation: 1\n",
            "   - Démarches administratives et réglementaires, Transformation: 1\n",
            "   - Environnement: 1\n",
            "   - Environnement, Formation: 1\n",
            "   - Laboratoire, Productions végétales: 1\n",
            "   - Laboratoire, Productions animales: 1\n",
            "   - Laboratoire, Microbiologie: 1\n",
            "   - Projets et stratégie d'entreprise, Solidarité: 1\n",
            "   - Agriculture biologique: 1\n",
            "   - Agriculture biologique, Bâtiment: 1\n",
            "   - Agriculture biologique, Productions végétales: 1\n",
            "   - Agriculture biologique, Viticulture: 1\n",
            "   - Agriculture biologique, Maraîchage: 1\n",
            "   - Agriculture biologique, Élevage: 1\n",
            "   - Agriculture biologique, Élevage, Santé animale: 1\n",
            "   - Agriculture biologique, Diversification, Transformation: 1\n",
            "   - Agriculture biologique, Diversification, Commercialisation: 1\n",
            "   - Agriculture biologique, Diversification, Tourisme: 1\n",
            "   - Agriculture biologique, Formation, Conseil: 1\n",
            "   - Agriculture biologique, Installation, Projets: 1\n",
            "   - Agriculture biologique, Conversion, Formation: 1\n",
            "   - Agriculture biologique, Conseil technique: 1\n",
            "   - Agriculture biologique, Stratégie d'entreprise: 1\n",
            "   - Agriculture biologique, Réglementation: 1\n",
            "   - Installation et développement d'activité: 1\n",
            "   - Réseaux et commercialisation: 1\n",
            "   - Accompagnement et formation: 1\n",
            "   - Environnement et climat: 1\n",
            "   - Élevage et santé animale: 1\n",
            "   - Environnement et agronomie: 1\n",
            "   - Environnement et financement: 1\n",
            "   - Numérique et traçabilité: 1\n",
            "   - Numérique et commercialisation: 1\n"
          ]
        }
      ],
      "source": [
        "# Reload the saved extraction and explore it\n",
        "data = json.loads(Path('prestations_2425.json').read_text(encoding='utf-8'))\n",
        "\n",
        "prestations = data.get('prestations', [])\n",
        "stats = data.get('stats', {})\n",
        "\n",
        "print(f\"📊 Statistiques d'extraction:\")\n",
        "for stat_name in stats:\n",
        "    print(f\"   - {stat_name}: {stats[stat_name]}\")\n",
        "\n",
        "print(f\"\\n📋 Nombre total de prestations: {len(prestations)}\")\n",
        "\n",
        "# Count prestations per domain; a missing or empty domain falls under 'Non spécifié'\n",
        "domains = {}\n",
        "for entry in prestations:\n",
        "    domain_label = entry.get('domain') or 'Non spécifié'\n",
        "    domains.setdefault(domain_label, 0)\n",
        "    domains[domain_label] += 1\n",
        "\n",
        "print(f\"\\n🏷️ Répartition par domaine:\")\n",
        "# Most frequent first; the sort is stable, so ties keep their first-seen order\n",
        "ranked_domains = sorted(domains.items(), key=lambda pair: pair[1], reverse=True)\n",
        "for domain_label, total in ranked_domains:\n",
        "    print(f\"   - {domain_label}: {total}\")\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 11,
      "metadata": {},
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "✅ Export CSV sauvegardé dans: prestations_2425.csv\n",
            "   221 lignes exportées\n"
          ]
        }
      ],
      "source": [
        "# CSV export for analysis in Excel\n",
        "import csv\n",
        "\n",
        "csv_output_path = Path('prestations_2425.csv')\n",
        "\n",
        "# JSON field -> CSV column name; insertion order is also the column order\n",
        "COLUMN_MAPPING = {\n",
        "    'title': 'Titre',\n",
        "    'content': 'Contenu',\n",
        "    'domain': 'Domaine',\n",
        "    'target_audience': 'Cible',\n",
        "    'associated_tools': 'Outils associés',\n",
        "    'label': 'label',\n",
        "    'service': 'service',\n",
        "    'contact': 'contact'\n",
        "}\n",
        "\n",
        "# CSV columns, taken from the mapping so the two cannot drift apart\n",
        "fieldnames = list(COLUMN_MAPPING.values())\n",
        "\n",
        "# 'utf-8-sig' prepends a BOM and ';' is the delimiter (presumably for Excel - TODO confirm)\n",
        "with open(csv_output_path, 'w', newline='', encoding='utf-8-sig') as csvfile:\n",
        "    writer = csv.DictWriter(csvfile, fieldnames=fieldnames, delimiter=';', extrasaction='ignore')\n",
        "    writer.writeheader()\n",
        "\n",
        "    for prestation in prestations:\n",
        "        # Rename the JSON keys to their column names; None becomes an empty cell\n",
        "        row = {}\n",
        "        for json_key, column in COLUMN_MAPPING.items():\n",
        "            if json_key in prestation:\n",
        "                value = prestation[json_key]\n",
        "                row[column] = '' if value is None else value\n",
        "        writer.writerow(row)\n",
        "\n",
        "print(f\"✅ Export CSV sauvegardé dans: {csv_output_path}\")\n",
        "print(f\"   {len(prestations)} lignes exportées\")\n"
      ]
    }
  ],
  "metadata": {
    "kernelspec": {
      "display_name": "Python 3",
      "language": "python",
      "name": "python3"
    },
    "language_info": {
      "codemirror_mode": {
        "name": "ipython",
        "version": 3
      },
      "file_extension": ".py",
      "mimetype": "text/x-python",
      "name": "python",
      "nbconvert_exporter": "python",
      "pygments_lexer": "ipython3",
      "version": "3.12.2"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 2
}