# Git ignore rules.
# Format reminders: one pattern per line; `#` starts a comment only at the
# beginning of a line; later patterns override earlier ones, so every `!`
# re-include must stay BELOW the pattern it is an exception to.

# Internal documentation (upper-case filenames — not for public Space)
# NOTE: this is a glob, not a regex: it matches any root-level .md file whose
# first character is A-Z and whose second is A-Z, 0-9, `_` or `-`; the `*`
# then matches anything.
/[A-Z][A-Z_0-9-]*.md
!/README.md

# Python
__pycache__/
*.py[cod]
*$py.class
*.so
.Python
*.egg-info/
dist/
build/
*.egg

# Jupyter Notebook
.ipynb_checkpoints
*/.ipynb_checkpoints/*

# Virtual environments
venv/
venv_*/
env/
ENV/
.venv

# IDE
.vscode/
.idea/
*.swp
*.swo
*~

# OS
.DS_Store
Thumbs.db

# Model checkpoints and outputs
models/
output/
output_*/
seq2seq_model_handwritten/
seq2seq_*/
cyrillic_seq2seq_*/
kazars_trocr_*/
checkpoint-*/
*.pt
*.pth
*.bin
*.safetensors

# Data
data/
datasets/
full-datasets/
unpacked-datasets/
*.arrow
*.csv
*.txt
# Exceptions to *.txt: dependency lists and the plain-text readme
!requirements.txt
!requirements-gpu.txt
!requirements-kraken.txt
!README.txt

# Images and media
*.jpg
*.jpeg
*.png
*.tif
*.tiff
*.bmp
# Exceptions to *.png: example/sample images, assets and PWA icons
!example*.png
!sample*.png
!assets/*.png
!web/static/pwa/icons/*.png

# Test images directory
HTR_Images/

# Logs and temporary files
logs/
*.log
runs/
tensorboard/
wandb/

# Security scanning reports
gitleaks_*.json
*_report.json

# Compressed files
*.zip
*.tar.gz
*.rar

# Large files
*.h5
*.hdf5

# Transkribus exports
page/
*.xml
!example*.xml

# Ignore local Claude settings
.claude/

# Test/debug scripts
investigate_page440.py
resume_extraction.py
run_ddp_manual.py
run_ddp_test.py
train_minimal_example.py
test_segmenter_comparison.py
config_test_ddp.yaml
config_ukrainian.yaml

# Platform-specific build scripts
run_training_ddp.bat
run_training_ddp.ps1

# Windows artifacts
nul

# API keys storage (from GUI)
.trocr_gui/
.env
*.env

# External repositories
party_repo/
kraken_repo/

# Backup and temporary files
*_backup.py
*_backup_*.py
fix_*.py

# Session documentation (auto-generated)
COMPREHENSIVE_*.md
QWEN_*.md
KRAKEN_*.md
SESSION_SUMMARY_*.md
Documentation/

# Detailed project documentation (hardware-specific, too detailed for public)
CLAUDE.md

# Internal planning documents (not for public repo)
*_PLAN.md
*_PLAN_*.md
IMPLEMENTATION_SUMMARY.md
PARTY_FIX_TESTING.md
PARTY_POC_VS_PLUGIN_COMPARISON.md
QUICK_START_IMPROVEMENTS.md

# Training scripts and logs (specific to our setup)
run_pylaia_*.sh
start_pylaia_*training*.py
start_pylaia_*replica.py
resume_pylaia_*.py
train_pylaia_*_pagexml.py
monitor_*.sh
*.backup

# Test and debug scripts (temporary)
check_*.py
test_*.py
# Exception: web API test suite is a proper test, not a throwaway script
!web/tests/test_server.py
!web/tests/test_comparison_metrics.py

# Accidentally created files
=*

# HuggingFace model download history (auto-generated)
.hf_model_history.json

# Legacy/broken inference implementations
inference_pylaia.py
inference_pylaia_lm.py

# Virtual environments (project-specific)
churro_venv/
party_env/

# Status and implementation notes (temporary documentation)
*_STATUS.md
*_NOTES.md
*_READY.md
*_COMPLETE.md
*_TODO.md
*_ISSUE.md
*_QUICKSTART.md
*_LESSONS_LEARNED.md
*_UPDATE.md
*_IMPLEMENTATION.md
*_BUGFIX_*.md
TRAINING_STATUS_*.md

# Experimental scripts (Churro)
convert_pylaia_to_churro_*.py
prepare_*_churro.py
finetune_churro_*.py
inference_churro.py
run_churro_*.sh

# Debug scripts
debug_*.py

# Server environment config (private)
SERVER_ENV.md
htr_gui/

# Training logs
training_ukrainian_v2c.log
nohup.out
nohup_*.log

# Lightning logs
lightning_logs/

# Internal planning docs with a PLAN_ prefix
# (the *_PLAN.md suffix form is already listed in the planning section above)
PLAN_*.md

# Training run scripts (local)
run_party_*.sh

# Jupyter notebooks (local experiments)
*.ipynb

# Web UI upload temp dirs (created at runtime)
# NOTE(review): the leading slash anchors this pattern to the repository root
# (<repo>/tmp/polyscriptor_uploads_*/), not to the system /tmp directory —
# confirm which location the web UI actually writes to.
/tmp/polyscriptor_uploads_*/

# Web UI key store (contains API keys — never commit)
web/api_keys.json
web/uploads/

# Diagnostic and inspection scripts (temporary)
diagnose_exif_mismatch.py
inspect_*.ipynb

# Gabelsberger shorthand preparation (work in progress)
prepare_gabelsberger_shorthand.py

# GitLab token files (presumably credentials — keep out of the repo)
*.gitlab-token

# Internal handoff / briefing documents (not for public repo)
*_HANDOFF.md

# Runtime PID files (created by training/process scripts)
*.pid