Buckets:
bbkdevops/unicosys-hypergraph-bucket / tinymind-fullcycle-tpu-handoff /tinymind_fullcycle_colab_tpu_train.ipynb
| { | |
| "cells": [ | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "# TinyMind Full-Cycle Colab TPU/GPU Handoff\n", | |
| "This notebook continues TinyMind-native training on a broad real-data curriculum plus pure algebra. It trains on CUDA when a GPU is available, otherwise on TPU/XLA when the Colab runtime exposes torch_xla, and falls back to CPU when neither is present.\n" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "# Upload and unpack the handoff bundle, then work from inside the unpacked bundle directory.\n", | |
| "# files.upload() saves into the current working directory, and an earlier run of this cell\n", | |
| "# leaves the kernel inside the bundle directory that is deleted below, so reset cwd first.\n", | |
| "%cd /content\n", | |
| "# Remove any previous copy so the new upload keeps its exact name instead of being renamed.\n", | |
| "!rm -f /content/tinymind_fullcycle_tpu_handoff_bundle.zip\n", | |
| "from google.colab import files\n", | |
| "uploaded = files.upload()\n", | |
| "assert 'tinymind_fullcycle_tpu_handoff_bundle.zip' in uploaded, 'expected tinymind_fullcycle_tpu_handoff_bundle.zip to be uploaded'\n", | |
| "!rm -rf /content/tinymind_colab && mkdir -p /content/tinymind_colab\n", | |
| "!unzip -q /content/tinymind_fullcycle_tpu_handoff_bundle.zip -d /content/tinymind_colab\n", | |
| "%cd /content/tinymind_colab/bundle\n" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "# Install runtime dependencies; %pip (rather than !pip) targets the environment of the running kernel.\n", | |
| "%pip install -q datasets huggingface_hub safetensors pytest\n", | |
| "import json, os, subprocess, sys\n", | |
| "import torch\n", | |
| "# Selection order: CUDA when available, otherwise a usable XLA device, otherwise CPU.\n", | |
| "accelerator = 'cpu'\n", | |
| "try:\n", | |
| "    import torch_xla.core.xla_model as xm\n", | |
| "    # Acquire the device BEFORE recording the choice: if xla_device() raises, accelerator must\n", | |
| "    # stay 'cpu' instead of keeping a stale 'tpu-xla' that would later select --device xla.\n", | |
| "    xla_device = xm.xla_device()\n", | |
| "    accelerator = 'tpu-xla'\n", | |
| "    print('TPU/XLA detected:', xla_device)\n", | |
| "except Exception as exc:\n", | |
| "    # Best-effort probe: a missing torch_xla and an unusable XLA runtime both land here.\n", | |
| "    print('TPU/XLA not active:', type(exc).__name__, str(exc)[:120])\n", | |
| "if torch.cuda.is_available():\n", | |
| "    accelerator = 'cuda'\n", | |
| "print('accelerator selected:', accelerator)\n", | |
| "print('cuda available:', torch.cuda.is_available())\n", | |
| "print('gpu:', torch.cuda.get_device_name(0) if torch.cuda.is_available() else 'none')\n" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "# Run the bundle's tests before training.\n", | |
| "!python -m pytest tests/test_adaptive_tool_fabric.py tests/test_toolweb_native.py -q\n", | |
| "# A '!' shell line never raises on a non-zero exit status, so check the status explicitly;\n", | |
| "# otherwise Run All would continue into training even when the tests fail.\n", | |
| "assert _exit_code == 0, f'pytest failed with exit code {_exit_code}'\n" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "# Launch the training CLI as a subprocess; a non-zero exit raises CalledProcessError.\n", | |
| "# The accelerator chosen by the setup cell maps to the --device flag (no flag for CPU).\n", | |
| "device_args = {\n", | |
| "    'cuda': ['--device', 'cuda'],\n", | |
| "    'tpu-xla': ['--device', 'xla'],\n", | |
| "}.get(accelerator, [])\n", | |
| "entry_args = [sys.executable, '-m', 'train.cli', 'native-axiom-regenesis-train']\n", | |
| "path_args = [\n", | |
| "    '--dataset', '/content/tinymind_colab/bundle/data/fullcycle_broader_algebra_curriculum.jsonl',\n", | |
| "    '--resume-checkpoint', '/content/tinymind_colab/bundle/checkpoint/checkpoint.pt',\n", | |
| "    '--out-dir', '/content/tinymind_fullcycle_train_out',\n", | |
| "]\n", | |
| "size_args = [\n", | |
| "    '--max-steps', '2400', '--limit-records', '20000', '--eval-records', '384',\n", | |
| "    '--dim', '128', '--layers', '4', '--lanes', '12', '--seq-len', '128', '--vocab-size', '512',\n", | |
| "    '--tokenizer-mode', 'clean_char_v2', '--generation-head-type', 'phrase_copy',\n", | |
| "]\n", | |
| "schedule_args = [\n", | |
| "    '--learning-rate', '8e-5', '--train-batch-size', '8', '--warmup-steps', '120',\n", | |
| "    '--lr-scheduler-type', 'cosine', '--min-lr-ratio', '0.04',\n", | |
| "]\n", | |
| "weight_args = [\n", | |
| "    '--repeat-unlikelihood-weight', '0.035', '--entropy-floor-weight', '0.0008', '--entropy-floor', '1.15',\n", | |
| "    '--anchor-coverage-weight', '0.09', '--anchor-planner-logit-scale', '1.05', '--anchor-planner-loss-weight', '0.10',\n", | |
| "    '--phrase-copy-logit-scale', '14.0', '--pure-logic-aux-weight', '0.04', '--pure-logic-residual-scale', '0.12',\n", | |
| "]\n", | |
| "memory_args = [\n", | |
| "    '--memory-contraction', '0.86', '--memory-update-scale', '0.13', '--memory-state-clamp', '1.65', '--tf32-training',\n", | |
| "]\n", | |
| "# Argument order matches the original single list; the device flag stays last.\n", | |
| "cmd = [*entry_args, *path_args, *size_args, *schedule_args, *weight_args, *memory_args, *device_args]\n", | |
| "print('RUN:', ' '.join(cmd))\n", | |
| "subprocess.run(cmd, check=True)\n" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "# Run the three post-training probes in order; the first failing probe raises CalledProcessError.\n", | |
| "cli_prefix = [sys.executable, '-m', 'train.cli']\n", | |
| "trained_ckpt = '/content/tinymind_fullcycle_train_out/checkpoint.pt'\n", | |
| "probe_cmds = [\n", | |
| "    cli_prefix + ['native-broad-probe', '--native-checkpoint', trained_ckpt,\n", | |
| "                  '--out-dir', '/content/tinymind_fullcycle_broad_probe', '--max-new-tokens', '192'],\n", | |
| "    cli_prefix + ['native-natural-response-gate', '--checkpoint', trained_ckpt,\n", | |
| "                  '--out-dir', '/content/tinymind_fullcycle_natural_gate', '--max-new-tokens', '160'],\n", | |
| "    cli_prefix + ['adaptive-tool-fabric-probe', '--out-dir', '/content/tinymind_fullcycle_toolfabric_probe'],\n", | |
| "]\n", | |
| "for cmd in probe_cmds:\n", | |
| "    print('RUN:', ' '.join(cmd))\n", | |
| "    subprocess.run(cmd, check=True)\n" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "# Bundle the training and probe output directories into one archive and download it.\n", | |
| "# zip updates an existing archive in place, so drop any archive left by an earlier run first;\n", | |
| "# otherwise entries for files that no longer exist would remain in the download.\n", | |
| "!rm -f /content/tinymind_fullcycle_colab_results.zip\n", | |
| "!zip -qr /content/tinymind_fullcycle_colab_results.zip /content/tinymind_fullcycle_train_out /content/tinymind_fullcycle_broad_probe /content/tinymind_fullcycle_natural_gate /content/tinymind_fullcycle_toolfabric_probe\n", | |
| "from google.colab import files\n", | |
| "files.download('/content/tinymind_fullcycle_colab_results.zip')\n" | |
| ] | |
| } | |
| ], | |
| "metadata": { | |
| "kernelspec": { | |
| "display_name": "Python 3", | |
| "language": "python", | |
| "name": "python3" | |
| } | |
| }, | |
| "nbformat": 4, | |
| "nbformat_minor": 5 | |
| } | |
Xet Storage Details
- Size:
- 5.61 kB
- Xet hash:
- 607ea5318e3f41db3caa9b3e84c741089931048abe56642a7d8a172b2d47441c
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.