"""
TensorRT vs PyTorch benchmark script (3-way comparison).

Benchmarks YOLO inference on three backends: TensorRT on GPU, PyTorch on GPU,
and PyTorch on CPU.
"""
|
|
| import time |
| import numpy as np |
| import torch |
|
|
def benchmark_yolo(model, num_frames=100, warmup_frames=10):
    """Measure the average per-call inference time of ``model``.

    One random 480x640x3 ``uint8`` frame is generated up front and passed to
    ``model`` repeatedly: ``warmup_frames`` untimed calls followed by
    ``num_frames`` timed calls.

    Args:
        model: Callable invoked as ``model(frame, verbose=False)``.
        num_frames: Number of timed calls; must be at least 1.
        warmup_frames: Number of untimed calls made before timing starts.

    Returns:
        Tuple ``(avg_ms, fps)``: mean time per call in milliseconds and the
        number of calls per second over the timed loop.

    Raises:
        ValueError: If ``num_frames`` is less than 1.
    """
    if num_frames < 1:
        # Previously this surfaced as an unexplained ZeroDivisionError (0) or
        # as meaningless negative numbers (< 0).
        raise ValueError(f"num_frames must be >= 1, got {num_frames}")

    def wait_for_gpu():
        # CUDA work is queued asynchronously; wait for it to finish so that
        # pending kernels are neither leaked into nor left out of the timing.
        if torch.cuda.is_available():
            torch.cuda.synchronize()

    # ``high`` is exclusive, so 256 is required to cover the full 0..255 range.
    test_frame = np.random.randint(0, 256, (480, 640, 3), dtype=np.uint8)

    # Untimed warm-up calls.
    for _ in range(warmup_frames):
        model(test_frame, verbose=False)
    wait_for_gpu()

    # perf_counter() is the high-resolution clock intended for measuring
    # short durations; time.time() can be coarse and can jump.
    start_time = time.perf_counter()
    for _ in range(num_frames):
        model(test_frame, verbose=False)
    wait_for_gpu()
    elapsed = time.perf_counter() - start_time

    avg_ms = (elapsed / num_frames) * 1000
    # Guard against a zero reading from the clock for extremely fast callables.
    fps = num_frames / elapsed if elapsed > 0 else float("inf")

    return avg_ms, fps
|
|
def _print_banner(title, rule="-"):
    """Print ``title`` framed above and below by a 70-character rule of ``rule``."""
    print(rule * 70)
    print(title)
    print(rule * 70)


def _benchmark_and_report(model):
    """Run ``benchmark_yolo`` on ``model`` with its defaults, print the result, return ``(fps, avg_ms)``."""
    print(" Warming up (10 frames)...")
    print(" Running benchmark (100 frames)...")
    avg_ms, fps = benchmark_yolo(model)

    print()
    print(" Results:")
    print(f" FPS: {fps:.1f}")
    print(f" Latency: {avg_ms:.1f}ms")
    print()
    return fps, avg_ms


def main():
    """Run the three-way benchmark and print per-test results plus a summary.

    Tests 1 (TensorRT engine) and 2 (PyTorch on GPU) run only when
    ``torch.cuda.is_available()`` is true; Test 3 (PyTorch on CPU) always
    runs. If the TensorRT export or engine load raises, Test 1 is skipped
    rather than silently benchmarking the PyTorch model under the TensorRT
    label, so the summary never reports a TensorRT speedup that was not
    actually measured.
    """
    # Local import: ultralytics is only needed when the benchmark actually runs.
    from ultralytics import YOLO

    weights = "yolo26m.pt"
    has_cuda = torch.cuda.is_available()

    _print_banner("TensorRT vs PyTorch GPU vs PyTorch CPU Benchmark", "=")
    print()

    print("Hardware Detection:")
    print(f" CUDA available: {has_cuda}")
    if has_cuda:
        print(f" GPU: {torch.cuda.get_device_name(0)}")
        print(f" CUDA version: {torch.version.cuda}")
    print(f" PyTorch version: {torch.__version__}")
    print()

    # Instantiate once up front so any weight download happens before timing.
    print("Downloading YOLO model if needed...")
    YOLO(weights)
    print()

    # Maps backend key -> (fps, avg_ms).
    results = {}

    if has_cuda:
        _print_banner("Test 1: TensorRT on NVIDIA GPU")
        print(" Initializing TensorRT (will export on first run, ~1-2 min)...")

        model_tensorrt = YOLO(weights)
        try:
            engine_path = model_tensorrt.export(format='engine', device=0, half=True, workspace=4)
            print(" ✅ TensorRT export complete!")

            # Prefer the path reported by export(); fall back to the
            # conventional file name if nothing usable was returned.
            model_tensorrt = YOLO(engine_path or "yolo26m.engine")
            print(" ✅ Loaded TensorRT engine")
        except Exception as e:
            # Best-effort: a missing/broken TensorRT install must not abort
            # the remaining tests, but it must not be reported as TensorRT.
            print(f" ⚠️ TensorRT export failed: {e}")
            print(" Skipping TensorRT benchmark (PyTorch GPU is measured in Test 2)...")
            print()
        else:
            results['tensorrt'] = _benchmark_and_report(model_tensorrt)

        _print_banner("Test 2: PyTorch on NVIDIA GPU (no TensorRT)")
        print(" Loading PyTorch model on GPU...")

        model_pytorch_gpu = YOLO(weights)
        # NOTE(review): assumes .to() also pins the device used for
        # prediction in the installed ultralytics version; confirm.
        model_pytorch_gpu.to('cuda')
        results['pytorch_gpu'] = _benchmark_and_report(model_pytorch_gpu)

    _print_banner("Test 3: PyTorch on CPU (baseline)")
    print(" Loading PyTorch model on CPU...")

    model_cpu = YOLO(weights)
    model_cpu.to('cpu')
    results['cpu'] = _benchmark_and_report(model_cpu)

    _print_banner("SUMMARY", "=")
    print()

    fps_cpu, ms_cpu = results['cpu']

    if 'tensorrt' in results and 'pytorch_gpu' in results:
        fps_tensorrt, ms_tensorrt = results['tensorrt']
        fps_pytorch_gpu, ms_pytorch_gpu = results['pytorch_gpu']

        # Speedups are ratios of throughput (FPS).
        tensorrt_vs_pytorch = fps_tensorrt / fps_pytorch_gpu
        tensorrt_vs_cpu = fps_tensorrt / fps_cpu
        gpu_vs_cpu = fps_pytorch_gpu / fps_cpu

        print(f" TensorRT (NVIDIA GPU): {fps_tensorrt:6.1f} FPS ({ms_tensorrt:6.1f}ms)")
        print(f" PyTorch GPU: {fps_pytorch_gpu:6.1f} FPS ({ms_pytorch_gpu:6.1f}ms)")
        print(f" PyTorch CPU: {fps_cpu:6.1f} FPS ({ms_cpu:6.1f}ms)")
        print()
        print(f" 🚀 TensorRT vs PyTorch GPU: {tensorrt_vs_pytorch:.2f}x faster")
        print(f" 📊 GPU vs CPU (PyTorch): {gpu_vs_cpu:.1f}x faster")
        print(f" 🎯 TensorRT vs CPU (total): {tensorrt_vs_cpu:.1f}x faster")
        print()
        _print_banner("📋 Add this table to your README:", "=")
        print()
        print("| Backend | Hardware | FPS | Latency | TensorRT Speedup | vs CPU |")
        print("|---------|----------|-----|---------|------------------|--------|")
        print(f"| **TensorRT** | NVIDIA GPU | **{fps_tensorrt:.1f}** | {ms_tensorrt:.1f}ms | **{tensorrt_vs_pytorch:.2f}x** | {tensorrt_vs_cpu:.1f}x |")
        print(f"| PyTorch | NVIDIA GPU | {fps_pytorch_gpu:.1f} | {ms_pytorch_gpu:.1f}ms | 1.0x | {gpu_vs_cpu:.1f}x |")
        print(f"| PyTorch | CPU | {fps_cpu:.1f} | {ms_cpu:.1f}ms | - | 1.0x |")
        print()
        print(f"**TensorRT provides {tensorrt_vs_pytorch:.2f}x speedup over PyTorch on the same NVIDIA GPU!**")

    elif 'pytorch_gpu' in results:
        # GPU present but the TensorRT engine could not be built or loaded.
        fps_pytorch_gpu, ms_pytorch_gpu = results['pytorch_gpu']
        gpu_vs_cpu = fps_pytorch_gpu / fps_cpu

        print(f" PyTorch GPU: {fps_pytorch_gpu:.1f} FPS ({ms_pytorch_gpu:.1f}ms)")
        print(f" PyTorch (CPU): {fps_cpu:.1f} FPS ({ms_cpu:.1f}ms)")
        print()
        print(f" 📊 GPU vs CPU (PyTorch): {gpu_vs_cpu:.1f}x faster")
        print()
        print(" ⚠️ TensorRT engine unavailable - no TensorRT speedup was measured.")

    else:
        print(f" PyTorch (CPU only): {fps_cpu:.1f} FPS ({ms_cpu:.1f}ms)")
        print()
        print(" ⚠️ Run on NVIDIA GPU to measure TensorRT speedup!")
|
# Script entry point: run the benchmark only when executed directly,
# not when this module is imported.
if __name__ == "__main__":
    main()
|
|