judgy_reachy_no_phone / benchmark_tensorrt.py
yozkut's picture
Sync from GitHub via huggingface-sync-action
d247227 verified
#!/usr/bin/env python3
"""
TensorRT vs PyTorch Benchmark Script - 3-Way Comparison
Tests: TensorRT GPU, PyTorch GPU, PyTorch CPU
"""
import time
import numpy as np
import torch
def benchmark_yolo(model, num_frames=100, warmup_frames=10):
    """Benchmark YOLO detection speed on a single synthetic frame.

    Args:
        model: Callable invoked as ``model(frame, verbose=False)``.
        num_frames: Number of timed inference calls; must be positive.
        warmup_frames: Number of untimed calls issued before timing starts.

    Returns:
        Tuple ``(avg_ms, fps)``: mean time per call in milliseconds and
        calls per second, both measured over the timed loop only.

    Raises:
        ValueError: If ``num_frames`` is not positive.
    """
    if num_frames <= 0:
        raise ValueError(f"num_frames must be positive, got {num_frames}")

    # Random 480x640 3-channel test frame. randint's upper bound is
    # exclusive, so 256 is needed to cover the full uint8 range.
    test_frame = np.random.randint(0, 256, (480, 640, 3), dtype=np.uint8)

    # Warm up (not timed)
    for _ in range(warmup_frames):
        model(test_frame, verbose=False)

    # Benchmark. perf_counter is monotonic and high-resolution, so the
    # measurement is not affected by system clock adjustments.
    start_time = time.perf_counter()
    for _ in range(num_frames):
        model(test_frame, verbose=False)
    elapsed = time.perf_counter() - start_time

    avg_ms = (elapsed / num_frames) * 1000
    # Guard against a zero reading from a coarse timer and a trivial model.
    fps = num_frames / elapsed if elapsed > 0 else float("inf")
    return avg_ms, fps
# Weights benchmarked by every backend, and the engine file name used only
# if the exporter does not report the path it wrote.
_MODEL_WEIGHTS = "yolo26m.pt"
_DEFAULT_ENGINE_PATH = "yolo26m.engine"

# Frame counts passed to benchmark_yolo and echoed in the progress messages.
_NUM_FRAMES = 100
_WARMUP_FRAMES = 10

_RULE_WIDTH = 70


def _print_section(title, rule="-"):
    """Print *title* framed above and below by a horizontal rule."""
    print(rule * _RULE_WIDTH)
    print(title)
    print(rule * _RULE_WIDTH)


def _run_and_report(model):
    """Benchmark *model*, print its FPS and latency, and return ``(fps, avg_ms)``."""
    print(f" Warming up ({_WARMUP_FRAMES} frames)...")
    print(f" Running benchmark ({_NUM_FRAMES} frames)...")
    avg_ms, fps = benchmark_yolo(
        model, num_frames=_NUM_FRAMES, warmup_frames=_WARMUP_FRAMES
    )
    print()
    print(" Results:")
    print(f" FPS: {fps:.1f}")
    print(f" Latency: {avg_ms:.1f}ms")
    print()
    return fps, avg_ms


def _print_summary(results):
    """Print the comparison for whichever backends were actually measured.

    Args:
        results: Mapping from backend key (``'tensorrt'``, ``'pytorch_gpu'``,
            ``'cpu'``) to ``(fps, avg_ms)``. ``'cpu'`` must be present.
    """
    _print_section("SUMMARY", rule="=")
    print()

    fps_cpu, ms_cpu = results['cpu']

    if 'tensorrt' in results and 'pytorch_gpu' in results:
        fps_tensorrt, ms_tensorrt = results['tensorrt']
        fps_pytorch_gpu, ms_pytorch_gpu = results['pytorch_gpu']

        # Calculate speedups
        tensorrt_vs_pytorch = fps_tensorrt / fps_pytorch_gpu
        tensorrt_vs_cpu = fps_tensorrt / fps_cpu
        gpu_vs_cpu = fps_pytorch_gpu / fps_cpu

        print(f" TensorRT (NVIDIA GPU): {fps_tensorrt:6.1f} FPS ({ms_tensorrt:6.1f}ms)")
        print(f" PyTorch GPU: {fps_pytorch_gpu:6.1f} FPS ({ms_pytorch_gpu:6.1f}ms)")
        print(f" PyTorch CPU: {fps_cpu:6.1f} FPS ({ms_cpu:6.1f}ms)")
        print()
        print(f" 🚀 TensorRT vs PyTorch GPU: {tensorrt_vs_pytorch:.2f}x faster")
        print(f" 📊 GPU vs CPU (PyTorch): {gpu_vs_cpu:.1f}x faster")
        print(f" 🎯 TensorRT vs CPU (total): {tensorrt_vs_cpu:.1f}x faster")
        print()
        _print_section("📋 Add this table to your README:", rule="=")
        print()
        print("| Backend | Hardware | FPS | Latency | TensorRT Speedup | vs CPU |")
        print("|---------|----------|-----|---------|------------------|--------|")
        print(f"| **TensorRT** | NVIDIA GPU | **{fps_tensorrt:.1f}** | {ms_tensorrt:.1f}ms | **{tensorrt_vs_pytorch:.2f}x** | {tensorrt_vs_cpu:.1f}x |")
        print(f"| PyTorch | NVIDIA GPU | {fps_pytorch_gpu:.1f} | {ms_pytorch_gpu:.1f}ms | 1.0x | {gpu_vs_cpu:.1f}x |")
        print(f"| PyTorch | CPU | {fps_cpu:.1f} | {ms_cpu:.1f}ms | - | 1.0x |")
        print()
        print(f"**TensorRT provides {tensorrt_vs_pytorch:.2f}x speedup over PyTorch on the same NVIDIA GPU!**")
    elif 'pytorch_gpu' in results:
        # CUDA was available but no TensorRT engine could be built, so only
        # the PyTorch GPU-vs-CPU comparison is reported.
        fps_pytorch_gpu, ms_pytorch_gpu = results['pytorch_gpu']
        gpu_vs_cpu = fps_pytorch_gpu / fps_cpu

        print(f" PyTorch GPU: {fps_pytorch_gpu:6.1f} FPS ({ms_pytorch_gpu:6.1f}ms)")
        print(f" PyTorch CPU: {fps_cpu:6.1f} FPS ({ms_cpu:6.1f}ms)")
        print()
        print(f" 📊 GPU vs CPU (PyTorch): {gpu_vs_cpu:.1f}x faster")
        print()
        print(" ⚠️ TensorRT was not benchmarked (export failed); no TensorRT speedup measured.")
    else:
        print(f" PyTorch (CPU only): {fps_cpu:.1f} FPS ({ms_cpu:.1f}ms)")
        print()
        print(" ⚠️ Run on NVIDIA GPU to measure TensorRT speedup!")


def main():
    """Run the TensorRT / PyTorch GPU / PyTorch CPU benchmarks and print a summary.

    The two GPU tests run only when ``torch.cuda.is_available()`` is true.
    A result is recorded as TensorRT only if the engine was actually exported
    and loaded; if that step fails, the TensorRT test is skipped rather than
    being replaced by a PyTorch run reported under the TensorRT label.
    """
    # Imported here so ultralytics is only required when the benchmark runs.
    from ultralytics import YOLO

    _print_section("TensorRT vs PyTorch GPU vs PyTorch CPU Benchmark", rule="=")
    print()

    # Check hardware
    cuda_available = torch.cuda.is_available()
    print("Hardware Detection:")
    print(f" CUDA available: {cuda_available}")
    if cuda_available:
        print(f" GPU: {torch.cuda.get_device_name(0)}")
        print(f" CUDA version: {torch.version.cuda}")
    print(f" PyTorch version: {torch.__version__}")
    print()

    # Download model if needed
    print("Downloading YOLO model if needed...")
    YOLO(_MODEL_WEIGHTS)
    print()

    results = {}

    # Test 1: TensorRT (if NVIDIA GPU available)
    if cuda_available:
        _print_section("Test 1: TensorRT on NVIDIA GPU")
        print(" Initializing TensorRT (will export on first run, ~1-2 min)...")
        model_tensorrt = None
        # Export can fail for many reasons (missing TensorRT install,
        # unsupported GPU, ...); the benchmark should continue either way.
        try:
            engine_path = YOLO(_MODEL_WEIGHTS).export(
                format='engine', device=0, half=True, workspace=4
            )
            print(" ✅ TensorRT export complete!")
            # Prefer the path reported by the exporter over a guessed name.
            model_tensorrt = YOLO(engine_path or _DEFAULT_ENGINE_PATH)
            print(" ✅ Loaded TensorRT engine")
        except Exception as e:
            print(f" ⚠️ TensorRT export failed: {e}")
            print(" Skipping TensorRT benchmark; PyTorch GPU is measured in Test 2.")
            print()

        if model_tensorrt is not None:
            results['tensorrt'] = _run_and_report(model_tensorrt)

    # Test 2: PyTorch on GPU (without TensorRT)
    if cuda_available:
        _print_section("Test 2: PyTorch on NVIDIA GPU (no TensorRT)")
        print(" Loading PyTorch model on GPU...")
        model_pytorch_gpu = YOLO(_MODEL_WEIGHTS)
        model_pytorch_gpu.to('cuda')
        results['pytorch_gpu'] = _run_and_report(model_pytorch_gpu)

    # Test 3: PyTorch on CPU
    _print_section("Test 3: PyTorch on CPU (baseline)")
    print(" Loading PyTorch model on CPU...")
    model_cpu = YOLO(_MODEL_WEIGHTS)
    model_cpu.to('cpu')
    results['cpu'] = _run_and_report(model_cpu)

    _print_summary(results)
# Run the benchmark only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()