# Helmet-axera / ax_hed_infer.py
# yz123's picture
# Upload folder using huggingface_hub
# 500e650 verified
import cv2
import os, sys
import time
import numpy as np
import axengine as ort
import argparse
def make_grid(nx=20, ny=20, i=0, strides=(8, 16, 32), anchors=((31, 28, 38, 32, 60, 83), (84, 110, 133, 118, 200, 113))):
    """Build the cell-offset grid and the anchor-size grid for one detection level.

    Args:
        nx: Number of grid cells along x.
        ny: Number of grid cells along y.
        i: Index of the detection level; selects ``anchors[i]``.
        strides: Not used by this function; kept so existing callers can
            keep passing it positionally.
        anchors: One sequence per level holding that level's anchor sizes as
            ``w0, h0, w1, h1, ...``. The values are used as-is (they are not
            multiplied by the stride).

    Returns:
        A ``(grid, anchor_grid)`` tuple, both shaped
        ``(1, num_anchors, ny, nx, 2)``. ``grid[..., y, x, :]`` is
        ``(x - 0.5, y - 0.5)`` and ``anchor_grid`` repeats each anchor's
        ``(w, h)`` over every cell.
    """
    # Count the anchors of the level actually being built. The previous code
    # always used level 0, which made the reshape below fail whenever levels
    # carried different numbers of anchors.
    level_anchors = np.array(anchors[i])
    num_anchors = level_anchors.size // 2

    ys = np.arange(ny, dtype=np.int32)
    xs = np.arange(nx, dtype=np.int32)
    yv, xv = np.meshgrid(ys, xs, indexing="ij")
    grid = np.stack((xv, yv), 2)  # (ny, nx, 2), last axis ordered (x, y)
    grid = np.expand_dims(grid, axis=0).repeat(num_anchors, axis=0)
    grid = np.expand_dims(grid, axis=0) - 0.5  # grid offset, i.e. y = 2.0 * x - 0.5

    anchor_grid = level_anchors.reshape((1, num_anchors, 1, 1, 2))
    anchor_grid = anchor_grid.repeat(ny, axis=2).repeat(nx, axis=3)
    return grid, anchor_grid
def sigmoid(x):
    """Apply the logistic function ``1 / (1 + e^-x)`` element-wise."""
    denominator = np.exp(-x) + 1
    return 1 / denominator
def xywh2xyxy(x):
    """Convert boxes from center form ``[cx, cy, w, h]`` to corner form ``[x1, y1, x2, y2]``.

    Only the first four entries along the last axis are rewritten; any further
    entries are copied through unchanged. The input array is not modified.
    """
    center_x, center_y = x[..., 0], x[..., 1]
    half_w, half_h = x[..., 2] / 2, x[..., 3] / 2

    out = np.copy(x)
    out[..., 0] = center_x - half_w  # left
    out[..., 1] = center_y - half_h  # top
    out[..., 2] = center_x + half_w  # right
    out[..., 3] = center_y + half_h  # bottom
    return out
def letterbox(im, new_shape=(640, 640), color=(0, 0, 0), auto=False, scaleFill=False, scaleup=True, stride=32):
    """Resize ``im`` to fit ``new_shape`` while keeping aspect ratio, then pad the remainder.

    Args:
        im: Image array whose first two dimensions are (height, width).
        new_shape: Target (height, width), or a single int for a square target.
        color: Constant border value used for the padding.
        auto: If true, pad only up to the next multiple of ``stride``.
        scaleFill: If true (and ``auto`` is false), stretch to ``new_shape``
            with no padding.
        scaleup: If false, never enlarge the image (scale factor capped at 1.0).
        stride: Modulus used when ``auto`` is true.

    Returns:
        ``(image, (ratio_w, ratio_h), (dw, dh))`` where ``dw``/``dh`` are the
        per-side padding amounts (half of the total, possibly fractional).
    """
    src_shape = im.shape[:2]  # (height, width)
    if isinstance(new_shape, int):
        new_shape = (new_shape, new_shape)
    target_h, target_w = new_shape[0], new_shape[1]

    # Largest uniform scale that still fits inside the target.
    r = min(target_h / src_shape[0], target_w / src_shape[1])
    if not scaleup:
        r = min(r, 1.0)

    ratio = (r, r)
    new_unpad = (int(round(src_shape[1] * r)), int(round(src_shape[0] * r)))  # (width, height)
    dw = target_w - new_unpad[0]
    dh = target_h - new_unpad[1]
    if auto:
        dw, dh = np.mod(dw, stride), np.mod(dh, stride)
    elif scaleFill:
        dw, dh = 0.0, 0.0
        new_unpad = (target_w, target_h)
        ratio = (target_w / src_shape[1], target_h / src_shape[0])

    # Split the total padding evenly between the two sides.
    dw /= 2
    dh /= 2

    if src_shape[::-1] != new_unpad:
        im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR)

    # The +/-0.1 nudges make an odd total padding split as floor/ceil.
    top = int(round(dh - 0.1))
    bottom = int(round(dh + 0.1))
    left = int(round(dw - 0.1))
    right = int(round(dw + 0.1))
    im = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)
    return im, ratio, (dw, dh)
def scale_boxes(img1_shape, boxes, img0_shape, ratio_pad=None):
    """Map xyxy boxes from the letterboxed image (``img1_shape``) back to the original (``img0_shape``).

    ``boxes`` is modified in place and also returned. When ``ratio_pad`` is
    given it must be ``((gain, ...), (pad_x, pad_y))``; otherwise the gain and
    padding are derived from the two shapes.
    """
    if ratio_pad is not None:
        gain = ratio_pad[0][0]
        pad = ratio_pad[1]
    else:
        # gain = letterboxed size / original size
        gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1])
        pad_x = (img1_shape[1] - img0_shape[1] * gain) / 2
        pad_y = (img1_shape[0] - img0_shape[0] * gain) / 2
        pad = (pad_x, pad_y)

    x_cols, y_cols = [0, 2], [1, 3]
    boxes[..., x_cols] -= pad[0]  # remove horizontal padding
    boxes[..., y_cols] -= pad[1]  # remove vertical padding
    boxes[..., :4] /= gain
    return boxes
def nms(boxes, iou_thresh=0.65):
    """Greedy non-maximum suppression over rows of ``[x1, y1, x2, y2, score, ...]``.

    Rows are visited in descending score order; a row is dropped when its IoU
    with an already-kept row exceeds ``iou_thresh``. Widths and heights use the
    inclusive ``+ 1`` pixel convention. Returns the kept rows, highest score first.
    """
    x1, y1, x2, y2 = boxes[:, 0], boxes[:, 1], boxes[:, 2], boxes[:, 3]
    scores = boxes[:, 4]
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)

    remaining = scores.argsort()[::-1]
    kept = []
    while remaining.size > 0:
        best, rest = remaining[0], remaining[1:]
        kept.append(best)

        # Intersection of the best box with every other remaining box.
        left = np.maximum(x1[best], x1[rest])
        top = np.maximum(y1[best], y1[rest])
        right = np.minimum(x2[best], x2[rest])
        bottom = np.minimum(y2[best], y2[rest])
        inter_w = np.maximum(0, right - left + 1)
        inter_h = np.maximum(0, bottom - top + 1)
        overlap = inter_w * inter_h

        iou = overlap / (areas[best] + areas[rest] - overlap)
        # The mask is aligned with `rest`, so no index shift is needed.
        remaining = rest[iou <= iou_thresh]
    return boxes[kept, :]
def nms_multi(boxes, conf_thresh=0.25, iou_thresh=0.65, max_num=300):
    """Run per-class NMS on raw predictions and return the surviving detections.

    Args:
        boxes: Array of rows ``[x1, y1, x2, y2, obj, cls_0, cls_1, ...]``.
        conf_thresh: Rows whose objectness (column 4) is not above this value
            are discarded. Only objectness is compared; the combined
            ``obj * cls`` score is not thresholded again.
        iou_thresh: IoU threshold forwarded to ``nms``.
        max_num: Keep at most this many top-scoring candidates (across all
            classes) before NMS.

    Returns:
        ``numpy.ndarray`` of shape ``(M, 6)`` with rows
        ``[x1, y1, x2, y2, obj * best_cls_score, class_index]``. When nothing
        survives, an empty ``(0, 6)`` array is returned, so callers can always
        slice the result (the previous code returned a plain list or the raw
        input in those cases).
    """
    no_detections = np.empty((0, 6), dtype=np.float64)

    boxes = np.asarray(boxes)
    if len(boxes) == 0:
        return no_detections

    boxes = boxes[boxes[:, 4] > conf_thresh]
    if len(boxes) == 0:
        return no_detections

    cls_score = boxes[:, 5:]
    max_cls_index = np.argmax(cls_score, axis=-1)
    max_cls_score = np.max(cls_score, axis=-1)
    dets = np.concatenate(
        [
            boxes[:, :4],  # 0:3 coordinates
            boxes[:, 4:5],  # 4 objectness
            max_cls_score[:, np.newaxis],  # 5 best class score
            max_cls_index[:, np.newaxis],  # 6 class index
        ],
        axis=-1,
    )
    dets[:, 4] = dets[:, 4] * dets[:, 5]  # final score = objectness * class score

    # Keep the max_num best candidates, then drop the raw class-score column.
    top = dets[:, 4].argsort()[::-1][:max_num]
    dets = dets[top][:, [0, 1, 2, 3, 4, 6]]

    result = []
    for c in np.unique(dets[:, -1]):
        kept = nms(dets[dets[:, -1] == c], iou_thresh=iou_thresh)
        if len(kept):
            result.append(kept)

    if not result:
        return no_detections
    return np.concatenate(result, axis=0)
def _decode_outputs(outputs, opt, imgsz):
    """Decode raw head outputs into one array of clipped xyxy predictions (one row per anchor cell)."""
    channel = len(opt.classes) + 5
    predictions = []
    for i, output in enumerate(outputs):
        num_anchor = len(opt.anchors[i]) // 2
        bs, _, ny, nx = output.shape  # x(bs,255,20,20) to x(bs,3,20,20,85)
        output = sigmoid(output.reshape(bs, num_anchor, channel, ny, nx).transpose(0, 1, 3, 4, 2))
        grid, anchor_grid = make_grid(nx, ny, i, opt.strides, opt.anchors)
        xy, wh, conf = output[..., :2], output[..., 2:4], output[..., 4:]
        xy = (xy * 2 + grid) * opt.strides[i]  # box centers
        wh = (wh * 2) ** 2 * anchor_grid  # box sizes
        prediction = np.concatenate((xy, wh, conf), 4)
        prediction = prediction.reshape(bs, num_anchor * nx * ny, channel)
        prediction = xywh2xyxy(prediction)
        # Clamp corners to the letterboxed input: x to [0, width], y to [0, height].
        prediction[..., 0:4:2] = np.clip(prediction[..., 0:4:2], a_min=0, a_max=imgsz[1])
        prediction[..., 1:4:2] = np.clip(prediction[..., 1:4:2], a_min=0, a_max=imgsz[0])
        predictions.append(prediction)
    return np.concatenate(predictions, axis=1).squeeze()


def _draw_detections(img, predictions, classes):
    """Draw every detection's rectangle and "class:score" caption on ``img`` and return it."""
    for x1, y1, x2, y2, score, label in predictions:
        # Plain Python ints for the OpenCV drawing calls.
        x1, y1, x2, y2, label = int(x1), int(y1), int(x2), int(y2), int(label)
        img = cv2.rectangle(img, (x1, y1), (x2, y2), (0, 255, 0), 2)
        text_size = cv2.getTextSize(f"{classes[label]}:{score:.3f}", cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)
        # Move the caption below the top edge when it would fall off the image.
        if (y1 - text_size[0][1]) < 0:
            textxy = (x1, y1 + text_size[0][1])
        else:
            textxy = (x1, y1)
        cv2.putText(img, f"{classes[label]}:{score:.3f}", textxy, cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2)
    return img


def onnx_inference(opt):
    """Run the detector on one image, print the detections and optionally save a visualisation.

    Args:
        opt: Options object (as produced by ``parse_opt``) providing ``model``,
            ``img``, ``imgsz``, ``anchors``, ``strides``, ``classes``, ``vis``
            and ``save_name``. ``conf_thres``, ``iou_thres`` and ``max_det``
            are honoured when present; otherwise the ``nms_multi`` defaults
            (0.25 / 0.65 / 300) apply.

    Raises:
        FileNotFoundError: If ``opt.img`` cannot be read as an image.
    """
    session = ort.InferenceSession(opt.model)
    input_name = session.get_inputs()[0].name
    output_name = [output.name for output in session.get_outputs()]
    print(f"Input_name: {input_name}, Output_name: {output_name}")

    img = cv2.imread(f'{opt.img}')
    if img is None:
        # cv2.imread reports failure by returning None rather than raising.
        raise FileNotFoundError(f"Cannot read image: {opt.img}")

    # Accept a bare int or a single-element list as a square size.
    imgsz = [opt.imgsz] if isinstance(opt.imgsz, int) else list(opt.imgsz)
    if len(imgsz) == 1:
        imgsz = imgsz * 2

    t1 = time.time()
    img_letter, ratio, (dw, dh) = letterbox(img, imgsz)  # h w c
    # Add a batch axis, reverse the channel axis (cv2 loads BGR), then HWC -> CHW.
    input_data = np.expand_dims(img_letter, axis=0)[..., ::-1].transpose((0, 3, 1, 2))
    t2 = time.time()
    print(f"Preprocess time: {(t2-t1)*1000:.2f} ms")

    t3 = time.time()
    outputs = session.run(output_name, {input_name: input_data})
    t4 = time.time()
    print(f"Inference time: {(t4-t3)*1000:.2f} ms")

    predictions = _decode_outputs(outputs, opt, imgsz)

    # Rows after NMS: [x1, y1, x2, y2, obj * cls_score, label].
    # The thresholds come from the options instead of being silently ignored.
    predictions = nms_multi(
        predictions,
        conf_thresh=getattr(opt, "conf_thres", 0.25),
        iou_thresh=getattr(opt, "iou_thres", 0.65),
        max_num=getattr(opt, "max_det", 300),
    )  # TODO multi label for one box

    if len(predictions) > 0:
        predictions[:, :4] = scale_boxes(img_letter.shape[:2], predictions[:, :4], img.shape).round()
    else:
        print("no target")

    print(f"Total detect {len(predictions)} objects")
    for i, (x1, y1, x2, y2, conf, label) in enumerate(predictions):
        print(f"{i}: {opt.classes[int(label)]}\t {conf:.3f} [{round(x1,1)}, {round(y1,1)}, {round(x2,1)}, {round(y2,1)}]")

    if opt.vis:
        # Iterating rows (rather than slicing) also works when there are no detections.
        img = _draw_detections(img, predictions, opt.classes)
        cv2.imwrite(f'{opt.save_name}', img)
def parse_opt(argv=None):
    """Parse the command-line options for the inference script.

    Args:
        argv: Optional list of argument strings. ``None`` (the default) makes
            argparse read ``sys.argv[1:]``, as before.

    Returns:
        ``argparse.Namespace`` with ``model``, ``img``, ``anchors``,
        ``strides``, ``imgsz``, ``classes``, ``conf_thres``, ``iou_thres``,
        ``max_det``, ``vis`` and ``save_name``. ``anchors`` is always a list
        with one flat ``[w0, h0, w1, h1, ...]`` list per stride, and ``imgsz``
        always has two entries (height, width).
    """

    def str2bool(value):
        # Without this, "--vis False" would store the truthy string "False".
        text = str(value).strip().lower()
        if text in ("1", "true", "t", "yes", "y", "on"):
            return True
        if text in ("0", "false", "f", "no", "n", "off"):
            return False
        raise argparse.ArgumentTypeError(f"expected a boolean value, got {value!r}")

    parser = argparse.ArgumentParser()
    parser.add_argument("--model", type=str, default="./AX650/ax_ax650_hel_algo_V1.0.0.axmodel", help="onnx model path")
    parser.add_argument("--img", type=str, default="./test.jpg", help="img_path")
    # List-valued options need nargs="+"; otherwise a command-line override
    # yields a single scalar/string instead of a list.
    parser.add_argument("--anchors", nargs="+", type=float,
                        default=[[31, 28, 38, 32, 60, 83], [84, 110, 133, 118, 200, 113]],
                        help="anchor based anchors: flat w h values, split evenly across the strides")
    parser.add_argument("--strides", nargs="+", type=float, default=[8, 16], help="model strides")
    parser.add_argument("--imgsz", "--img-size", nargs="+", type=int, default=[256, 192], help="inference size h,w")
    parser.add_argument("--classes", nargs="+", type=str, default=["helmet", "head", "e-bike", "bike"], help="classes num")
    parser.add_argument("--conf-thres", type=float, default=0.25, help="confidence threshold")
    parser.add_argument("--iou-thres", type=float, default=0.45, help="NMS IoU threshold")
    parser.add_argument("--max-det", type=int, default=50, help="maximum detections per image")
    # A bare "--vis" means True; "--vis False" / "--vis 0" turns it off.
    parser.add_argument("--vis", nargs="?", const=True, type=str2bool, default=True, help="visualize detect result")
    parser.add_argument("--save_name", type=str, default="./out.jpg", help="detect img save path")
    opt = parser.parse_args(argv)

    # A single size means a square input.
    if len(opt.imgsz) == 1:
        opt.imgsz = opt.imgsz * 2

    # Anchors given on the command line arrive flat; regroup them per stride.
    if opt.anchors and not isinstance(opt.anchors[0], (list, tuple)):
        levels = len(opt.strides)
        per_level, remainder = divmod(len(opt.anchors), levels)
        if remainder or per_level == 0 or per_level % 2:
            parser.error(
                f"--anchors needs an even number of values per stride; "
                f"got {len(opt.anchors)} values for {levels} strides"
            )
        opt.anchors = [opt.anchors[k * per_level:(k + 1) * per_level] for k in range(levels)]
    return opt
if __name__ == "__main__":
    # Script entry point: parse the command-line options, then run one inference pass.
    opt = parse_opt()
    onnx_inference(opt)