import argparse
import os
import sys
import time

import axengine as ort
import cv2
import numpy as np
|
|
|
def make_grid(nx=20, ny=20, i=0, strides=(8, 16, 32), anchors=((31, 28, 38, 32, 60, 83), (84, 110, 133, 118, 200, 113))):
    """Build the cell-offset grid and the anchor-size grid for detection head ``i``.

    Args:
        nx: Number of grid cells along x.
        ny: Number of grid cells along y.
        i: Index of the detection head; selects ``anchors[i]``.
        strides: Not used by this function; kept so existing callers that pass
            it positionally keep working.
        anchors: One flat sequence ``(w0, h0, w1, h1, ...)`` per detection head.

    Returns:
        Tuple ``(grid, anchor_grid)``, both of shape
        ``(1, num_anchors, ny, nx, 2)``. ``grid[..., y, x, :]`` is
        ``(x - 0.5, y - 0.5)``; ``anchor_grid`` holds the ``(w, h)`` of each
        anchor of head ``i`` repeated over every cell.
    """
    # Take the anchor count from the selected head itself, so the reshape below
    # cannot disagree with the data it is applied to.
    head_anchors = np.array(anchors[i])
    num_anchors = head_anchors.size // 2

    ys = np.arange(ny, dtype=np.int32)
    xs = np.arange(nx, dtype=np.int32)
    yv, xv = np.meshgrid(ys, xs, indexing="ij")

    # Last axis is ordered (x, y) to match the xy slice of the predictions.
    grid = np.stack((xv, yv), 2)
    grid = np.expand_dims(grid, axis=0).repeat(num_anchors, axis=0)
    grid = np.expand_dims(grid, axis=0) - 0.5

    anchor_grid = head_anchors.reshape((1, num_anchors, 1, 1, 2))
    anchor_grid = anchor_grid.repeat(ny, axis=2).repeat(nx, axis=3)

    return grid, anchor_grid
|
|
|
def sigmoid(x):
    """Return the element-wise logistic function ``1 / (1 + exp(-x))``.

    The exponential is only ever evaluated on ``-|x|``, so it stays in
    ``(0, 1]`` and cannot overflow for large-magnitude negative inputs. For
    ``x >= 0`` the arithmetic is the same as the naive formula.

    Args:
        x: Scalar or array-like of real numbers.

    Returns:
        ``numpy.ndarray`` with the same shape as ``x`` (0-d for a scalar).
    """
    x = np.asarray(x)
    e = np.exp(-np.abs(x))
    # x >= 0: 1 / (1 + exp(-x));  x < 0: exp(x) / (1 + exp(x)) -- same value,
    # but written so that no intermediate can overflow.
    return np.where(x >= 0, 1 / (1 + e), e / (1 + e))
|
|
|
def xywh2xyxy(x):
    """Convert boxes from centre format to corner format.

    The first four entries of the last axis are read as
    ``[x_center, y_center, width, height]`` and written back as
    ``[x1, y1, x2, y2]`` (top-left, bottom-right). Any further entries on the
    last axis are copied through unchanged. The input is not modified.
    """
    corners = np.copy(x)

    half_w = x[..., 2] / 2
    half_h = x[..., 3] / 2

    # Horizontal extent.
    corners[..., 0] = x[..., 0] - half_w
    corners[..., 2] = x[..., 0] + half_w
    # Vertical extent.
    corners[..., 1] = x[..., 1] - half_h
    corners[..., 3] = x[..., 1] + half_h

    return corners
|
|
|
def letterbox(im, new_shape=(640, 640), color=(0, 0, 0), auto=False, scaleFill=False, scaleup=True, stride=32):
    """Resize an image to fit ``new_shape`` and pad the remainder with ``color``.

    Args:
        im: Image array whose first two dimensions are (height, width).
        new_shape: Target (height, width), or a single int for a square target.
        color: Border fill value passed to ``cv2.copyMakeBorder``.
        auto: If true, pad only up to the next multiple of ``stride``.
        scaleFill: If true (and ``auto`` is false), stretch to ``new_shape``
            without padding.
        scaleup: If false, never enlarge the image (scale is capped at 1.0).
        stride: Stride used for the ``auto`` padding.

    Returns:
        Tuple ``(image, ratio, (dw, dh))`` where ``ratio`` is the
        (width, height) scale factors and ``dw``/``dh`` are the padding
        applied to each side horizontally/vertically (already halved).
    """
    src_h, src_w = im.shape[:2]
    if isinstance(new_shape, int):
        new_shape = (new_shape, new_shape)
    dst_h, dst_w = new_shape[0], new_shape[1]

    # Uniform scale that fits the image inside the target.
    scale = min(dst_h / src_h, dst_w / src_w)
    if not scaleup:
        scale = min(scale, 1.0)

    ratio = scale, scale
    new_unpad = int(round(src_w * scale)), int(round(src_h * scale))  # (w, h)
    dw = dst_w - new_unpad[0]
    dh = dst_h - new_unpad[1]

    if auto:
        dw, dh = np.mod(dw, stride), np.mod(dh, stride)
    elif scaleFill:
        dw, dh = 0.0, 0.0
        new_unpad = (dst_w, dst_h)
        ratio = dst_w / src_w, dst_h / src_h

    # Split the padding between the two opposite sides.
    dw /= 2
    dh /= 2

    if (src_w, src_h) != new_unpad:
        im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR)

    # The -/+ 0.1 nudges send the odd pixel of an uneven split to bottom/right.
    top = int(round(dh - 0.1))
    bottom = int(round(dh + 0.1))
    left = int(round(dw - 0.1))
    right = int(round(dw + 0.1))
    im = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)

    return im, ratio, (dw, dh)
|
|
|
def scale_boxes(img1_shape, boxes, img0_shape, ratio_pad=None):
    """Map xyxy boxes from the padded/resized image back to the original image.

    Args:
        img1_shape: (height, width) of the image the boxes currently refer to.
        boxes: Array whose last axis starts with ``[x1, y1, x2, y2]``.
            Modified in place.
        img0_shape: Shape of the original image; only the first two entries
            (height, width) are read.
        ratio_pad: Optional ``((gain, ...), (pad_x, pad_y))``. When omitted the
            gain and padding are derived from the two shapes.

    Returns:
        The same ``boxes`` object, after removing the padding and dividing the
        first four columns by the gain.
    """
    if ratio_pad is not None:
        gain = ratio_pad[0][0]
        pad_x, pad_y = ratio_pad[1][0], ratio_pad[1][1]
    else:
        h1, w1 = img1_shape[0], img1_shape[1]
        h0, w0 = img0_shape[0], img0_shape[1]
        gain = min(h1 / h0, w1 / w0)
        pad_x = (w1 - w0 * gain) / 2
        pad_y = (h1 - h0 * gain) / 2

    # Remove the border offset, then undo the resize.
    boxes[..., 0] -= pad_x
    boxes[..., 2] -= pad_x
    boxes[..., 1] -= pad_y
    boxes[..., 3] -= pad_y
    boxes[..., :4] /= gain
    return boxes
|
|
|
def nms(boxes, iou_thresh=0.65):
    """Greedy non-maximum suppression over a single set of boxes.

    Args:
        boxes: 2-D array whose columns start with ``[x1, y1, x2, y2, score]``;
            extra columns are carried through.
        iou_thresh: A box is dropped when its IoU with an already kept box is
            greater than this value.

    Returns:
        The kept rows of ``boxes``, ordered by descending score.
    """
    x1, y1 = boxes[:, 0], boxes[:, 1]
    x2, y2 = boxes[:, 2], boxes[:, 3]
    scores = boxes[:, 4]
    # Widths/heights use the inclusive "+ 1" pixel convention.
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)

    remaining = scores.argsort()[::-1]
    kept = []
    while remaining.size > 0:
        best, rest = remaining[0], remaining[1:]
        kept.append(best)

        # Intersection of the best box with every other candidate.
        left = np.maximum(x1[best], x1[rest])
        top = np.maximum(y1[best], y1[rest])
        right = np.minimum(x2[best], x2[rest])
        bottom = np.minimum(y2[best], y2[rest])
        inter = np.maximum(0, right - left + 1) * np.maximum(0, bottom - top + 1)

        iou = inter / (areas[best] + areas[rest] - inter)
        remaining = rest[iou <= iou_thresh]

    return boxes[kept, :]
|
|
|
def nms_multi(boxes, conf_thresh=0.25, iou_thresh=0.65, max_num=300):
    """Run per-class non-maximum suppression on raw multi-class predictions.

    Args:
        boxes: 2-D array with columns
            ``[x1, y1, x2, y2, objectness, class_score_0, class_score_1, ...]``.
        conf_thresh: Rows whose objectness is not greater than this are dropped.
        iou_thresh: IoU threshold forwarded to ``nms``.
        max_num: Maximum number of candidates (by final score) kept before NMS.

    Returns:
        ``numpy.ndarray`` of shape ``(N, 6)`` with columns
        ``[x1, y1, x2, y2, score, class_index]`` where
        ``score = objectness * best_class_score``. When nothing survives, an
        empty ``(0, 6)`` array is returned, so callers can always slice the
        result.
    """
    no_detections = np.zeros((0, 6))

    boxes = np.asarray(boxes)
    if len(boxes) == 0:
        return no_detections

    boxes = boxes[boxes[:, 4] > conf_thresh]
    if len(boxes) == 0:
        return no_detections

    cls_score = boxes[:, 5:]
    max_cls_index = np.argmax(cls_score, axis=-1)
    max_cls_score = np.max(cls_score, axis=-1)

    dets = np.concatenate(
        [
            boxes[:, :4],
            boxes[:, 4:5],
            max_cls_score[:, np.newaxis],
            max_cls_index[:, np.newaxis],
        ],
        axis=-1,
    )
    # Final score = objectness * best class score.
    # NOTE(review): this combined score is not compared against conf_thresh
    # again; only the objectness is thresholded above.
    dets[:, 4] = dets[:, 4] * dets[:, 5]

    # Keep the max_num best candidates, then drop the raw class-score column.
    top = dets[:, 4].argsort()[::-1][:max_num]
    dets = dets[top, :][:, [0, 1, 2, 3, 4, 6]]

    result = []
    # Suppress each class independently; labels come out in ascending order.
    for c in np.unique(dets[:, -1]):
        nmsed_det = nms(dets[dets[:, -1] == c], iou_thresh=iou_thresh)
        if len(nmsed_det):
            result.append(nmsed_det)

    if not result:
        return no_detections
    return np.concatenate(result, axis=0)
|
|
|
def _decode_head(output, i, opt, imgsz):
    """Decode the raw output of detection head ``i`` into clipped xyxy predictions."""
    num_anchor = len(opt.anchors[0]) // 2
    channel = len(opt.classes) + 5

    # The reshape requires the head output to be (bs, num_anchor * channel, ny, nx).
    bs, _, ny, nx = output.shape
    output = sigmoid(output.reshape(bs, num_anchor, channel, ny, nx).transpose(0, 1, 3, 4, 2))

    grid, anchor_grid = make_grid(nx, ny, i, opt.strides, opt.anchors)

    xy, wh, conf = output[..., :2], output[..., 2:4], output[..., 4:]
    xy = (xy * 2 + grid) * opt.strides[i]
    wh = (wh * 2) ** 2 * anchor_grid

    prediction = np.concatenate((xy, wh, conf), 4)
    prediction = prediction.reshape(bs, num_anchor * nx * ny, channel)

    prediction = xywh2xyxy(prediction)
    # Keep the corners inside the network input: x against width, y against height.
    prediction[..., 0:4:2] = np.clip(prediction[..., 0:4:2], a_min=0, a_max=imgsz[1])
    prediction[..., 1:4:2] = np.clip(prediction[..., 1:4:2], a_min=0, a_max=imgsz[0])
    return prediction


def _draw_detections(img, predictions, classes):
    """Draw a box and a ``class:score`` caption on ``img`` for every detection row."""
    for x1, y1, x2, y2, score, label in predictions:
        # Plain Python ints: OpenCV point arguments are safest with built-in types.
        x1, y1, x2, y2, label = int(x1), int(y1), int(x2), int(y2), int(label)
        caption = f"{classes[label]}:{score:.3f}"

        img = cv2.rectangle(img, (x1, y1), (x2, y2), (0, 255, 0), 2)
        text_size = cv2.getTextSize(caption, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)
        # Move the caption inside the box when it would run off the top edge.
        if (y1 - text_size[0][1]) < 0:
            textxy = (x1, y1 + text_size[0][1])
        else:
            textxy = (x1, y1)
        cv2.putText(img, caption, textxy, cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2)
    return img


def onnx_inference(opt):
    """Run the detector on one image, print the detections and optionally save a visualisation.

    Args:
        opt: Parsed options. Reads ``model``, ``img``, ``imgsz``, ``anchors``,
            ``strides``, ``classes``, ``conf_thres``, ``iou_thres``,
            ``max_det``, ``vis`` and ``save_name``.

    Raises:
        FileNotFoundError: If ``opt.img`` cannot be read by OpenCV.
    """
    session = ort.InferenceSession(opt.model)

    input_name = session.get_inputs()[0].name
    output_name = [output.name for output in session.get_outputs()]
    print(f"Input_name: {input_name}, Output_name: {output_name}")

    img = cv2.imread(f'{opt.img}')
    if img is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise FileNotFoundError(f"Failed to read image: {opt.img}")

    # A single --imgsz value means a square input.
    imgsz = list(opt.imgsz) * 2 if len(opt.imgsz) == 1 else opt.imgsz

    t1 = time.perf_counter()
    img_letter, _, _ = letterbox(img, imgsz)
    # HWC BGR -> 1 x C x H x W with the channel order reversed.
    input_data = np.expand_dims(img_letter, axis=0)[..., ::-1].transpose((0, 3, 1, 2))
    t2 = time.perf_counter()
    print(f"Preprocess time: {(t2-t1)*1000:.2f} ms")

    t3 = time.perf_counter()
    outputs = session.run(output_name, {input_name: input_data})
    t4 = time.perf_counter()
    print(f"Inference time: {(t4-t3)*1000:.2f} ms")

    predictions = [_decode_head(output, i, opt, imgsz) for i, output in enumerate(outputs)]
    # Only one image is fed in, so take batch element 0 explicitly; a bare
    # squeeze() would also collapse other length-1 axes.
    predictions = np.concatenate(predictions, axis=1)[0]

    # Honour the thresholds given on the command line.
    predictions = nms_multi(
        predictions,
        conf_thresh=opt.conf_thres,
        iou_thresh=opt.iou_thres,
        max_num=opt.max_det,
    )
    # Normalise "no detections" to an empty (0, 6) array so the slicing below is safe.
    predictions = np.asarray(predictions).reshape(-1, 6)

    if len(predictions) > 0:
        predictions[:, :4] = scale_boxes(img_letter.shape[:2], predictions[:, :4], img.shape).round()
    else:
        print("no target")

    print(f"Total detect {len(predictions)} objects")
    for i, (x1, y1, x2, y2, conf, label) in enumerate(predictions):
        print(f"{i}: {opt.classes[int(label)]}\t {conf:.3f} [{round(x1,1)}, {round(y1,1)}, {round(x2,1)}, {round(y2,1)}]")

    if opt.vis:
        img = _draw_detections(img, predictions, opt.classes)
        cv2.imwrite(f'{opt.save_name}', img)
|
|
|
def _str2bool(value):
    """Convert a command-line token such as ``true``/``0``/``no`` to a bool."""
    if isinstance(value, bool):
        return value
    text = str(value).strip().lower()
    if text in ("1", "true", "t", "yes", "y", "on"):
        return True
    if text in ("0", "false", "f", "no", "n", "off"):
        return False
    raise argparse.ArgumentTypeError(f"expected a boolean value, got {value!r}")


def parse_opt(argv=None):
    """Parse the command-line options of the detection script.

    Args:
        argv: Optional list of argument strings. ``None`` (the default) makes
            argparse read ``sys.argv[1:]``.

    Returns:
        ``argparse.Namespace`` with ``model``, ``img``, ``anchors``,
        ``strides``, ``imgsz``, ``classes``, ``conf_thres``, ``iou_thres``,
        ``max_det``, ``vis`` and ``save_name``.
    """
    default_anchors = [[31, 28, 38, 32, 60, 83], [84, 110, 133, 118, 200, 113]]

    parser = argparse.ArgumentParser()
    parser.add_argument("--model", type=str, default="./AX650/ax_ax650_hel_algo_V1.0.0.axmodel", help="onnx model path")
    parser.add_argument("--img", type=str, default="./test.jpg", help="img_path")
    # One --anchors flag per detection head, e.g. "--anchors 31 28 38 32 60 83".
    # The default is applied after parsing because action="append" would
    # otherwise append to the default list instead of replacing it.
    parser.add_argument("--anchors", type=float, nargs="+", action="append", default=None, help="anchor based anchors")
    parser.add_argument("--strides", type=float, nargs="+", default=[8, 16], help="model strides")
    parser.add_argument("--imgsz", "--img-size", nargs="+", type=int, default=[256, 192], help="inference size h,w")
    parser.add_argument("--classes", type=str, nargs="+", default=["helmet", "head", "e-bike", "bike"], help="classes num")
    parser.add_argument("--conf-thres", type=float, default=0.25, help="confidence threshold")
    parser.add_argument("--iou-thres", type=float, default=0.45, help="NMS IoU threshold")
    parser.add_argument("--max-det", type=int, default=50, help="maximum detections per image")
    # Accepts "--vis", "--vis true" and "--vis false"; without a converter any
    # non-empty string (including "False") would be truthy.
    parser.add_argument("--vis", type=_str2bool, nargs="?", const=True, default=True, help="visualize detect result")
    parser.add_argument("--save_name", type=str, default="./out.jpg", help="detect img save path")
    opt = parser.parse_args(argv)

    if opt.anchors is None:
        opt.anchors = default_anchors
    elif any(len(head) % 2 for head in opt.anchors):
        parser.error("--anchors expects an even number of values (w h pairs) per head")

    return opt
|
|
|
if __name__ == "__main__":
    # Script entry point: parse the command-line options and run one detection pass.
    onnx_inference(parse_opt())