# Source: Helmet-axera / ax_hed_infer.py
#
# Commit message: Upload folder using huggingface_hub
#
# Commit: 500e650 (verified, 22 days ago)
#
# File size: 10.2 kB

	import cv2
	import os, sys
	import time
	import numpy as np
	import axengine as ort
	import argparse

	def make_grid(nx=20, ny=20, i=0, strides=(8, 16, 32), anchors=((31, 28, 38, 32, 60, 83), (84, 110, 133, 118, 200, 113))):
	    """Build the cell-offset grid and the anchor-size grid for detection level ``i``.

	    Args:
	        nx: Number of grid cells along x.
	        ny: Number of grid cells along y.
	        i: Index of the detection level; selects ``anchors[i]``.
	        strides: Not used by this function; kept so existing callers that
	            pass it positionally keep working.
	        anchors: Per-level flat sequences ``(w0, h0, w1, h1, ...)``. The
	            values are used as-is (they are not multiplied by the stride).

	    Returns:
	        Tuple ``(grid, anchor_grid)``, both shaped ``(1, na, ny, nx, 2)``
	        where ``na`` is the number of anchors of level ``i``. ``grid`` holds
	        the ``(x, y)`` cell indices minus 0.5; ``anchor_grid`` holds the
	        ``(w, h)`` of each anchor repeated over every cell.
	    """
	    # Take the anchor count from the requested level, not from level 0, so
	    # levels with a different number of anchors are handled correctly.
	    level_anchors = np.asarray(anchors[i]).reshape(-1, 2)
	    num_anchor = level_anchors.shape[0]

	    y, x = np.arange(ny, dtype=np.int32), np.arange(nx, dtype=np.int32)
	    yv, xv = np.meshgrid(y, x, indexing="ij")
	    grid = np.stack((xv, yv), 2)  # (ny, nx, 2), last axis is (x, y)
	    grid = np.expand_dims(grid, axis=0).repeat(num_anchor, axis=0)
	    # Fold the -0.5 of the decode formula (2 * xy - 0.5 + cell) into the grid.
	    grid = np.expand_dims(grid, axis=0) - 0.5

	    anchor_grid = level_anchors.reshape((1, num_anchor, 1, 1, 2))
	    anchor_grid = anchor_grid.repeat(ny, axis=2).repeat(nx, axis=3)
	    return grid, anchor_grid

	def sigmoid(x):
	    """Return the element-wise logistic function ``1 / (1 + exp(-x))``.

	    The value is computed from ``exp(-|x|)``, which always lies in
	    ``(0, 1]``, so large-magnitude inputs never overflow ``np.exp``.

	    Args:
	        x: Scalar or array-like of real numbers.

	    Returns:
	        Array of the same shape as ``x`` (a NumPy scalar for scalar input).
	    """
	    x = np.asarray(x)
	    z = np.exp(-np.abs(x))
	    # x >= 0: 1 / (1 + e^-x);  x < 0: e^x / (1 + e^x) -- the same function.
	    out = np.where(x >= 0, 1 / (1 + z), z / (1 + z))
	    return out if out.ndim else out[()]

	def xywh2xyxy(x):
	    """Turn centre-format boxes ``[cx, cy, w, h]`` into corner format ``[x1, y1, x2, y2]``.

	    Only the first four entries of the last axis are rewritten; any further
	    entries are copied through unchanged. The input array is not modified.
	    """
	    centre_x, centre_y = x[..., 0], x[..., 1]
	    half_w = x[..., 2] / 2
	    half_h = x[..., 3] / 2

	    corners = np.copy(x)
	    corners[..., 0] = centre_x - half_w  # left
	    corners[..., 1] = centre_y - half_h  # top
	    corners[..., 2] = centre_x + half_w  # right
	    corners[..., 3] = centre_y + half_h  # bottom
	    return corners

	def letterbox(im, new_shape=(640, 640), color=(0, 0, 0), auto=False, scaleFill=False, scaleup=True, stride=32):
	    """Resize ``im`` to fit ``new_shape`` and pad the remainder with ``color``.

	    Args:
	        im: Image array whose first two dimensions are (height, width).
	        new_shape: Target (height, width), or a single int for a square target.
	        color: Border value handed to ``cv2.copyMakeBorder``.
	        auto: If true, reduce the padding to its remainder modulo ``stride``.
	        scaleFill: If true (and ``auto`` is false), stretch to ``new_shape``
	            with no padding.
	        scaleup: If false, never enlarge the image (scale factor capped at 1).
	        stride: Modulus used when ``auto`` is true.

	    Returns:
	        ``(image, ratio, (dw, dh))`` where ``ratio`` is the (width, height)
	        scale pair and ``dw``/``dh`` are the per-side paddings.
	    """
	    src_h, src_w = im.shape[:2]
	    if isinstance(new_shape, int):
	        new_shape = (new_shape, new_shape)
	    dst_h, dst_w = new_shape[0], new_shape[1]

	    # Uniform scale that makes the image fit inside the target.
	    scale = min(dst_h / src_h, dst_w / src_w)
	    if not scaleup:
	        scale = min(scale, 1.0)

	    ratio = (scale, scale)
	    new_unpad = (int(round(src_w * scale)), int(round(src_h * scale)))  # (w, h)
	    dw = dst_w - new_unpad[0]
	    dh = dst_h - new_unpad[1]
	    if auto:
	        dw, dh = np.mod(dw, stride), np.mod(dh, stride)
	    elif scaleFill:
	        dw, dh = 0.0, 0.0
	        new_unpad = (dst_w, dst_h)
	        ratio = (dst_w / src_w, dst_h / src_h)

	    # The padding is shared between the two opposite sides.
	    dw = dw / 2
	    dh = dh / 2

	    if (src_w, src_h) != new_unpad:
	        im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR)

	    # The +-0.1 nudges push an odd total's extra pixel to the bottom/right.
	    top = int(round(dh - 0.1))
	    bottom = int(round(dh + 0.1))
	    left = int(round(dw - 0.1))
	    right = int(round(dw + 0.1))
	    im = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)

	    return im, ratio, (dw, dh)

	def scale_boxes(img1_shape, boxes, img0_shape, ratio_pad=None):
	    """Map xyxy boxes from the ``img1_shape`` frame back to the ``img0_shape`` frame.

	    ``boxes`` is modified in place and also returned. When ``ratio_pad`` is
	    given it must be ``((gain, ...), (pad_x, pad_y))``; otherwise gain and
	    padding are derived from the two (height, width) shapes.
	    """
	    if ratio_pad is not None:
	        gain = ratio_pad[0][0]
	        pad = ratio_pad[1]
	    else:
	        net_h, net_w = img1_shape[0], img1_shape[1]
	        src_h, src_w = img0_shape[0], img0_shape[1]
	        gain = min(net_h / src_h, net_w / src_w)
	        # Half of whatever the scaled source leaves uncovered on each axis.
	        pad = (net_w - src_w * gain) / 2, (net_h - src_h * gain) / 2
	    pad_x, pad_y = pad[0], pad[1]

	    boxes[..., [0, 2]] -= pad_x
	    boxes[..., [1, 3]] -= pad_y
	    boxes[..., :4] /= gain
	    return boxes

	def nms(boxes, iou_thresh=0.65):
	    """Greedy non-maximum suppression over rows ``[x1, y1, x2, y2, score, ...]``.

	    The highest-scoring remaining box is kept and every other remaining box
	    whose IoU with it exceeds ``iou_thresh`` is discarded, until none are
	    left. Widths and heights use the inclusive ``+ 1`` pixel convention.

	    Returns:
	        The kept rows of ``boxes``, ordered by descending score.
	    """
	    x1, y1, x2, y2 = boxes[:, 0], boxes[:, 1], boxes[:, 2], boxes[:, 3]
	    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
	    candidates = boxes[:, 4].argsort()[::-1]

	    kept = []
	    while candidates.size > 0:
	        best, rest = candidates[0], candidates[1:]
	        kept.append(best)

	        # Intersection rectangle of the best box with each remaining box.
	        left = np.maximum(x1[best], x1[rest])
	        top = np.maximum(y1[best], y1[rest])
	        right = np.minimum(x2[best], x2[rest])
	        bottom = np.minimum(y2[best], y2[rest])
	        inter = np.maximum(0, right - left + 1) * np.maximum(0, bottom - top + 1)

	        iou = inter / (areas[best] + areas[rest] - inter)
	        candidates = rest[iou <= iou_thresh]

	    return boxes[kept, :]

	def nms_multi(boxes, conf_thresh=0.25, iou_thresh=0.65, max_num=300):
	    """Run class-wise NMS on raw predictions.

	    Args:
	        boxes: Array of rows ``[x1, y1, x2, y2, obj, cls_0, cls_1, ...]``.
	        conf_thresh: Rows whose ``obj`` value is not above this are dropped.
	        iou_thresh: IoU threshold forwarded to ``nms``.
	        max_num: Only the ``max_num`` best-scoring candidates enter NMS.

	    Returns:
	        Array of shape ``(k, 6)`` with rows
	        ``[x1, y1, x2, y2, obj * best_class_score, class_index]``, grouped by
	        ascending class index. When nothing survives, an empty ``(0, 6)``
	        array is returned (never a plain list), so callers can slice the
	        result unconditionally.
	    """
	    boxes = np.asarray(boxes)
	    if len(boxes) == 0:
	        return np.empty((0, 6), dtype=boxes.dtype)

	    boxes = boxes[boxes[:, 4] > conf_thresh]
	    if len(boxes) == 0:
	        return np.empty((0, 6), dtype=boxes.dtype)

	    cls_score = boxes[:, 5:]
	    max_cls_index = np.argmax(cls_score, axis=-1)
	    max_cls_score = np.max(cls_score, axis=-1)

	    # Single label per box: keep only the best class and fold it into the score.
	    score = boxes[:, 4] * max_cls_score
	    dets = np.concatenate(
	        [boxes[:, :4], score[:, np.newaxis], max_cls_index[:, np.newaxis]],
	        axis=-1,
	    )
	    top = dets[:, 4].argsort()[::-1][:max_num]
	    dets = dets[top, :]

	    result = []
	    for c in np.unique(dets[:, -1]):
	        nmsed_det = nms(dets[dets[:, -1] == c], iou_thresh=iou_thresh)
	        if len(nmsed_det):
	            result.append(nmsed_det)

	    if not result:
	        return np.empty((0, 6), dtype=dets.dtype)
	    return np.concatenate(result, axis=0)

	def onnx_inference(opt):
	    """Detect objects in ``opt.img`` with the model ``opt.model`` and report them.

	    The image is letterboxed to ``opt.imgsz``, run through an ``axengine``
	    inference session, decoded per output level with ``opt.anchors`` and
	    ``opt.strides``, filtered with ``nms_multi`` and mapped back to the
	    original image size. Detections are printed; when ``opt.vis`` is truthy
	    they are also drawn on the image, which is written to ``opt.save_name``.

	    Args:
	        opt: Options namespace as produced by ``parse_opt``. ``conf_thres``,
	            ``iou_thres`` and ``max_det`` are optional; when absent the
	            ``nms_multi`` defaults are used.

	    Raises:
	        FileNotFoundError: If ``opt.img`` cannot be read as an image.
	    """
	    session = ort.InferenceSession(opt.model)

	    input_name = session.get_inputs()[0].name
	    output_name = [output.name for output in session.get_outputs()]
	    print(f"Input_name: {input_name}, Output_name: {output_name}")

	    img = cv2.imread(f'{opt.img}')
	    if img is None:
	        # cv2.imread signals failure by returning None instead of raising.
	        raise FileNotFoundError(f"Cannot read image: {opt.img}")

	    # Accept an int or a single-element list as a square (h, w) size.
	    imgsz = [opt.imgsz] * 2 if isinstance(opt.imgsz, int) else list(opt.imgsz)
	    if len(imgsz) == 1:
	        imgsz = imgsz * 2
	    net_h, net_w = imgsz[0], imgsz[1]

	    t1 = time.time()
	    img_letter, _, _ = letterbox(img, imgsz)  # h w c
	    # Reverse the channel axis and reorder HWC -> NCHW with a batch of one.
	    input_data = np.expand_dims(img_letter, axis=0)[..., ::-1].transpose((0, 3, 1, 2))
	    t2 = time.time()
	    print(f"Preprocess time: {(t2-t1)*1000:.2f} ms")
	    t3 = time.time()
	    outputs = session.run(output_name, {input_name: input_data})
	    t4 = time.time()
	    print(f"Inference time: {(t4-t3)*1000:.2f} ms")

	    channel = len(opt.classes) + 5  # x, y, w, h, obj + one score per class
	    predictions = []

	    for i, output in enumerate(outputs):
	        num_anchor = len(opt.anchors[i]) // 2
	        # (bs, num_anchor * channel, ny, nx) -> (bs, num_anchor, ny, nx, channel)
	        bs, _, ny, nx = output.shape
	        output = sigmoid(output.reshape(bs, num_anchor, channel, ny, nx).transpose(0, 1, 3, 4, 2))

	        grid, anchor_grid = make_grid(nx, ny, i, opt.strides, opt.anchors)

	        xy, wh, conf = output[..., :2], output[..., 2:4], output[..., 4:]

	        xy = (xy * 2 + grid) * opt.strides[i]  # box centre in network-input pixels
	        wh = (wh * 2) ** 2 * anchor_grid  # box size in network-input pixels

	        prediction = np.concatenate((xy, wh, conf), 4)
	        prediction = prediction.reshape(bs, num_anchor * nx * ny, channel)

	        prediction = xywh2xyxy(prediction)
	        prediction[..., 0:4:2] = np.clip(prediction[..., 0:4:2], a_min=0, a_max=net_w)
	        prediction[..., 1:4:2] = np.clip(prediction[..., 1:4:2], a_min=0, a_max=net_h)

	        predictions.append(prediction)

	    # Only one image is fed, so take batch element 0 explicitly rather than
	    # relying on squeeze(), which would also drop a candidate axis of size 1.
	    predictions = np.concatenate(predictions, axis=1)[0]

	    # Rows after NMS: [x1, y1, x2, y2, obj * cls_score, label]
	    predictions = nms_multi(
	        predictions,
	        conf_thresh=getattr(opt, "conf_thres", 0.25),
	        iou_thresh=getattr(opt, "iou_thres", 0.65),
	        max_num=getattr(opt, "max_det", 300),
	    )  # TODO multi label for one box
	    # Normalise to a (k, 6) array so the empty case can be sliced as well.
	    predictions = np.asarray(predictions, dtype=float).reshape(-1, 6)

	    if len(predictions) > 0:
	        predictions[:, :4] = scale_boxes(img_letter.shape[:2], predictions[:, :4], img.shape).round()
	        # Boxes that started inside the letterbox padding can land outside
	        # the original image; clamp them to its bounds.
	        predictions[:, [0, 2]] = predictions[:, [0, 2]].clip(0, img.shape[1])
	        predictions[:, [1, 3]] = predictions[:, [1, 3]].clip(0, img.shape[0])
	    else:
	        print("no target")

	    print(f"Total detect {len(predictions)} objects")
	    for i, (x1, y1, x2, y2, conf, label) in enumerate(predictions):
	        print(f"{i}: {opt.classes[int(label)]}\t {conf:.3f} [{round(x1,1)}, {round(y1,1)}, {round(x2,1)}, {round(y2,1)}]")

	    if opt.vis:
	        box_xyxy = predictions[:, :4].astype(np.int32)
	        scores = predictions[:, -2]
	        labels = predictions[:, -1].astype(np.int32)

	        for (x1, y1, x2, y2), score, label in zip(box_xyxy, scores, labels):
	            # Plain Python ints keep the OpenCV drawing calls happy.
	            x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2)
	            caption = f"{opt.classes[int(label)]}:{score:.3f}"
	            img = cv2.rectangle(img, (x1, y1), (x2, y2), (0, 255, 0), 2)
	            text_size = cv2.getTextSize(caption, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)
	            # Move the caption inside the box when it would run off the top.
	            if (y1 - text_size[0][1]) < 0:
	                textxy = (x1, y1 + text_size[0][1])
	            else:
	                textxy = (x1, y1)
	            cv2.putText(img, caption, textxy, cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2)

	        if not cv2.imwrite(f'{opt.save_name}', img):
	            print(f"Failed to write result image: {opt.save_name}")

	def _str2bool(value):
	    """Parse a command-line boolean such as ``true``/``false``/``1``/``0``."""
	    if isinstance(value, bool):
	        return value
	    text = str(value).strip().lower()
	    if text in {"1", "true", "t", "yes", "y", "on"}:
	        return True
	    if text in {"0", "false", "f", "no", "n", "off"}:
	        return False
	    raise argparse.ArgumentTypeError(f"expected a boolean, got {value!r}")


	def _anchor_level(text):
	    """Parse one detection level's anchors from ``"w0,h0,w1,h1,..."``."""
	    try:
	        values = [float(v) for v in text.split(",")]
	    except ValueError:
	        raise argparse.ArgumentTypeError(f"invalid anchor list: {text!r}") from None
	    if len(values) % 2:
	        raise argparse.ArgumentTypeError(f"anchor list needs w,h pairs: {text!r}")
	    return values


	def parse_opt(argv=None):
	    """Parse the command-line options of the detector.

	    Args:
	        argv: Optional list of argument strings; ``None`` (the default)
	            makes argparse read ``sys.argv[1:]``.

	    Returns:
	        ``argparse.Namespace`` with ``model``, ``img``, ``anchors``,
	        ``strides``, ``imgsz``, ``classes``, ``conf_thres``, ``iou_thres``,
	        ``max_det``, ``vis`` and ``save_name``.
	    """
	    parser = argparse.ArgumentParser()
	    parser.add_argument("--model", type=str, default="./AX650/ax_ax650_hel_algo_V1.0.0.axmodel", help="axmodel path")
	    parser.add_argument("--img", type=str, default="./test.jpg", help="img_path")
	    # One argument per detection level, e.g. --anchors 31,28,38,32,60,83 84,110,133,118,200,113
	    parser.add_argument("--anchors", nargs="+", type=_anchor_level,
	                        default=[[31, 28, 38, 32, 60, 83], [84, 110, 133, 118, 200, 113]],
	                        help="anchors per level as comma-separated w,h pairs")
	    parser.add_argument("--strides", nargs="+", type=int, default=[8, 16], help="model strides, one per level")
	    parser.add_argument("--imgsz", "--img-size", nargs="+", type=int, default=[256, 192], help="inference size h,w")
	    parser.add_argument("--classes", nargs="+", type=str, default=["helmet", "head", "e-bike", "bike"], help="class names")
	    parser.add_argument("--conf-thres", type=float, default=0.25, help="confidence threshold")
	    parser.add_argument("--iou-thres", type=float, default=0.45, help="NMS IoU threshold")
	    parser.add_argument("--max-det", type=int, default=50, help="maximum detections per image")
	    # A bare "--vis" and "--vis true" enable drawing; "--vis false" turns it off.
	    parser.add_argument("--vis", nargs="?", const=True, type=_str2bool, default=True, help="visualize detect result")
	    parser.add_argument("--save_name", type=str, default="./out.jpg", help="detect img save path")
	    return parser.parse_args(argv)

	if __name__ == "__main__":
	    # Script entry point: parse the command line, then run one detection pass.
	    onnx_inference(parse_opt())