import cv2
import os, sys
import time
import numpy as np
import axengine as ort
import argparse

def make_grid(nx=20, ny=20, i=0, strides=[8, 16, 32], anchors=[[31,28, 38,32, 60,83],[84,110, 133,118, 200,113]]):
    """Build the cell-offset grid and the anchor-size grid for detection level ``i``.

    Args:
        nx: Number of grid cells along x.
        ny: Number of grid cells along y.
        i: Index into ``anchors`` selecting the detection level.
        strides: Not used by this function; kept so existing call sites keep working.
        anchors: One flat list ``[w0, h0, w1, h1, ...]`` per detection level.
            The values are used as given (no stride scaling is applied here).

    Returns:
        ``(grid, anchor_grid)``, both of shape ``(1, na, ny, nx, 2)`` where ``na``
        is the number of anchors of level ``i``. ``grid[..., 0]`` is the cell's
        x index and ``grid[..., 1]`` its y index, each shifted by -0.5.
        ``anchor_grid`` holds the (w, h) of each anchor repeated over all cells.

    Note:
        The list defaults are never mutated here, so sharing them across calls is safe.
    """
    # Take the anchor count from the requested level, not from level 0, so
    # levels with a different number of anchors are handled correctly.
    level_anchors = np.array(anchors[i]).reshape(-1, 2)
    num_anchor = len(level_anchors)

    ys, xs = np.arange(ny, dtype=np.int32), np.arange(nx, dtype=np.int32)
    yv, xv = np.meshgrid(ys, xs, indexing="ij")
    cells = np.stack((xv, yv), axis=2)  # (ny, nx, 2), last axis is (x, y)

    # np.tile prepends the two missing leading axes before repeating per anchor.
    # The -0.5 is the grid offset of the decode formula y = 2.0 * x - 0.5.
    grid = np.tile(cells, (1, num_anchor, 1, 1, 1)) - 0.5

    anchor_grid = np.tile(level_anchors.reshape(1, num_anchor, 1, 1, 2), (1, 1, ny, nx, 1))
    return grid, anchor_grid

def sigmoid(x):
    """Apply the logistic function ``1 / (1 + e**-x)`` element-wise."""
    decay = np.exp(-x)
    return 1 / (decay + 1)

def xywh2xyxy(x):
    """Turn centre-format boxes [cx, cy, w, h] into corner-format [x1, y1, x2, y2].

    Only the first four entries of the last axis are rewritten; any further
    columns are carried over untouched. The input array is not modified.
    """
    corners = np.copy(x)
    half_w = x[..., 2] / 2
    half_h = x[..., 3] / 2
    corners[..., 0] = x[..., 0] - half_w  # left
    corners[..., 1] = x[..., 1] - half_h  # top
    corners[..., 2] = x[..., 0] + half_w  # right
    corners[..., 3] = x[..., 1] + half_h  # bottom
    return corners

def letterbox(im, new_shape=(640, 640), color=(0, 0, 0), auto=False, scaleFill=False, scaleup=True, stride=32):
    """Resize ``im`` to fit ``new_shape`` (h, w) and pad the remainder with ``color``.

    Returns the padded image, the (width, height) scale ratios and the
    per-side padding ``(dw, dh)`` (half of the total padding on each axis).
    """
    src_h, src_w = im.shape[:2]
    if isinstance(new_shape, int):
        new_shape = (new_shape, new_shape)
    dst_h, dst_w = new_shape[0], new_shape[1]

    # Uniform scale factor (new / old) that fits the image inside the target
    scale = min(dst_h / src_h, dst_w / src_w)
    if not scaleup:
        # Shrink only, never enlarge
        scale = min(scale, 1.0)

    ratio = (scale, scale)
    new_unpad = (int(round(src_w * scale)), int(round(src_h * scale)))  # (w, h)
    dw = dst_w - new_unpad[0]
    dh = dst_h - new_unpad[1]

    if auto:
        # Minimum rectangle: keep only the padding needed to reach a stride multiple
        dw, dh = np.mod(dw, stride), np.mod(dh, stride)
    elif scaleFill:
        # Stretch to the target, no padding at all
        dw, dh = 0.0, 0.0
        new_unpad = (dst_w, dst_h)
        ratio = (dst_w / src_w, dst_h / src_h)

    # Split the padding evenly between the two sides
    dw, dh = dw / 2, dh / 2

    if (src_w, src_h) != new_unpad:
        im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR)

    # The +/-0.1 nudges make an odd total padding split as floor / ceil
    top = int(round(dh - 0.1))
    bottom = int(round(dh + 0.1))
    left = int(round(dw - 0.1))
    right = int(round(dw + 0.1))
    im = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)

    return im, ratio, (dw, dh)

def scale_boxes(img1_shape, boxes, img0_shape, ratio_pad=None):
    """Map xyxy boxes from the ``img1_shape`` frame back to the ``img0_shape`` frame.

    The padding is subtracted and the result divided by the gain. When
    ``ratio_pad`` is given as ``((gain, ...), (pad_x, pad_y))`` it is used
    directly; otherwise gain and padding are derived from the two shapes.
    ``boxes`` is modified in place and also returned.
    """
    if ratio_pad is not None:
        gain, pad = ratio_pad[0][0], ratio_pad[1]
    else:
        h1, w1 = img1_shape[0], img1_shape[1]
        h0, w0 = img0_shape[0], img0_shape[1]
        gain = min(h1 / h0, w1 / w0)  # gain = old / new
        pad = ((w1 - w0 * gain) / 2, (h1 - h0 * gain) / 2)  # (x, y) padding

    pad_x, pad_y = pad[0], pad[1]
    boxes[..., [0, 2]] -= pad_x
    boxes[..., [1, 3]] -= pad_y
    boxes[..., :4] /= gain
    return boxes

def nms(boxes, iou_thresh=0.65):
    """Greedy non-maximum suppression on rows of [x1, y1, x2, y2, score, ...].

    Boxes are visited from highest to lowest score; a box is dropped when its
    IoU with an already kept box exceeds ``iou_thresh``. Widths and heights
    use the inclusive "+1" pixel convention. Returns the kept rows, ordered by
    descending score.
    """
    x1, y1, x2, y2, scores = (boxes[:, col] for col in range(5))
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    remaining = scores.argsort()[::-1]

    selected = []
    while remaining.size > 0:
        best, rest = remaining[0], remaining[1:]
        selected.append(best)

        # Intersection of the current best box with every remaining candidate
        left = np.maximum(x1[best], x1[rest])
        top = np.maximum(y1[best], y1[rest])
        right = np.minimum(x2[best], x2[rest])
        bottom = np.minimum(y2[best], y2[rest])

        overlap_w = np.maximum(0, right - left + 1)
        overlap_h = np.maximum(0, bottom - top + 1)
        inter = overlap_w * overlap_h

        iou = inter / (areas[best] + areas[rest] - inter)
        # Keep only the candidates that do not overlap the best box too much
        remaining = rest[iou <= iou_thresh]

    return boxes[selected, :]

def nms_multi(boxes, conf_thresh=0.25, iou_thresh=0.65, max_num=300):
    """Run class-wise NMS over raw candidates and return the surviving detections.

    Args:
        boxes: Array of rows ``[x1, y1, x2, y2, obj, cls_0, cls_1, ...]``.
            A single 1-D row is accepted and treated as one candidate.
        conf_thresh: Candidates whose objectness (column 4) is not above this
            value are discarded before NMS.
        iou_thresh: IoU threshold forwarded to ``nms``.
        max_num: Only the ``max_num`` highest-confidence candidates (across
            all classes) take part in NMS.

    Returns:
        An ndarray of shape ``(k, 6)`` with rows
        ``[x1, y1, x2, y2, obj * best_cls_score, label]``, grouped by ascending
        label. When nothing survives the result is an empty ``(0, 6)`` array,
        so callers can always slice it like ``result[:, :4]``.
    """
    no_detections = np.empty((0, 6))

    boxes = np.asarray(boxes)
    if boxes.size == 0:
        return no_detections
    if boxes.ndim == 1:
        # A lone candidate that lost its leading axis (e.g. after a squeeze)
        boxes = boxes[np.newaxis, :]

    boxes = boxes[boxes[:, 4] > conf_thresh]
    if len(boxes) == 0:
        return no_detections

    cls_score = boxes[:, 5:]
    max_cls_index = np.argmax(cls_score, axis=-1)
    max_cls_score = np.max(cls_score, axis=-1)

    dets = np.concatenate([boxes[:, :4],                   # 0-3: box coordinates
                           boxes[:, 4:5],                  # 4: objectness
                           max_cls_score[:, np.newaxis],   # 5: best class score
                           max_cls_index[:, np.newaxis]],  # 6: class label
                          axis=-1)
    dets[:, 4] = dets[:, 4] * dets[:, 5]  # final confidence = obj * best class score

    # Keep the max_num most confident candidates, then drop the raw class score
    top = dets[:, 4].argsort()[::-1][:max_num]
    dets = dets[top][:, [0, 1, 2, 3, 4, 6]]

    result = []
    # Only labels still present after the max_num cut need to be processed
    for c in np.unique(dets[:, -1]):
        nmsed_det = nms(dets[dets[:, -1] == c], iou_thresh=iou_thresh)
        if len(nmsed_det):
            result.append(nmsed_det)

    if not result:
        return no_detections
    return np.concatenate(result, axis=0)

def onnx_inference(opt):
    """Run the detector on one image, print the detections and optionally save a visualisation.

    Args:
        opt: Options namespace. Reads ``model``, ``img``, ``imgsz`` (h, w),
            ``anchors``, ``strides``, ``classes``, ``vis`` and ``save_name``.
            ``conf_thres``, ``iou_thres`` and ``max_det`` are forwarded to
            ``nms_multi`` when present; otherwise its defaults are used.

    Raises:
        FileNotFoundError: If OpenCV cannot read ``opt.img``.
    """
    session = ort.InferenceSession(opt.model)

    input_name = session.get_inputs()[0].name
    output_name = [output.name for output in session.get_outputs()]
    print(f"Input_name: {input_name}, Output_name: {output_name}")

    img = cv2.imread(f'{opt.img}')
    if img is None:
        # cv2.imread signals a missing/unreadable file by returning None
        raise FileNotFoundError(f"Cannot read image: {opt.img}")

    # Accept a single size (e.g. "--imgsz 640") as a square (h, w)
    imgsz = list(opt.imgsz) if isinstance(opt.imgsz, (list, tuple)) else [opt.imgsz]
    if len(imgsz) == 1:
        imgsz = imgsz * 2

    t1 = time.time()
    img_letter, _, _ = letterbox(img, imgsz)  # h w c
    # BGR -> RGB, HWC -> NCHW. The result of the reversed slice + transpose is
    # a strided view, so hand the runtime a dense buffer instead.
    input_data = np.expand_dims(img_letter, axis=0)[..., ::-1].transpose((0, 3, 1, 2))
    input_data = np.ascontiguousarray(input_data)
    t2 = time.time()
    print(f"Preprocess time: {(t2-t1)*1000:.2f} ms")
    t3 = time.time()
    outputs = session.run(output_name, {input_name:input_data})
    t4 = time.time()
    print(f"Inference time: {(t4-t3)*1000:.2f} ms")

    num_anchor = len(opt.anchors[0]) // 2
    channel = len(opt.classes) + 5
    predictions = list()

    for i, output in enumerate(outputs):
        bs, _, ny, nx = output.shape  # (bs, na*channel, ny, nx) -> (bs, na, ny, nx, channel)
        output = sigmoid(output.reshape(bs, num_anchor, channel, ny, nx).transpose(0, 1, 3, 4, 2))

        grid, anchor_grid = make_grid(nx, ny, i, opt.strides, opt.anchors)

        xy, wh, conf = output[..., :2], output[..., 2:4], output[..., 4:]

        xy = (xy * 2 + grid) * opt.strides[i]  # box centre in letterboxed pixels
        wh = (wh * 2) ** 2 * anchor_grid  # box size in letterboxed pixels

        prediction = np.concatenate((xy, wh, conf), 4)
        prediction = prediction.reshape(bs, num_anchor * nx * ny, channel)

        prediction = xywh2xyxy(prediction)
        # Clamp the corners to the letterboxed image
        prediction[..., 0:4:2] = np.clip(prediction[..., 0:4:2], a_min=0, a_max=imgsz[1])
        prediction[..., 1:4:2] = np.clip(prediction[..., 1:4:2], a_min=0, a_max=imgsz[0])

        predictions.append(prediction)

    # Single-image pipeline: take the first (only) batch item
    predictions = np.concatenate(predictions, axis=1)[0]

    # Forward the user-supplied thresholds instead of silently ignoring them
    predictions = nms_multi(
        predictions,
        conf_thresh=getattr(opt, "conf_thres", 0.25),
        iou_thresh=getattr(opt, "iou_thres", 0.65),
        max_num=getattr(opt, "max_det", 300),
    )  # TODO multi label for one box
    # Rows after NMS: [x1, y1, x2, y2, conf, label]. "Nothing detected" may come
    # back as an empty list, so normalise to a (k, 6) array for the code below.
    predictions = np.asarray(predictions).reshape(-1, 6)

    if len(predictions) > 0:
        predictions[:, :4] = scale_boxes(img_letter.shape[:2], predictions[:, :4], img.shape).round()
    else:
        print("no target")

    print(f"Total detect {len(predictions)} objects")
    for i, (x1, y1, x2, y2, conf, label) in enumerate(predictions):
        print(f"{i}: {opt.classes[int(label)]}\t {conf:.3f} [{round(x1,1)}, {round(y1,1)}, {round(x2,1)}, {round(y2,1)}]")

    if opt.vis:
        # Plain Python ints keep OpenCV's point parsing happy
        box_xyxy = predictions[:, :4].astype(np.int32).tolist()
        scores = predictions[:, -2]
        labels = predictions[:, -1].astype(np.int32).tolist()

        for (x1, y1, x2, y2), score, label in zip(box_xyxy, scores, labels):
            caption = f"{opt.classes[label]}:{score:.3f}"
            img = cv2.rectangle(img, (x1, y1), (x2, y2), (0, 255, 0), 2)
            text_size = cv2.getTextSize(caption, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)
            # Move the caption inside the box when it would be cut off at the top
            if (y1 - text_size[0][1]) < 0:
                textxy = (x1, y1 + text_size[0][1])
            else:
                textxy = (x1, y1)
            cv2.putText(img, caption, textxy, cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2)

        if not cv2.imwrite(f'{opt.save_name}', img):
            print(f"Failed to save result image: {opt.save_name}")

def _str2bool(value):
    """Parse a command-line boolean such as ``true``/``false``/``1``/``0``."""
    if isinstance(value, bool):
        return value
    text = str(value).strip().lower()
    if text in ("1", "true", "t", "yes", "y", "on"):
        return True
    if text in ("0", "false", "f", "no", "n", "off"):
        return False
    raise argparse.ArgumentTypeError(f"expected a boolean value, got {value!r}")


def _parse_anchor_level(text):
    """Parse one detection level's anchors from a ``"w0,h0,w1,h1,..."`` string."""
    try:
        values = [float(item) for item in text.split(",") if item.strip()]
    except ValueError as err:
        raise argparse.ArgumentTypeError(f"invalid anchor list {text!r}") from err
    if not values or len(values) % 2:
        raise argparse.ArgumentTypeError(f"anchors must be w,h pairs, got {text!r}")
    return values


def parse_opt(argv=None):
    """Parse the command-line options of the detection script.

    Args:
        argv: Optional list of argument strings. ``None`` (the default) makes
            argparse read ``sys.argv[1:]``, as before.

    Returns:
        The parsed ``argparse.Namespace``. ``anchors`` is a list with one flat
        ``[w0, h0, w1, h1, ...]`` list per level, ``strides``, ``imgsz`` and
        ``classes`` are lists, and ``vis`` is a real bool.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--model", type=str, default="./AX650/ax_ax650_hel_algo_V1.0.0.axmodel", help="onnx model path")
    parser.add_argument("--img", type=str, default="./test.jpg", help="img_path")
    # One comma-separated string per detection level, e.g. --anchors 31,28,38,32 84,110,133,118
    parser.add_argument("--anchors", nargs="+", type=_parse_anchor_level,
                        default=[[31,28, 38,32, 60,83],[84,110, 133,118, 200,113]],
                        help="anchors, one comma-separated w,h,... list per detection level")
    # nargs="+" so a value given on the command line is still a list indexable per level
    parser.add_argument("--strides", nargs="+", type=float, default=[8, 16], help="model strides")
    parser.add_argument("--imgsz", "--img-size", nargs="+", type=int, default=[256, 192], help="inference size h,w")
    parser.add_argument("--classes", nargs="+", type=str, default=["helmet", "head", "e-bike", "bike"], help="class names")
    parser.add_argument("--conf-thres", type=float, default=0.25, help="confidence threshold")
    parser.add_argument("--iou-thres", type=float, default=0.45, help="NMS IoU threshold")
    parser.add_argument("--max-det", type=int, default=50, help="maximum detections per image")
    # Without a converter "--vis False" would yield the truthy string "False"
    parser.add_argument("--vis", nargs="?", const=True, type=_str2bool, default=True, help="visualize detect result")
    parser.add_argument("--save_name", type=str, default="./out.jpg", help="detect img save path")
    opt = parser.parse_args(argv)
    return opt

# Script entry point: parse the command-line options and run detection once.
if __name__ == "__main__":

    opt = parse_opt()
    onnx_inference(opt)