Instructions to use jxu124/SInViG-240117-0 with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use jxu124/SInViG-240117-0 with Transformers:
```python
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("image-feature-extraction", model="jxu124/SInViG-240117-0", trust_remote_code=True)

# Load model directly
from transformers import AutoModel

model = AutoModel.from_pretrained("jxu124/SInViG-240117-0", trust_remote_code=True, dtype="auto")
```
- Notebooks
- Google Colab
- Kaggle
| from PIL import Image as PILImage | |
| from transformers import AutoModel, AutoTokenizer, AutoImageProcessor | |
| import re | |
| import cv2 | |
| import numpy as np | |
| class Utils(): | |
def xywh2xyxy(b):
    """Turn ``(x, y, w, h)`` boxes into ``(x1, y1, x2, y2)`` boxes in place.

    The last axis of ``b`` is sliced as ``[..., :2]`` and ``[..., 2:]``: the
    leading slice is added onto the trailing slice, so ``b`` itself is
    modified and the very same object is returned.
    """
    top_left = b[..., :2]
    # In-place add: the trailing (w, h) entries become the bottom-right corner.
    b[..., 2:] += top_left
    return b
def bbox_to_sbbox(bbox):
    """Encode one ``xyxy`` box as four space-separated ``<bin_N>`` tokens.

    Each coordinate is multiplied by 1000, limited to ``[0, 999]`` and
    truncated to an integer bin index.

    Args:
        bbox: Sequence of exactly four numbers, expected to lie in ``[0, 1]``.
            Values outside that range are saturated to bin 0 or bin 999.

    Returns:
        The four tokens joined by single spaces, e.g.
        ``"<bin_100> <bin_200> <bin_500> <bin_900>"``.

    Raises:
        AssertionError: If ``bbox`` does not have exactly four entries.
    """
    # xyxy in [0, 1]
    assert len(bbox) == 4, "bbox must contain exactly four coordinates"
    scaled = np.asarray(bbox, dtype=float) * 1000
    # Clip BEFORE the integer cast. Casting to int16 first overflowed for
    # coordinates far outside [0, 1] (e.g. 40.0 -> 40000 does not fit in
    # int16), so an overly large value could land in bin 0 instead of bin 999.
    bins = np.clip(scaled, 0, 999).astype(int).tolist()
    return " ".join(f"<bin_{i}>" for i in bins)
def sbbox_to_bbox(sbbox, set_default: bool = False):
    """Decode ``<bin_N>`` tokens from a string into normalized boxes.

    The text is split on ``"region:"``; every piece that contains at least
    four ``<bin_N>`` tokens contributes one box built from its first four
    bin indices. Indices are divided by 1000 and clipped to
    ``[1e-5, 1 - 1e-5]``.

    Args:
        sbbox: Text containing ``<bin_N>`` tokens.
        set_default: When true and no box could be decoded, fall back to the
            bins ``[0, 0, 1000, 1000]`` instead of returning no boxes.

    Returns:
        A float array of shape ``(num_boxes, 4)``; ``num_boxes`` may be 0.
    """
    quads = []
    for segment in sbbox.split("region:"):
        bin_ids = re.findall(r"<bin_(\d+)>", segment)
        # Segments with fewer than four tokens cannot form a box; extras are dropped.
        if len(bin_ids) >= 4:
            quads.append(bin_ids[:4])

    boxes = np.asarray(quads, dtype=int)
    if set_default and len(boxes) == 0:
        boxes = np.asarray([0, 0, 1000, 1000])

    normalized = np.clip(boxes / 1000, 1e-5, 1 - 1e-5)
    return normalized.reshape(-1, 4)
def make_dialog_context(dialog: list, text_human: str = None) -> str:
    """Flatten a dialog history into a single prompt string.

    Args:
        dialog: Past turns as ``(human_text, agent_text)`` pairs, e.g.
            ``[("pass me an apple.", "which apple do you want?"), ...]``.
        text_human: Optional new human utterance appended after the history,
            without a trailing newline.

    Returns:
        One ``"human: ...\\nagent: ...\\n"`` chunk per past turn, followed by
        ``"human: <text_human>"`` when ``text_human`` is not ``None``.
    """
    pieces = []
    for turn in dialog:
        pieces.append(f"human: {turn[0]}\nagent: {turn[1]}\n")
    if text_human is not None:
        pieces.append(f"human: {text_human}")
    return "".join(pieces)
def show_mask(image: PILImage.Image, bboxes=None, masks=None, show_id=False, text_size=1) -> PILImage.Image:
    """Draw boxes, box indices and masks on a copy of ``image``.

    Masks only change the RGB values of the pixels they mark.

    Args:
        image: Source PIL image; it is copied, never drawn on directly.
            NOTE(review): mask blending mixes the image with 3-channel
            colors, so a 3-channel (RGB) image is presumably expected —
            confirm against callers.
        bboxes: Optional boxes, reshaped to ``(-1, 4)``. Every box is
            multiplied by ``(width, height, width, height)``, so coordinates
            are relative ``xyxy`` values.
        masks: Optional iterable of per-pixel masks with the image's height
            and width. Each mask is converted to boolean before use.
        show_id: When true and ``bboxes`` is given, write each box's index
            near its top-left corner.
        text_size: OpenCV font scale for the index labels; also scales the
            vertical offset of the label.

    Returns:
        A new PIL image with the overlays applied.
    """
    import colorsys

    # 36 evenly spaced, fully saturated hues; reused cyclically so any number
    # of boxes/masks can be drawn.
    colors = [tuple(int(c * 255) for c in colorsys.hsv_to_rgb(i * 1.0 / 36, 1, 1)) for i in range(36)]
    size = image.size
    # np.array (a copy) rather than np.asarray: the array view of a PIL image
    # may be read-only, which OpenCV's in-place drawing functions reject.
    canvas = np.array(image)

    if bboxes is not None:
        scale = np.asarray([*size, *size])
        pixel_boxes = (np.array(bboxes).reshape(-1, 4) * scale).astype(int)
        for k, box in enumerate(pixel_boxes):
            # Plain Python ints: OpenCV point arguments are picky about NumPy scalars.
            x1, y1, x2, y2 = (int(v) for v in box)
            # Modulo keeps the color lookup valid beyond 36 boxes, matching
            # what the labels and masks already do.
            canvas = cv2.rectangle(canvas, (x1, y1), (x2, y2), colors[k % len(colors)], thickness=2)
        if show_id:
            # Truncate so a fractional text_size still yields integer pixel coordinates.
            label_drop = int(28 * text_size)
            for k, box in enumerate(pixel_boxes):
                origin = (int(box[0]) + 2, int(box[1]) + label_drop)
                # Thick white pass first, then the thin colored pass on top, for legibility.
                canvas = cv2.putText(canvas, str(k), origin, cv2.FONT_HERSHEY_SIMPLEX, text_size, (255, 255, 255), 2, cv2.LINE_AA)
                canvas = cv2.putText(canvas, str(k), origin, cv2.FONT_HERSHEY_SIMPLEX, text_size, colors[k % len(colors)], 1, cv2.LINE_AA)

    if masks is not None:
        for k, mask in enumerate(masks):
            # Force boolean: `~` on a 0/1 integer mask would yield 255/254
            # instead of a logical inverse.
            region = np.asarray(mask).astype(bool)[..., None]
            tint = (region * colors[k % len(colors)][:3]).astype(np.uint8)
            # 50/50 blend of the tint and the image inside the mask ...
            blended = cv2.addWeighted(tint, 0.5, canvas * region, 0.5, 0)
            # ... recombined with the untouched pixels outside the mask.
            canvas = cv2.add(canvas * ~region, blended)
    return PILImage.fromarray(canvas)