Image-Text-to-Text
Transformers
Safetensors
English
molmo
text-generation
multimodal
olmo
pixmo
conversational
custom_code
Instructions to use allenai/Molmo-7B-O-0924 with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use allenai/Molmo-7B-O-0924 with Transformers:
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("image-text-to-text", model="allenai/Molmo-7B-O-0924", trust_remote_code=True)
messages = [
    {
        "role": "user",
        "content": [
            {"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/p-blog/candy.JPG"},
            {"type": "text", "text": "What animal is on the candy?"}
        ]
    },
]
pipe(text=messages)

# Load model directly
from transformers import AutoModelForCausalLM

model = AutoModelForCausalLM.from_pretrained("allenai/Molmo-7B-O-0924", trust_remote_code=True, dtype="auto")
- Notebooks
- Google Colab
- Kaggle
- Local Apps Settings
- vLLM
How to use allenai/Molmo-7B-O-0924 with vLLM:
Install from pip and serve model
# Install vLLM from pip:
pip install vllm

# Start the vLLM server:
vllm serve "allenai/Molmo-7B-O-0924"

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/chat/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "allenai/Molmo-7B-O-0924",
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": "Describe this image in one sentence."
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
                        }
                    }
                ]
            }
        ]
    }'
Use Docker
docker model run hf.co/allenai/Molmo-7B-O-0924
- SGLang
How to use allenai/Molmo-7B-O-0924 with SGLang:
Install from pip and serve model
# Install SGLang from pip:
pip install sglang

# Start the SGLang server:
python3 -m sglang.launch_server \
    --model-path "allenai/Molmo-7B-O-0924" \
    --host 0.0.0.0 \
    --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "allenai/Molmo-7B-O-0924",
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": "Describe this image in one sentence."
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
                        }
                    }
                ]
            }
        ]
    }'
Use Docker images
docker run --gpus all \
    --shm-size 32g \
    -p 30000:30000 \
    -v ~/.cache/huggingface:/root/.cache/huggingface \
    --env "HF_TOKEN=<secret>" \
    --ipc=host \
    lmsysorg/sglang:latest \
    python3 -m sglang.launch_server \
        --model-path "allenai/Molmo-7B-O-0924" \
        --host 0.0.0.0 \
        --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "allenai/Molmo-7B-O-0924",
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": "Describe this image in one sentence."
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
                        }
                    }
                ]
            }
        ]
    }'
- Docker Model Runner
How to use allenai/Molmo-7B-O-0924 with Docker Model Runner:
docker model run hf.co/allenai/Molmo-7B-O-0924
Upload preprocessing_molmo.py with huggingface_hub
Browse files
- preprocessing_molmo.py +19 -3
preprocessing_molmo.py
CHANGED
|
@@ -4,6 +4,10 @@ Processor class for Molmo.
|
|
| 4 |
|
| 5 |
from typing import Optional
|
| 6 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7 |
try:
|
| 8 |
from typing import Unpack
|
| 9 |
except ImportError:
|
|
@@ -23,7 +27,7 @@ from transformers.tokenization_utils_base import TextInput
|
|
| 23 |
from transformers.utils import logging
|
| 24 |
|
| 25 |
from transformers import AutoTokenizer
|
| 26 |
-
from .image_preprocessing_molmo import MolmoImagesKwargs,
|
| 27 |
|
| 28 |
|
| 29 |
logger = logging.get_logger(__name__)
|
|
@@ -129,8 +133,20 @@ class MolmoProcessor(ProcessorMixin):
|
|
| 129 |
image_token_id = self.special_token_ids[IMAGE_PROMPT]
|
| 130 |
|
| 131 |
if images is not None:
|
| 132 |
-
|
| 133 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 134 |
# For now only support inserting images at the start
|
| 135 |
image_idx = [-1]*len(images)
|
| 136 |
else:
|
|
|
|
| 4 |
|
| 5 |
from typing import Optional
|
| 6 |
|
| 7 |
+
import PIL
|
| 8 |
+
from PIL import ImageOps
|
| 9 |
+
from PIL.Image import Image
|
| 10 |
+
|
| 11 |
try:
|
| 12 |
from typing import Unpack
|
| 13 |
except ImportError:
|
|
|
|
| 27 |
from transformers.utils import logging
|
| 28 |
|
| 29 |
from transformers import AutoTokenizer
|
| 30 |
+
from .image_preprocessing_molmo import MolmoImagesKwargs, MolmoImageProcessor
|
| 31 |
|
| 32 |
|
| 33 |
logger = logging.get_logger(__name__)
|
|
|
|
| 133 |
image_token_id = self.special_token_ids[IMAGE_PROMPT]
|
| 134 |
|
| 135 |
if images is not None:
|
| 136 |
+
if not isinstance(images, (list, tuple)):
|
| 137 |
+
images = [images]
|
| 138 |
+
image_arrays = []
|
| 139 |
+
for image in images:
|
| 140 |
+
if isinstance(image, Image):
|
| 141 |
+
image = image.convert("RGB")
|
| 142 |
+
# Handle images with EXIF orientation tags, which PIL will ignore by default
|
| 143 |
+
# https://github.com/python-pillow/Pillow/issues/4703
|
| 144 |
+
img = ImageOps.exif_transpose(image)
|
| 145 |
+
image_arrays.append(np.array(image))
|
| 146 |
+
else:
|
| 147 |
+
assert len(image.shape) == 3 and image.shape[-1] == 3
|
| 148 |
+
image_arrays.append(image.astype(np.uint8))
|
| 149 |
+
images = image_arrays
|
| 150 |
# For now only support inserting images at the start
|
| 151 |
image_idx = [-1]*len(images)
|
| 152 |
else:
|