Instructions to use Qwen/Qwen-VL with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use Qwen/Qwen-VL with Transformers:
# Use a pipeline as a high-level helper
from transformers import pipeline
pipe = pipeline("text-generation", model="Qwen/Qwen-VL", trust_remote_code=True)
# Load model directly
from transformers import AutoModelForCausalLM
model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen-VL", trust_remote_code=True, dtype="auto")
- Notebooks
- Google Colab
- Kaggle
- Local Apps Settings
- vLLM
How to use Qwen/Qwen-VL with vLLM:
Install from pip and serve model
# Install vLLM from pip:
pip install vllm
# Start the vLLM server:
vllm serve "Qwen/Qwen-VL"
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/completions" \
  -H "Content-Type: application/json" \
  --data '{ "model": "Qwen/Qwen-VL", "prompt": "Once upon a time,", "max_tokens": 512, "temperature": 0.5 }'
Use Docker
docker model run hf.co/Qwen/Qwen-VL
- SGLang
How to use Qwen/Qwen-VL with SGLang:
Install from pip and serve model
# Install SGLang from pip:
pip install sglang
# Start the SGLang server:
python3 -m sglang.launch_server \
  --model-path "Qwen/Qwen-VL" \
  --host 0.0.0.0 \
  --port 30000
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/completions" \
  -H "Content-Type: application/json" \
  --data '{ "model": "Qwen/Qwen-VL", "prompt": "Once upon a time,", "max_tokens": 512, "temperature": 0.5 }'
Use Docker images
docker run --gpus all \
  --shm-size 32g \
  -p 30000:30000 \
  -v ~/.cache/huggingface:/root/.cache/huggingface \
  --env "HF_TOKEN=<secret>" \
  --ipc=host \
  lmsysorg/sglang:latest \
  python3 -m sglang.launch_server \
    --model-path "Qwen/Qwen-VL" \
    --host 0.0.0.0 \
    --port 30000
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/completions" \
  -H "Content-Type: application/json" \
  --data '{ "model": "Qwen/Qwen-VL", "prompt": "Once upon a time,", "max_tokens": 512, "temperature": 0.5 }'
- Docker Model Runner
How to use Qwen/Qwen-VL with Docker Model Runner:
docker model run hf.co/Qwen/Qwen-VL
Update tokenization_qwen.py
Browse files
- tokenization_qwen.py +19 -7
tokenization_qwen.py
CHANGED
|
@@ -27,12 +27,6 @@ logger = logging.getLogger(__name__)
|
|
| 27 |
|
| 28 |
|
| 29 |
VOCAB_FILES_NAMES = {"vocab_file": "qwen.tiktoken", "ttf": "SimSun.ttf"}
|
| 30 |
-
FONT_PATH = try_to_load_from_cache("Qwen/Qwen-VL-Chat", "SimSun.ttf")
|
| 31 |
-
if FONT_PATH is None:
|
| 32 |
-
if not os.path.exists("SimSun.ttf"):
|
| 33 |
-
ttf = requests.get("https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen-VL/assets/SimSun.ttf")
|
| 34 |
-
open("SimSun.ttf", "wb").write(ttf.content)
|
| 35 |
-
FONT_PATH = "SimSun.ttf"
|
| 36 |
|
| 37 |
PAT_STR = r"""(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\r\n\p{L}\p{N}]?\p{L}+|\p{N}| ?[^\s\p{L}\p{N}]+[\r\n]*|\s*[\r\n]+|\s+(?!\S)|\s+"""
|
| 38 |
ENDOFTEXT = "<|endoftext|>"
|
|
@@ -175,6 +169,24 @@ class QWenTokenizer(PreTrainedTokenizer):
|
|
| 175 |
self.im_start_id = self.special_tokens[IMSTART]
|
| 176 |
self.im_end_id = self.special_tokens[IMEND]
|
| 177 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 178 |
def __len__(self) -> int:
|
| 179 |
return self.tokenizer.n_vocab
|
| 180 |
|
|
@@ -503,7 +515,7 @@ class VisImage:
|
|
| 503 |
class Visualizer:
|
| 504 |
def __init__(self, img_rgb, metadata=None, scale=1.0):
|
| 505 |
self.img = np.asarray(img_rgb).clip(0, 255).astype(np.uint8)
|
| 506 |
-
self.font_path = FONT_PATH
|
| 507 |
self.output = VisImage(self.img, scale=scale)
|
| 508 |
self.cpu_device = torch.device("cpu")
|
| 509 |
|
|
|
|
| 27 |
|
| 28 |
|
| 29 |
VOCAB_FILES_NAMES = {"vocab_file": "qwen.tiktoken", "ttf": "SimSun.ttf"}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 30 |
|
| 31 |
PAT_STR = r"""(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\r\n\p{L}\p{N}]?\p{L}+|\p{N}| ?[^\s\p{L}\p{N}]+[\r\n]*|\s*[\r\n]+|\s+(?!\S)|\s+"""
|
| 32 |
ENDOFTEXT = "<|endoftext|>"
|
|
|
|
| 169 |
self.im_start_id = self.special_tokens[IMSTART]
|
| 170 |
self.im_end_id = self.special_tokens[IMEND]
|
| 171 |
|
| 172 |
+
def __getstate__(self):
|
| 173 |
+
# for pickle lovers
|
| 174 |
+
state = self.__dict__.copy()
|
| 175 |
+
del state['tokenizer']
|
| 176 |
+
return state
|
| 177 |
+
|
| 178 |
+
def __setstate__(self, state):
|
| 179 |
+
# tokenizer is not python native; don't pass it; rebuild it
|
| 180 |
+
self.__dict__.update(state)
|
| 181 |
+
enc = tiktoken.Encoding(
|
| 182 |
+
"Qwen",
|
| 183 |
+
pat_str=PAT_STR,
|
| 184 |
+
mergeable_ranks=self.mergeable_ranks,
|
| 185 |
+
special_tokens=self.special_tokens,
|
| 186 |
+
)
|
| 187 |
+
self.tokenizer = enc
|
| 188 |
+
|
| 189 |
+
|
| 190 |
def __len__(self) -> int:
|
| 191 |
return self.tokenizer.n_vocab
|
| 192 |
|
|
|
|
| 515 |
class Visualizer:
|
| 516 |
def __init__(self, img_rgb, metadata=None, scale=1.0):
|
| 517 |
self.img = np.asarray(img_rgb).clip(0, 255).astype(np.uint8)
|
| 518 |
+
self.font_path = try_to_load_from_cache("Qwen/Qwen-VL-Chat", "SimSun.ttf")
|
| 519 |
self.output = VisImage(self.img, scale=scale)
|
| 520 |
self.cpu_device = torch.device("cpu")
|
| 521 |
|