Text Generation
Transformers
ONNX
Safetensors
opt
trl
sft
optimum
danbooru
text-generation-inference
Instructions to use p1atdev/dart-v1-sft with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use p1atdev/dart-v1-sft with Transformers:
```python
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("text-generation", model="p1atdev/dart-v1-sft")
```
```python
# Load model directly
from transformers import AutoTokenizer, AutoModelForMultimodalLM

tokenizer = AutoTokenizer.from_pretrained("p1atdev/dart-v1-sft")
model = AutoModelForMultimodalLM.from_pretrained("p1atdev/dart-v1-sft")
```
- Notebooks
- Google Colab
- Kaggle
- Local Apps Settings
- vLLM
How to use p1atdev/dart-v1-sft with vLLM:
Install from pip and serve model
```bash
# Install vLLM from pip:
pip install vllm

# Start the vLLM server:
vllm serve "p1atdev/dart-v1-sft"

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "p1atdev/dart-v1-sft",
    "prompt": "Once upon a time,",
    "max_tokens": 512,
    "temperature": 0.5
  }'
```
Use Docker
docker model run hf.co/p1atdev/dart-v1-sft
- SGLang
How to use p1atdev/dart-v1-sft with SGLang:
Install from pip and serve model
```bash
# Install SGLang from pip:
pip install sglang

# Start the SGLang server:
python3 -m sglang.launch_server \
  --model-path "p1atdev/dart-v1-sft" \
  --host 0.0.0.0 \
  --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "p1atdev/dart-v1-sft",
    "prompt": "Once upon a time,",
    "max_tokens": 512,
    "temperature": 0.5
  }'
```
Use Docker images
```bash
docker run --gpus all \
  --shm-size 32g \
  -p 30000:30000 \
  -v ~/.cache/huggingface:/root/.cache/huggingface \
  --env "HF_TOKEN=<secret>" \
  --ipc=host \
  lmsysorg/sglang:latest \
  python3 -m sglang.launch_server \
    --model-path "p1atdev/dart-v1-sft" \
    --host 0.0.0.0 \
    --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "p1atdev/dart-v1-sft",
    "prompt": "Once upon a time,",
    "max_tokens": 512,
    "temperature": 0.5
  }'
```
- Docker Model Runner
How to use p1atdev/dart-v1-sft with Docker Model Runner:
docker model run hf.co/p1atdev/dart-v1-sft
| import logging | |
| from typing import List | |
| from transformers import PreTrainedTokenizerFast | |
| from tokenizers.decoders import Decoder | |
logger = logging.getLogger(__name__)

# fmt: off
# Jinja template that renders the generation prompt.
# Template syntax reference:
#   https://huggingface.co/docs/transformers/main/en/chat_templating
#
# The template looks values up in `messages` by key ('rating', 'copyright',
# 'character', 'length', 'general'). A key that is absent or set to none
# falls back to the default written in the matching `if` branch below.
# The <general> section is opened but deliberately not closed here: the
# template ends with the '<|input_end|>' marker instead.
PROMPT_TEMPLATE = "".join(
    (
        # Sequence start marker.
        "{{ '<|bos|>' }}",

        # <rating> section; default is 'rating:sfw, rating:general'.
        "{{ '<rating>' }}",
        "{% if 'rating' not in messages or messages['rating'] is none %}",
        "{{ 'rating:sfw, rating:general' }}",
        "{% else %}",
        "{{ messages['rating'] }}",
        "{% endif %}",
        "{{ '</rating>' }}",

        # <copyright> section; default is empty.
        "{{ '<copyright>' }}",
        "{% if 'copyright' not in messages or messages['copyright'] is none %}",
        "{{ '' }}",
        "{% else %}",
        "{{ messages['copyright'] }}",
        "{% endif %}",
        "{{ '</copyright>' }}",

        # <character> section; default is empty.
        "{{ '<character>' }}",
        "{% if 'character' not in messages or messages['character'] is none %}",
        "{{ '' }}",
        "{% else %}",
        "{{ messages['character'] }}",
        "{% endif %}",
        "{{ '</character>' }}",

        # <general> section opener.
        "{{ '<general>' }}",

        # Length token; default is '<|long|>'.
        "{% if 'length' not in messages or messages['length'] is none %}",
        "{{ '<|long|>' }}",
        "{% else %}",
        "{{ messages['length'] }}",
        "{% endif %}",

        # General tags supplied by the caller; default is empty.
        "{% if 'general' not in messages or messages['general'] is none %}",
        "{{ '' }}",
        "{% else %}",
        "{{ messages['general'] }}",
        "{% endif %}",

        # End-of-input marker.
        "{{ '<|input_end|>' }}",
    )
).strip()
# fmt: on
class DartDecoder:
    """Decoder that separates consecutive ordinary tokens with ``", "``.

    Special tokens are emitted verbatim, and no separator is inserted
    directly before or directly after a special token.
    """

    def __init__(self, special_tokens: List[str]):
        """Store a private copy of the special-token strings.

        Args:
            special_tokens: Token strings that must be passed through
                unchanged and must not be joined to a neighbour with
                ``", "``.
        """
        self.special_tokens = list(special_tokens)

    def decode_chain(self, tokens: List[str]) -> List[str]:
        """Return ``tokens`` with ``", "`` prefixed where a separator belongs.

        A token receives the ``", "`` prefix only when it is not the first
        token and neither it nor the token immediately before it is special.
        Concatenating the returned strings therefore yields a comma-separated
        tag list with special tokens inlined as-is.

        Args:
            tokens: Token strings in generation order.

        Returns:
            A new list of the same length as ``tokens``; an empty input
            yields an empty list.
        """
        # Build the lookup set once per call: O(1) membership per token
        # instead of scanning the list each time, while still honouring any
        # change made to ``self.special_tokens`` after construction.
        specials = set(self.special_tokens)

        decoded: List[str] = []
        # Starting as True means the first token never gets a separator,
        # exactly as if it were preceded by a special token.
        prev_is_special = True
        for token in tokens:
            is_special = token in specials
            if is_special or prev_is_special:
                decoded.append(token)
            else:
                decoded.append(f", {token}")
            prev_is_special = is_special
        return decoded
class DartTokenizer(PreTrainedTokenizerFast):
    """Fast tokenizer that decodes through a ``DartDecoder``.

    After the base class is initialised, the backend tokenizer's decoder is
    replaced with a custom decoder built from this tokenizer's added
    vocabulary.
    """

    def __init__(self, **kwargs):
        """Initialise the base tokenizer, then install the custom decoder.

        Args:
            **kwargs: Forwarded unchanged to ``PreTrainedTokenizerFast``.
        """
        super().__init__(**kwargs)

        # Every added-vocabulary token is treated as special by the decoder.
        added_tokens = list(self.get_added_vocab().keys())
        dart_decoder = DartDecoder(added_tokens)
        self._tokenizer.decoder = Decoder.custom(dart_decoder)  # type: ignore

    def default_chat_template(self):
        """Return the prompt template used to generate danbooru tags.

        NOTE(review): this is a plain method here. Some transformers
        releases expose ``default_chat_template`` on the base class as a
        property; confirm whether this override should be decorated to match.
        """
        return PROMPT_TEMPLATE