Instructions to use royleibov/granite-3b-code-base-128k-ZipNN-Compressed with libraries, inference providers, notebooks, and local apps. Follow these links to get started.

Libraries

How to use royleibov/granite-3b-code-base-128k-ZipNN-Compressed with Transformers:

# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("text-generation", model="royleibov/granite-3b-code-base-128k-ZipNN-Compressed")

# Load model directly
from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained("royleibov/granite-3b-code-base-128k-ZipNN-Compressed")
model = AutoModelForCausalLM.from_pretrained("royleibov/granite-3b-code-base-128k-ZipNN-Compressed")

Notebooks
Google Colab
Kaggle
Local Apps

vLLM

How to use royleibov/granite-3b-code-base-128k-ZipNN-Compressed with vLLM:

Install from pip and serve model

# Install vLLM from pip:
pip install vllm
# Start the vLLM server:
vllm serve "royleibov/granite-3b-code-base-128k-ZipNN-Compressed"
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/completions" \
	-H "Content-Type: application/json" \
	--data '{
		"model": "royleibov/granite-3b-code-base-128k-ZipNN-Compressed",
		"prompt": "Once upon a time,",
		"max_tokens": 512,
		"temperature": 0.5
	}'

Use Docker

docker model run hf.co/royleibov/granite-3b-code-base-128k-ZipNN-Compressed

SGLang

How to use royleibov/granite-3b-code-base-128k-ZipNN-Compressed with SGLang:

Install from pip and serve model

# Install SGLang from pip:
pip install sglang
# Start the SGLang server:
python3 -m sglang.launch_server \
    --model-path "royleibov/granite-3b-code-base-128k-ZipNN-Compressed" \
    --host 0.0.0.0 \
    --port 30000
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/completions" \
	-H "Content-Type: application/json" \
	--data '{
		"model": "royleibov/granite-3b-code-base-128k-ZipNN-Compressed",
		"prompt": "Once upon a time,",
		"max_tokens": 512,
		"temperature": 0.5
	}'

Use Docker images

docker run --gpus all \
    --shm-size 32g \
    -p 30000:30000 \
    -v ~/.cache/huggingface:/root/.cache/huggingface \
    --env "HF_TOKEN=<secret>" \
    --ipc=host \
    lmsysorg/sglang:latest \
    python3 -m sglang.launch_server \
        --model-path "royleibov/granite-3b-code-base-128k-ZipNN-Compressed" \
        --host 0.0.0.0 \
        --port 30000
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/completions" \
	-H "Content-Type: application/json" \
	--data '{
		"model": "royleibov/granite-3b-code-base-128k-ZipNN-Compressed",
		"prompt": "Once upon a time,",
		"max_tokens": 512,
		"temperature": 0.5
	}'

Docker Model Runner
How to use royleibov/granite-3b-code-base-128k-ZipNN-Compressed with Docker Model Runner:
```
docker model run hf.co/royleibov/granite-3b-code-base-128k-ZipNN-Compressed
```

granite-3b-code-base-128k-ZipNN-Compressed / zipnn_decompress_path.py

royleibov

Add .znn files

cc8acc6 over 1 year ago

raw

history blame contribute delete

9.34 kB

	import os
	import sys
	import argparse
	import subprocess
	from pathlib import Path
	from concurrent.futures import (
	ProcessPoolExecutor,
	as_completed,
	)
	from zipnn_decompress_file import (
	decompress_file,
	)

	# Append the parent directory of this script to the module search path.
	sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))

	# ANSI escape sequences used to colour the status messages printed below.
	RED = "\033[91m"
	YELLOW = "\033[93m"
	GREEN = "\033[92m"
	RESET = "\033[0m"  # restores the terminal's default colour

	def check_and_install_zipnn():
	    """Make sure the ``zipnn`` package is importable.

	    If the import fails, ``zipnn`` is installed with pip and imported again.
	    A failing pip command surfaces as ``subprocess.CalledProcessError``.
	    """
	    try:
	        import zipnn
	    except ImportError:
	        print("zipnn not found. Installing...")
	        # Run pip through the current interpreter (``sys.executable -m pip``).
	        pip_command = [sys.executable, "-m", "pip", "install", "zipnn"]
	        subprocess.check_call(pip_command)
	        # Import again now that the install command has finished.
	        import zipnn

	def replace_in_file(file_path, old: str, new: str) -> None:
	    """Replace every occurrence of `old` with `new` in the file, in place.

	    The whole file is read into memory, rewritten, and written back to the
	    same path.

	    Args:
	        file_path: Path of the text file to modify.
	        old: Substring to search for.
	        new: Replacement text.
	    """
	    # Use an explicit encoding so the result does not depend on the platform's
	    # locale default (the JSON index files this is used on are UTF-8).
	    with open(file_path, 'r', encoding='utf-8') as file:
	        file_data = file.read()

	    file_data = file_data.replace(old, new)

	    with open(file_path, 'w', encoding='utf-8') as file:
	        file.write(file_data)

	def decompress_znn_files(
	    path=".",
	    delete=False,
	    force=False,
	    max_processes=1,
	    hf_cache=False,
	    model="",
	    branch="main",
	):
	    """Decompress every ``.znn`` file found directly inside a directory.

	    Args:
	        path: Directory whose immediate entries are searched (the search is
	            not recursive). Replaced by the cached snapshot directory when
	            `model` is given and found in the Hugging Face cache.
	        delete: Forwarded unchanged to ``decompress_file``.
	            NOTE(review): presumably removes the compressed file afterwards;
	            confirm against ``zipnn_decompress_file``.
	        force: When true, never prompt before overwriting an existing
	            decompressed file.
	        max_processes: Number of worker processes used for decompression.
	        hf_cache: When true, the weights index JSON in `path` is rewritten to
	            drop the ``.znn`` suffix; the flag is also forwarded to
	            ``decompress_file``.
	        model: Repo id to look up in the Hugging Face cache. Requires
	            `hf_cache`.
	        branch: Name of the ref (under ``refs/``) whose snapshot is used when
	            `model` is given.

	    Raises:
	        ValueError: `model` was given without `hf_cache`.
	        ImportError: ``zipnn``, ``huggingface_hub`` or ``transformers`` is
	            needed but not installed.
	        FileNotFoundError: `branch` has no ref file in the cached repo.
	    """
	    import zipnn  # noqa: F401 -- fail early if zipnn is not installed

	    if model:
	        if not hf_cache:
	            raise ValueError(
	                "Must specify --hf_cache when using --model"
	            )
	        snapshot_path = _resolve_cached_snapshot(model, branch)
	        if snapshot_path is None:
	            # Previously this case was silent; keep the fallback to `path`
	            # but tell the user that the requested model was not used.
	            print(
	                f"{YELLOW}Repo {model} not found in cache; using path {path} instead.{RESET}"
	            )
	        else:
	            path = snapshot_path

	    file_list = _collect_znn_files(path, force)

	    if file_list and hf_cache:
	        _fix_hf_weights_index(path, file_list[0])

	    # The executor already limits concurrency to `max_processes`, so every
	    # file is submitted exactly once and results are handled as they finish.
	    with ProcessPoolExecutor(
	        max_workers=max_processes
	    ) as executor:
	        future_to_file = {
	            executor.submit(
	                decompress_file,
	                file,
	                delete,
	                True,
	                hf_cache,
	            ): file
	            for file in file_list
	        }
	        for future in as_completed(future_to_file):
	            file = future_to_file[future]
	            try:
	                future.result()
	            except Exception as exc:
	                # Report the failure and keep going with the other files.
	                print(
	                    f"{RED}File {file} generated an exception: {exc}{RESET}"
	                )

	    print(f"{GREEN}All files decompressed{RESET}")


	def _resolve_cached_snapshot(model, branch):
	    """Return the snapshot directory of `model` at `branch` in the Hugging Face cache, or None if the repo is not cached."""
	    try:
	        from huggingface_hub import scan_cache_dir
	    except ImportError as err:
	        raise ImportError(
	            "huggingface_hub not found. Please pip install huggingface_hub."
	        ) from err

	    cache = scan_cache_dir()
	    repo = next((repo for repo in cache.repos if repo.repo_id == model), None)
	    if repo is None:
	        return None

	    print(f"Found repo {model} in cache")

	    # The ref file holds the revision the branch points to; strip stray
	    # whitespace so a trailing newline cannot end up in the snapshot path.
	    try:
	        with open(os.path.join(repo.repo_path, 'refs', branch), "r") as ref:
	            revision = ref.read().strip()
	    except FileNotFoundError:
	        raise FileNotFoundError(f"Branch {branch} not found in repo {model}")

	    return os.path.join(repo.repo_path, 'snapshots', revision)


	def _collect_znn_files(path, force):
	    """Return full paths of the ``.znn`` files in `path` to decompress, prompting before overwrites unless `force` is set."""
	    file_list = []
	    ask_overwrite_all = True

	    for file_name in os.listdir(path):
	        if not file_name.endswith(".znn"):
	            continue

	        # Look for the decompressed file next to the compressed one, not in
	        # the current working directory.
	        decompressed_path = os.path.join(path, file_name[:-4])
	        if not force and os.path.exists(decompressed_path):
	            # Ask the "overwrite everything" question once, on first conflict.
	            if ask_overwrite_all:
	                ask_overwrite_all = False
	                user_input = (
	                    input(
	                        "Decompressed files already exists; Would you like to overwrite them all (y/n)? "
	                    )
	                    .strip()
	                    .lower()
	                )
	                if user_input in ("y", "yes"):
	                    print("Overwriting all decompressed files.")
	                    force = True
	                else:
	                    print("No forced overwriting.")

	            # Without a blanket "yes", confirm each conflicting file.
	            if not force:
	                user_input = (
	                    input(
	                        f"{decompressed_path} already exists; overwrite (y/n)? "
	                    )
	                    .strip()
	                    .lower()
	                )
	                if user_input not in ("y", "yes"):
	                    print(f"Skipping {file_name}...")
	                    continue

	        file_list.append(os.path.join(path, file_name))

	    return file_list


	def _fix_hf_weights_index(path, sample_file):
	    """Drop the ``.znn`` suffix from file names listed in the weights index JSON found in `path`, if there is one."""
	    try:
	        from transformers.utils import (
	            SAFE_WEIGHTS_INDEX_NAME,
	            WEIGHTS_INDEX_NAME,
	        )
	    except ImportError as err:
	        raise ImportError(
	            "Transformers not found. Please pip install transformers."
	        ) from err

	    # Extension that precedes ".znn" in the compressed file's name.
	    suffix = os.path.basename(sample_file).split('.')[-2]

	    # Only the first index file that exists is rewritten.
	    for index_name in (SAFE_WEIGHTS_INDEX_NAME, WEIGHTS_INDEX_NAME):
	        index_path = os.path.join(path, index_name)
	        if os.path.exists(index_path):
	            print(f"{YELLOW}Fixing Hugging Face model json...{RESET}")
	            # realpath follows the snapshot symlink to the underlying file and
	            # also works when the index is a regular file.
	            replace_in_file(
	                file_path=os.path.realpath(index_path),
	                old=f"{suffix}.znn",
	                new=f"{suffix}",
	            )
	            break


	def _build_arg_parser():
	    """Create the command-line parser for this script."""
	    parser = argparse.ArgumentParser(
	        description="Decompresses all .znn files."
	    )
	    parser.add_argument(
	        "--path",
	        type=str,
	        help="Path to folder of files to decompress. If left empty, checks current folder.",
	    )
	    parser.add_argument(
	        "--delete",
	        action="store_true",
	        help="A flag that triggers deletion of a single compressed file instead of decompression",
	    )
	    parser.add_argument(
	        "--force",
	        action="store_true",
	        help="A flag that forces overwriting when decompressing.",
	    )
	    parser.add_argument(
	        "--max_processes",
	        type=int,
	        help="The amount of maximum processes.",
	    )
	    parser.add_argument(
	        "--hf_cache",
	        action="store_true",
	        help="A flag that indicates if the file is in the Hugging Face cache. Must either specify --model or --path to the model's snapshot cache.",
	    )
	    parser.add_argument(
	        "--model",
	        type=str,
	        help="Only when using --hf_cache, specify the model name or path. E.g. 'ibm-granite/granite-7b-instruct'",
	    )
	    parser.add_argument(
	        "--model_branch",
	        type=str,
	        default="main",
	        help="Only when using --model, specify the model branch. Default is 'main'",
	    )
	    return parser


	def _kwargs_from_args(args):
	    """Map parsed CLI arguments to keyword arguments for decompress_znn_files, omitting unset options."""
	    optional_kwargs = {}
	    # --path is forwarded whenever it was given, even as an empty string.
	    if args.path is not None:
	        optional_kwargs["path"] = args.path
	    # The remaining options are forwarded only when truthy, so the
	    # function's own defaults apply otherwise.
	    for arg_name, kwarg_name in (
	        ("delete", "delete"),
	        ("force", "force"),
	        ("max_processes", "max_processes"),
	        ("hf_cache", "hf_cache"),
	        ("model", "model"),
	        ("model_branch", "branch"),
	    ):
	        value = getattr(args, arg_name)
	        if value:
	            optional_kwargs[kwarg_name] = value
	    return optional_kwargs


	if __name__ == "__main__":
	    check_and_install_zipnn()

	    args = _build_arg_parser().parse_args()
	    decompress_znn_files(**_kwargs_from_args(args))