Instructions to use philipp-zettl/MolmoE-1B-0924 with libraries, inference providers, notebooks, and local apps. Follow these links to get started.

Libraries

How to use philipp-zettl/MolmoE-1B-0924 with Transformers:

# Use a pipeline as a high-level helper
from transformers import pipeline

# NOTE: this repository's config.json declares custom classes under "auto_map"
# (config_molmoe.MolmoConfig, modeling_molmoe.MolmoForCausalLM), which is
# presumably why trust_remote_code=True is passed here. Only enable it for
# repositories whose code you have reviewed.
pipe = pipeline("image-text-to-text", model="philipp-zettl/MolmoE-1B-0924", trust_remote_code=True)

# Load model directly
# Alternative to the pipeline above: same repository id and the same
# trust_remote_code=True flag, but the model object is created explicitly.
from transformers import AutoModelForCausalLM
model = AutoModelForCausalLM.from_pretrained("philipp-zettl/MolmoE-1B-0924", trust_remote_code=True, dtype="auto")

Notebooks
Google Colab
Kaggle
Local Apps Settings

vLLM

How to use philipp-zettl/MolmoE-1B-0924 with vLLM:

Install from pip and serve model

# Install vLLM from pip:
pip install vllm
# Start the vLLM server:
# --trust-remote-code mirrors trust_remote_code=True in the Transformers example;
# this repository's config.json maps its config/model classes through "auto_map"
# to Python files shipped in the repo. Only use it for code you have reviewed.
vllm serve "philipp-zettl/MolmoE-1B-0924" --trust-remote-code
# Call the server using curl (OpenAI-compatible API):
# (send this request once the server above is up, e.g. from a second terminal)
curl -X POST "http://localhost:8000/v1/completions" \
	-H "Content-Type: application/json" \
	--data '{
		"model": "philipp-zettl/MolmoE-1B-0924",
		"prompt": "Once upon a time,",
		"max_tokens": 512,
		"temperature": 0.5
	}'

Use Docker

# Run the model with Docker Model Runner (model referenced by its hf.co path):
docker model run hf.co/philipp-zettl/MolmoE-1B-0924

SGLang

How to use philipp-zettl/MolmoE-1B-0924 with SGLang:

Install from pip and serve model

# Install SGLang from pip:
pip install sglang
# Start the SGLang server:
# --trust-remote-code mirrors trust_remote_code=True in the Transformers example;
# this repository's config.json maps its config/model classes through "auto_map"
# to Python files shipped in the repo. Only use it for code you have reviewed.
python3 -m sglang.launch_server \
    --model-path "philipp-zettl/MolmoE-1B-0924" \
    --trust-remote-code \
    --host 0.0.0.0 \
    --port 30000
# Call the server using curl (OpenAI-compatible API):
# (send this request once the server above is up, e.g. from a second terminal)
curl -X POST "http://localhost:30000/v1/completions" \
	-H "Content-Type: application/json" \
	--data '{
		"model": "philipp-zettl/MolmoE-1B-0924",
		"prompt": "Once upon a time,",
		"max_tokens": 512,
		"temperature": 0.5
	}'

Use Docker images

# Start the SGLang server from the lmsysorg/sglang image:
# - port 30000 is published to the host and the local Hugging Face cache is mounted
#   at /root/.cache/huggingface inside the container;
# - replace <secret> with your own Hugging Face token;
# - --trust-remote-code mirrors trust_remote_code=True in the Transformers example;
#   this repository's config.json maps its config/model classes through "auto_map"
#   to Python files shipped in the repo. Only use it for code you have reviewed.
docker run --gpus all \
    --shm-size 32g \
    -p 30000:30000 \
    -v ~/.cache/huggingface:/root/.cache/huggingface \
    --env "HF_TOKEN=<secret>" \
    --ipc=host \
    lmsysorg/sglang:latest \
    python3 -m sglang.launch_server \
        --model-path "philipp-zettl/MolmoE-1B-0924" \
        --trust-remote-code \
        --host 0.0.0.0 \
        --port 30000
# Call the server using curl (OpenAI-compatible API):
# (send this request once the server above is up, e.g. from a second terminal)
curl -X POST "http://localhost:30000/v1/completions" \
	-H "Content-Type: application/json" \
	--data '{
		"model": "philipp-zettl/MolmoE-1B-0924",
		"prompt": "Once upon a time,",
		"max_tokens": 512,
		"temperature": 0.5
	}'

Docker Model Runner
How to use philipp-zettl/MolmoE-1B-0924 with Docker Model Runner:
```
# Run the model with Docker Model Runner (model referenced by its hf.co path):
docker model run hf.co/philipp-zettl/MolmoE-1B-0924
```

MolmoE-1B-0924 / config.json

philipp-zettl

Duplicate from allenai/MolmoE-1B-0924

56ba74f verified over 1 year ago

raw

history blame contribute delete

3.52 kB

	{
	"auto_map": {
	"AutoConfig": "config_molmoe.MolmoConfig",
	"AutoModelForCausalLM": "modeling_molmoe.MolmoForCausalLM"
	},
	"activation_type": "swiglu",
	"additional_vocab_size": 128,
	"alibi": false,
	"alibi_bias_max": 8.0,
	"always_start_with_space": true,
	"architectures": [
	"OLMoForCausalLM"
	],
	"attention_dropout": 0.0,
	"attention_layer_norm": true,
	"attention_layer_norm_with_affine": true,
	"attention_type": "sdpa",
	"attn_logit_softcapping": null,
	"bias_for_layer_norm": false,
	"block_group_size": 1,
	"block_type": "moe",
	"clip_qkv": null,
	"crop_mode": "overlap-and-resize-c2",
	"d_model": 2048,
	"default_inference_len": 65,
	"do_random_scale": false,
	"embedding_dropout": 0.0,
	"embedding_size": 50304,
	"final_logit_softcapping": null,
	"fix_image_input_idx": 2,
	"float32_attention": true,
	"gin_bindings": null,
	"head_dim": null,
	"image_feature_dropout": 0.0,
	"image_padding_embed": "pad_and_partial_pad",
	"image_pooling_2d": "attention-meanq",
	"image_pooling_h": 2,
	"image_pooling_w": 2,
	"image_projector": "mlp",
	"include_bias": false,
	"init_cutoff_factor": 3.0,
	"init_device": "meta",
	"init_fn": "normal",
	"init_std": 0.02,
	"initializer_range": 0.02,
	"layer_norm_eps": 1e-05,
	"layer_norm_type": "rms",
	"layer_norm_with_affine": true,
	"llm_load_path": null,
	"loss_token_weighting": "root_subsegments",
	"low_cpu_fsdp": true,
	"max_crops": 12,
	"max_position_embeddings": 32768,
	"max_sequence_length": 4096,
	"message_formatting": "role",
	"mlp_hidden_size": null,
	"mlp_ratio": 1,
	"model_type": "molmo",
	"moe_capacity_factor": 1.25,
	"moe_dropless": true,
	"moe_interleave": false,
	"moe_lbl_in_fp32": false,
	"moe_log_expert_assignment": false,
	"moe_loss_weight": 0.0,
	"moe_mlp_impl": "sparse",
	"moe_num_experts": 64,
	"moe_shared_expert": false,
	"moe_top_k": 8,
	"moe_zloss_weight": 0.0,
	"multi_query_attention": null,
	"n_heads": 16,
	"n_kv_heads": null,
	"n_layers": 16,
	"new_embedding_init_range": 0.02,
	"norm_after": false,
	"normalize_input_embeds": false,
	"overlap_margins": [
	4,
	4
	],
	"pad_to": null,
	"pad_token_id": 1,
	"pad_tokenizer": false,
	"precision": "amp_bf16",
	"prompt_override": null,
	"prompt_type": "uber_model",
	"qkv_bias": false,
	"query_pre_attn_scalar": 224,
	"residual_dropout": 0.1,
	"response_attention_dropout": 0.0,
	"response_residual_dropout": 0.0,
	"rope": true,
	"rope_full_precision": true,
	"rope_impl": "llama",
	"rope_theta": 10000.0,
	"scale_logits": false,
	"system_prompt_kind": "demo_or_style",
	"transformers_version": "4.45.0.dev0",
	"unconditioned": false,
	"use_cache": true,
	"use_cls_feature": false,
	"use_col_tokens": true,
	"use_position_ids": true,
	"vision_backbone": {
	"attention_dropout": 0.0,
	"fsdp_wrap": false,
	"image_default_input_size": [
	336,
	336
	],
	"image_dropout_rate": 0.0,
	"image_emb_dim": 1024,
	"image_head_dim": 64,
	"image_mlp_activations": "quick_gelu",
	"image_mlp_dim": 4096,
	"image_model_type": "openai",
	"image_norm_eps": 1e-05,
	"image_num_heads": 16,
	"image_num_key_value_heads": 16,
	"image_num_layers": 23,
	"image_num_pos": 577,
	"image_patch_size": 14,
	"image_pos_patch_size": 14,
	"initializer_range": 0.02,
	"residual_dropout": 0.0,
	"resize_mode": "default"
	},
	"vit_layers": [
	-2,
	-9
	],
	"vit_load_path": null,
	"vocab_size": 50280,
	"weight_tying": false
	}