Instructions to use NimVideo/cogvideox-2b-img2vid with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Diffusers
How to use NimVideo/cogvideox-2b-img2vid with Diffusers:
pip install -U diffusers transformers accelerate
```python
import torch
from diffusers import DiffusionPipeline
from diffusers.utils import load_image, export_to_video

# switch to "mps" for apple devices
pipe = DiffusionPipeline.from_pretrained("NimVideo/cogvideox-2b-img2vid", dtype=torch.bfloat16, device_map="cuda")
pipe.to("cuda")

prompt = "A man with short gray hair plays a red electric guitar."
image = load_image(
    "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/guitar-man.png"
)

output = pipe(image=image, prompt=prompt).frames[0]
export_to_video(output, "output.mp4")
```
- Notebooks
- Google Colab
- Kaggle
| { | |
| "last_node_id": 58, | |
| "last_link_id": 129, | |
| "nodes": [ | |
| { | |
| "id": 31, | |
| "type": "CogVideoTextEncode", | |
| "pos": { | |
| "0": 497, | |
| "1": 520 | |
| }, | |
| "size": { | |
| "0": 463.01251220703125, | |
| "1": 124 | |
| }, | |
| "flags": {}, | |
| "order": 4, | |
| "mode": 0, | |
| "inputs": [ | |
| { | |
| "name": "clip", | |
| "type": "CLIP", | |
| "link": 56 | |
| } | |
| ], | |
| "outputs": [ | |
| { | |
| "name": "conditioning", | |
| "type": "CONDITIONING", | |
| "links": [ | |
| 123 | |
| ], | |
| "slot_index": 0, | |
| "shape": 3 | |
| } | |
| ], | |
| "properties": { | |
| "Node name for S&R": "CogVideoTextEncode" | |
| }, | |
| "widgets_values": [ | |
| "The video is not of a high quality, it has a low resolution. Watermark present in each frame. Strange motion trajectory. ", | |
| 1, | |
| true | |
| ] | |
| }, | |
| { | |
| "id": 37, | |
| "type": "ImageResizeKJ", | |
| "pos": { | |
| "0": 809, | |
| "1": 684 | |
| }, | |
| "size": { | |
| "0": 315, | |
| "1": 266 | |
| }, | |
| "flags": {}, | |
| "order": 5, | |
| "mode": 0, | |
| "inputs": [ | |
| { | |
| "name": "image", | |
| "type": "IMAGE", | |
| "link": 71 | |
| }, | |
| { | |
| "name": "get_image_size", | |
| "type": "IMAGE", | |
| "link": null, | |
| "shape": 7 | |
| }, | |
| { | |
| "name": "width_input", | |
| "type": "INT", | |
| "link": null, | |
| "widget": { | |
| "name": "width_input" | |
| } | |
| }, | |
| { | |
| "name": "height_input", | |
| "type": "INT", | |
| "link": null, | |
| "widget": { | |
| "name": "height_input" | |
| } | |
| } | |
| ], | |
| "outputs": [ | |
| { | |
| "name": "IMAGE", | |
| "type": "IMAGE", | |
| "links": [ | |
| 125 | |
| ], | |
| "slot_index": 0, | |
| "shape": 3 | |
| }, | |
| { | |
| "name": "width", | |
| "type": "INT", | |
| "links": null, | |
| "shape": 3 | |
| }, | |
| { | |
| "name": "height", | |
| "type": "INT", | |
| "links": null, | |
| "shape": 3 | |
| } | |
| ], | |
| "properties": { | |
| "Node name for S&R": "ImageResizeKJ" | |
| }, | |
| "widgets_values": [ | |
| 720, | |
| 480, | |
| "lanczos", | |
| false, | |
| 16, | |
| 0, | |
| 0, | |
| "disabled" | |
| ] | |
| }, | |
| { | |
| "id": 58, | |
| "type": "CogVideoImageEncode", | |
| "pos": { | |
| "0": 1156, | |
| "1": 650 | |
| }, | |
| "size": { | |
| "0": 315, | |
| "1": 122 | |
| }, | |
| "flags": {}, | |
| "order": 6, | |
| "mode": 0, | |
| "inputs": [ | |
| { | |
| "name": "pipeline", | |
| "type": "COGVIDEOPIPE", | |
| "link": 124 | |
| }, | |
| { | |
| "name": "image", | |
| "type": "IMAGE", | |
| "link": 125 | |
| }, | |
| { | |
| "name": "mask", | |
| "type": "MASK", | |
| "link": null, | |
| "shape": 7 | |
| } | |
| ], | |
| "outputs": [ | |
| { | |
| "name": "samples", | |
| "type": "LATENT", | |
| "links": [ | |
| 129 | |
| ], | |
| "slot_index": 0, | |
| "shape": 3 | |
| } | |
| ], | |
| "properties": { | |
| "Node name for S&R": "CogVideoImageEncode" | |
| }, | |
| "widgets_values": [ | |
| 16, | |
| true | |
| ] | |
| }, | |
| { | |
| "id": 56, | |
| "type": "CogVideoDecode", | |
| "pos": { | |
| "0": 1581, | |
| "1": 148 | |
| }, | |
| "size": { | |
| "0": 300.396484375, | |
| "1": 198 | |
| }, | |
| "flags": {}, | |
| "order": 8, | |
| "mode": 0, | |
| "inputs": [ | |
| { | |
| "name": "pipeline", | |
| "type": "COGVIDEOPIPE", | |
| "link": 128 | |
| }, | |
| { | |
| "name": "samples", | |
| "type": "LATENT", | |
| "link": 127 | |
| } | |
| ], | |
| "outputs": [ | |
| { | |
| "name": "images", | |
| "type": "IMAGE", | |
| "links": [ | |
| 118 | |
| ], | |
| "slot_index": 0, | |
| "shape": 3 | |
| } | |
| ], | |
| "properties": { | |
| "Node name for S&R": "CogVideoDecode" | |
| }, | |
| "widgets_values": [ | |
| false, | |
| 240, | |
| 360, | |
| 0.2, | |
| 0.2, | |
| true | |
| ] | |
| }, | |
| { | |
| "id": 20, | |
| "type": "CLIPLoader", | |
| "pos": { | |
| "0": -26, | |
| "1": 400 | |
| }, | |
| "size": { | |
| "0": 451.30548095703125, | |
| "1": 82 | |
| }, | |
| "flags": {}, | |
| "order": 0, | |
| "mode": 0, | |
| "inputs": [], | |
| "outputs": [ | |
| { | |
| "name": "CLIP", | |
| "type": "CLIP", | |
| "links": [ | |
| 54, | |
| 56 | |
| ], | |
| "slot_index": 0, | |
| "shape": 3 | |
| } | |
| ], | |
| "properties": { | |
| "Node name for S&R": "CLIPLoader" | |
| }, | |
| "widgets_values": [ | |
| "t5/google_t5-v1_1-xxl_encoderonly-fp8_e4m3fn.safetensors", | |
| "sd3" | |
| ] | |
| }, | |
| { | |
| "id": 30, | |
| "type": "CogVideoTextEncode", | |
| "pos": { | |
| "0": 493, | |
| "1": 303 | |
| }, | |
| "size": { | |
| "0": 471.90142822265625, | |
| "1": 168.08047485351562 | |
| }, | |
| "flags": {}, | |
| "order": 3, | |
| "mode": 0, | |
| "inputs": [ | |
| { | |
| "name": "clip", | |
| "type": "CLIP", | |
| "link": 54 | |
| } | |
| ], | |
| "outputs": [ | |
| { | |
| "name": "conditioning", | |
| "type": "CONDITIONING", | |
| "links": [ | |
| 122 | |
| ], | |
| "slot_index": 0, | |
| "shape": 3 | |
| } | |
| ], | |
| "properties": { | |
| "Node name for S&R": "CogVideoTextEncode" | |
| }, | |
| "widgets_values": [ | |
| "The camera follows before the truck. ", | |
| 1, | |
| true | |
| ] | |
| }, | |
| { | |
| "id": 1, | |
| "type": "DownloadAndLoadCogVideoModel", | |
| "pos": { | |
| "0": 633, | |
| "1": 44 | |
| }, | |
| "size": { | |
| "0": 337.8885192871094, | |
| "1": 194 | |
| }, | |
| "flags": {}, | |
| "order": 1, | |
| "mode": 0, | |
| "inputs": [ | |
| { | |
| "name": "pab_config", | |
| "type": "PAB_CONFIG", | |
| "link": null, | |
| "shape": 7 | |
| }, | |
| { | |
| "name": "block_edit", | |
| "type": "TRANSFORMERBLOCKS", | |
| "link": null, | |
| "shape": 7 | |
| }, | |
| { | |
| "name": "lora", | |
| "type": "COGLORA", | |
| "link": null, | |
| "shape": 7 | |
| } | |
| ], | |
| "outputs": [ | |
| { | |
| "name": "cogvideo_pipe", | |
| "type": "COGVIDEOPIPE", | |
| "links": [ | |
| 121, | |
| 124 | |
| ], | |
| "slot_index": 0, | |
| "shape": 3 | |
| } | |
| ], | |
| "properties": { | |
| "Node name for S&R": "DownloadAndLoadCogVideoModel" | |
| }, | |
| "widgets_values": [ | |
| "NimVideo/cogvideox-2b-img2vid", | |
| "fp16", | |
| "disabled", | |
| "disabled", | |
| false | |
| ] | |
| }, | |
| { | |
| "id": 57, | |
| "type": "CogVideoSampler", | |
| "pos": { | |
| "0": 1138, | |
| "1": 150 | |
| }, | |
| "size": { | |
| "0": 399.8780822753906, | |
| "1": 390 | |
| }, | |
| "flags": {}, | |
| "order": 7, | |
| "mode": 0, | |
| "inputs": [ | |
| { | |
| "name": "pipeline", | |
| "type": "COGVIDEOPIPE", | |
| "link": 121 | |
| }, | |
| { | |
| "name": "positive", | |
| "type": "CONDITIONING", | |
| "link": 122 | |
| }, | |
| { | |
| "name": "negative", | |
| "type": "CONDITIONING", | |
| "link": 123 | |
| }, | |
| { | |
| "name": "samples", | |
| "type": "LATENT", | |
| "link": null, | |
| "shape": 7 | |
| }, | |
| { | |
| "name": "image_cond_latents", | |
| "type": "LATENT", | |
| "link": 129, | |
| "shape": 7 | |
| }, | |
| { | |
| "name": "context_options", | |
| "type": "COGCONTEXT", | |
| "link": null, | |
| "shape": 7 | |
| }, | |
| { | |
| "name": "controlnet", | |
| "type": "COGVIDECONTROLNET", | |
| "link": null, | |
| "shape": 7 | |
| }, | |
| { | |
| "name": "tora_trajectory", | |
| "type": "TORAFEATURES", | |
| "link": null, | |
| "shape": 7 | |
| } | |
| ], | |
| "outputs": [ | |
| { | |
| "name": "cogvideo_pipe", | |
| "type": "COGVIDEOPIPE", | |
| "links": [ | |
| 128 | |
| ], | |
| "slot_index": 0, | |
| "shape": 3 | |
| }, | |
| { | |
| "name": "samples", | |
| "type": "LATENT", | |
| "links": [ | |
| 127 | |
| ], | |
| "shape": 3 | |
| } | |
| ], | |
| "properties": { | |
| "Node name for S&R": "CogVideoSampler" | |
| }, | |
| "widgets_values": [ | |
| 480, | |
| 720, | |
| 49, | |
| 20, | |
| 6, | |
| 65334758276105, | |
| "fixed", | |
| "CogVideoXDDIM", | |
| 1 | |
| ] | |
| }, | |
| { | |
| "id": 44, | |
| "type": "VHS_VideoCombine", | |
| "pos": { | |
| "0": 1927, | |
| "1": 146 | |
| }, | |
| "size": [ | |
| 605.3909912109375, | |
| 714.2606608072917 | |
| ], | |
| "flags": {}, | |
| "order": 9, | |
| "mode": 0, | |
| "inputs": [ | |
| { | |
| "name": "images", | |
| "type": "IMAGE", | |
| "link": 118 | |
| }, | |
| { | |
| "name": "audio", | |
| "type": "AUDIO", | |
| "link": null, | |
| "shape": 7 | |
| }, | |
| { | |
| "name": "meta_batch", | |
| "type": "VHS_BatchManager", | |
| "link": null, | |
| "shape": 7 | |
| }, | |
| { | |
| "name": "vae", | |
| "type": "VAE", | |
| "link": null, | |
| "shape": 7 | |
| } | |
| ], | |
| "outputs": [ | |
| { | |
| "name": "Filenames", | |
| "type": "VHS_FILENAMES", | |
| "links": null, | |
| "shape": 3 | |
| } | |
| ], | |
| "properties": { | |
| "Node name for S&R": "VHS_VideoCombine" | |
| }, | |
| "widgets_values": { | |
| "frame_rate": 8, | |
| "loop_count": 0, | |
| "filename_prefix": "CogVideoX-2b-I2V", | |
| "format": "video/h264-mp4", | |
| "pix_fmt": "yuv420p", | |
| "crf": 19, | |
| "save_metadata": true, | |
| "pingpong": false, | |
| "save_output": false, | |
| "videopreview": { | |
| "hidden": false, | |
| "paused": false, | |
| "params": { | |
| "filename": "CogVideoX-2b-I2V_00001.mp4", | |
| "subfolder": "", | |
| "type": "temp", | |
| "format": "video/h264-mp4", | |
| "frame_rate": 8 | |
| }, | |
| "muted": false | |
| } | |
| } | |
| }, | |
| { | |
| "id": 36, | |
| "type": "LoadImage", | |
| "pos": { | |
| "0": 365, | |
| "1": 685 | |
| }, | |
| "size": { | |
| "0": 402.06353759765625, | |
| "1": 396.6225891113281 | |
| }, | |
| "flags": {}, | |
| "order": 2, | |
| "mode": 0, | |
| "inputs": [], | |
| "outputs": [ | |
| { | |
| "name": "IMAGE", | |
| "type": "IMAGE", | |
| "links": [ | |
| 71 | |
| ], | |
| "slot_index": 0, | |
| "shape": 3 | |
| }, | |
| { | |
| "name": "MASK", | |
| "type": "MASK", | |
| "links": null, | |
| "shape": 3 | |
| } | |
| ], | |
| "properties": { | |
| "Node name for S&R": "LoadImage" | |
| }, | |
| "widgets_values": [ | |
| "truck.jpg", | |
| "image" | |
| ] | |
| } | |
| ], | |
| "links": [ | |
| [ | |
| 54, | |
| 20, | |
| 0, | |
| 30, | |
| 0, | |
| "CLIP" | |
| ], | |
| [ | |
| 56, | |
| 20, | |
| 0, | |
| 31, | |
| 0, | |
| "CLIP" | |
| ], | |
| [ | |
| 71, | |
| 36, | |
| 0, | |
| 37, | |
| 0, | |
| "IMAGE" | |
| ], | |
| [ | |
| 118, | |
| 56, | |
| 0, | |
| 44, | |
| 0, | |
| "IMAGE" | |
| ], | |
| [ | |
| 121, | |
| 1, | |
| 0, | |
| 57, | |
| 0, | |
| "COGVIDEOPIPE" | |
| ], | |
| [ | |
| 122, | |
| 30, | |
| 0, | |
| 57, | |
| 1, | |
| "CONDITIONING" | |
| ], | |
| [ | |
| 123, | |
| 31, | |
| 0, | |
| 57, | |
| 2, | |
| "CONDITIONING" | |
| ], | |
| [ | |
| 124, | |
| 1, | |
| 0, | |
| 58, | |
| 0, | |
| "COGVIDEOPIPE" | |
| ], | |
| [ | |
| 125, | |
| 37, | |
| 0, | |
| 58, | |
| 1, | |
| "IMAGE" | |
| ], | |
| [ | |
| 127, | |
| 57, | |
| 1, | |
| 56, | |
| 1, | |
| "LATENT" | |
| ], | |
| [ | |
| 128, | |
| 57, | |
| 0, | |
| 56, | |
| 0, | |
| "COGVIDEOPIPE" | |
| ], | |
| [ | |
| 129, | |
| 58, | |
| 0, | |
| 57, | |
| 4, | |
| "LATENT" | |
| ] | |
| ], | |
| "groups": [], | |
| "config": {}, | |
| "extra": { | |
| "ds": { | |
| "scale": 0.6830134553650714, | |
| "offset": [ | |
| 73.42422056322742, | |
| 83.67389678726582 | |
| ] | |
| } | |
| }, | |
| "version": 0.4 | |
| } |