Robotics
Transformers
Safetensors
LeRobot
English
Gr00tN1d6
vision-language-action
manipulation
gr00t
nvidia
physical-ai
humanoid
reachy2
Instructions to use ganatrask/NOVA with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use ganatrask/NOVA with Transformers:
```python
# Load model directly
from transformers import Gr00tN1d6

model = Gr00tN1d6.from_pretrained("ganatrask/NOVA", dtype="auto")
```
- LeRobot
How to use ganatrask/NOVA with LeRobot:
- Notebooks
- Google Colab
- Kaggle
| load_config_path: null | |
| model: | |
| model_type: Gr00tN1d6 | |
| model_dtype: bfloat16 | |
| model_name: nvidia/Eagle-Block2A-2B-v2 | |
| backbone_model_type: eagle | |
| model_revision: null | |
| tune_top_llm_layers: 4 | |
| backbone_embedding_dim: 2048 | |
| tune_llm: false | |
| tune_visual: false | |
| select_layer: 16 | |
| reproject_vision: false | |
| use_flash_attention: true | |
| load_bf16: false | |
| collator_overwrite_image_inputs: false | |
| eagle_collator: true | |
| backbone_trainable_params_fp32: true | |
| image_crop_size: null | |
| image_target_size: null | |
| shortest_image_edge: 256 | |
| crop_fraction: 0.95 | |
| random_rotation_angle: null | |
| color_jitter_params: null | |
| use_albumentations_transforms: true | |
| formalize_language: true | |
| apply_sincos_state_encoding: false | |
| use_relative_action: true | |
| max_state_dim: 29 | |
| max_action_dim: 29 | |
| action_horizon: 16 | |
| hidden_size: 1024 | |
| input_embedding_dim: 1536 | |
| add_pos_embed: true | |
| attn_dropout: 0.2 | |
| use_vlln: true | |
| max_seq_len: 1024 | |
| use_alternate_vl_dit: true | |
| attend_text_every_n_blocks: 2 | |
| diffusion_model_cfg: | |
| positional_embeddings: null | |
| num_layers: 32 | |
| num_attention_heads: 32 | |
| attention_head_dim: 48 | |
| norm_type: ada_norm | |
| dropout: 0.2 | |
| final_dropout: true | |
| output_dim: 1024 | |
| interleave_self_attention: true | |
| num_inference_timesteps: 4 | |
| noise_beta_alpha: 1.5 | |
| noise_beta_beta: 1.0 | |
| noise_s: 0.999 | |
| num_timestep_buckets: 1000 | |
| tune_projector: true | |
| tune_diffusion_model: true | |
| tune_vlln: true | |
| state_dropout_prob: 0.0 | |
| state_additive_noise_scale: 0.0 | |
| max_num_embodiments: 32 | |
| data: | |
| datasets: | |
| - dataset_paths: | |
| - ./datasets/reachy2_100 | |
| embodiment_tag: reachy2 | |
| mix_ratio: 1.0 | |
| dataset_type: physical_embodiment | |
| val_dataset_path: null | |
| modality_configs: | |
| reachy2: | |
| video: | |
| delta_indices: | |
| - 0 | |
| modality_keys: | |
| - front_cam | |
| sin_cos_embedding_keys: null | |
| mean_std_embedding_keys: null | |
| action_configs: null | |
| state: | |
| delta_indices: | |
| - 0 | |
| modality_keys: | |
| - arm_joints | |
| sin_cos_embedding_keys: null | |
| mean_std_embedding_keys: null | |
| action_configs: null | |
| action: | |
| delta_indices: | |
| - 0 | |
| - 1 | |
| - 2 | |
| - 3 | |
| - 4 | |
| - 5 | |
| - 6 | |
| - 7 | |
| - 8 | |
| - 9 | |
| - 10 | |
| - 11 | |
| - 12 | |
| - 13 | |
| - 14 | |
| - 15 | |
| modality_keys: | |
| - arm_joints | |
| - gripper | |
| sin_cos_embedding_keys: null | |
| mean_std_embedding_keys: null | |
| action_configs: | |
| - rep: RELATIVE | |
| type: NON_EEF | |
| format: DEFAULT | |
| state_key: arm_joints | |
| - rep: ABSOLUTE | |
| type: NON_EEF | |
| format: DEFAULT | |
| state_key: null | |
| language: | |
| delta_indices: | |
| - 0 | |
| modality_keys: | |
| - annotation.human.task_description | |
| sin_cos_embedding_keys: null | |
| mean_std_embedding_keys: null | |
| action_configs: null | |
| download_cache: false | |
| shard_size: 1024 | |
| episode_sampling_rate: 0.1 | |
| num_shards_per_epoch: 10000 | |
| override_pretraining_statistics: false | |
| mode: single_turn | |
| random_chop: 0.0 | |
| mock_dataset_mode: false | |
| shuffle: true | |
| seed: 42 | |
| multiprocessing_context: fork | |
| allow_padding: false | |
| subsample_ratio: 1.0 | |
| image_crop_size: | |
| - 244 | |
| - 244 | |
| image_target_size: | |
| - 224 | |
| - 224 | |
| video_backend: decord | |
| training: | |
| output_dir: /tmp/groot_output/reachy2 | |
| experiment_name: null | |
| max_steps: 30000 | |
| global_batch_size: 64 | |
| batch_size: null | |
| gradient_accumulation_steps: 1 | |
| learning_rate: 0.0001 | |
| lr_scheduler_type: cosine | |
| weight_decay: 1.0e-05 | |
| warmup_ratio: 0.05 | |
| warmup_steps: 0 | |
| max_grad_norm: 1.0 | |
| optim: adamw_torch | |
| start_from_checkpoint: nvidia/GR00T-N1.6-3B | |
| tf32: true | |
| fp16: false | |
| bf16: true | |
| eval_bf16: true | |
| logging_steps: 10 | |
| save_steps: 3000 | |
| save_total_limit: 5 | |
| save_vl_model: false | |
| upload_checkpoints: false | |
| upload_every: 1000 | |
| upload_last_n_checkpoints: 5 | |
| max_concurrent_uploads: 2 | |
| eval_strategy: 'no' | |
| eval_steps: 500 | |
| eval_set_split_ratio: 0.1 | |
| eval_batch_size: 2 | |
| save_best_eval_metric_name: '' | |
| save_best_eval_metric_greater_is_better: true | |
| deepspeed_stage: 2 | |
| gradient_checkpointing: false | |
| transformers_trust_remote_code: true | |
| transformers_local_files_only: false | |
| transformers_cache_dir: null | |
| transformers_access_token: null | |
| use_ddp: false | |
| ddp_bucket_cap_mb: 100 | |
| num_gpus: 1 | |
| dataloader_num_workers: 8 | |
| remove_unused_columns: false | |
| use_wandb: false | |
| wandb_project: finetune-gr00t-n1d6 | |
| enable_profiling: false | |
| max_retries: 3 | |
| assert_loss_less_than: null | |
| add_rl_callback: false | |
| enable_open_loop_eval: false | |
| open_loop_eval_traj_ids: | |
| - 0 | |
| open_loop_eval_steps_per_traj: 100 | |
| open_loop_eval_plot_indices: null | |
| max_steps: 30000 | |
| save_steps: 3000 | |