Robotics
Transformers
Safetensors
LeRobot
English
Gr00tN1d6
vision-language-action
manipulation
gr00t
nvidia
physical-ai
humanoid
reachy2
Instructions to use ganatrask/NOVA with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use ganatrask/NOVA with Transformers:
# Load model directly
from transformers import Gr00tN1d6
model = Gr00tN1d6.from_pretrained("ganatrask/NOVA", dtype="auto")
- LeRobot
How to use ganatrask/NOVA with LeRobot:
- Notebooks
- Google Colab
- Kaggle
File size: 4,791 Bytes
# Page-scrape residue stripped from this line: blame gutter (commits
# 8412ad5 and fecc0f2) and the line-number column 1-204.
load_config_path: null
# Model section. Nesting restored from a copy whose indentation was lost:
# every key below belongs to `model`, and `diffusion_model_cfg` is assumed to
# span positional_embeddings..interleave_self_attention — TODO confirm against
# the original file.
model:
  model_type: Gr00tN1d6
  model_dtype: bfloat16
  model_name: nvidia/Eagle-Block2A-2B-v2
  backbone_model_type: eagle
  model_revision: null
  tune_top_llm_layers: 4
  backbone_embedding_dim: 2048
  tune_llm: false
  tune_visual: false
  select_layer: 16
  reproject_vision: false
  use_flash_attention: true
  load_bf16: false
  collator_overwrite_image_inputs: false
  eagle_collator: true
  backbone_trainable_params_fp32: true
  # Image preprocessing. Both sizes are null here; the `data` section carries
  # its own image_crop_size / image_target_size values.
  image_crop_size: null
  image_target_size: null
  shortest_image_edge: 256
  crop_fraction: 0.95
  random_rotation_angle: null
  color_jitter_params: null
  use_albumentations_transforms: true
  formalize_language: true
  apply_sincos_state_encoding: false
  use_relative_action: true
  # State/action sizing.
  max_state_dim: 29
  max_action_dim: 29
  action_horizon: 16
  hidden_size: 1024
  input_embedding_dim: 1536
  add_pos_embed: true
  attn_dropout: 0.2
  use_vlln: true
  max_seq_len: 1024
  use_alternate_vl_dit: true
  attend_text_every_n_blocks: 2
  diffusion_model_cfg:
    positional_embeddings: null
    num_layers: 32
    num_attention_heads: 32
    attention_head_dim: 48
    norm_type: ada_norm
    dropout: 0.2
    final_dropout: true
    output_dim: 1024
    interleave_self_attention: true
  num_inference_timesteps: 4
  noise_beta_alpha: 1.5
  noise_beta_beta: 1.0
  noise_s: 0.999
  num_timestep_buckets: 1000
  # Which sub-modules are tuned (tune_llm / tune_visual above are false).
  tune_projector: true
  tune_diffusion_model: true
  tune_vlln: true
  state_dropout_prob: 0.0
  state_additive_noise_scale: 0.0
  max_num_embodiments: 32
# Data section. Nesting restored from a copy whose indentation was lost.
# Assumptions to confirm against the original file:
#   - `datasets` holds a single entry, and `val_dataset_path` belongs to that
#     entry rather than to `data` itself;
#   - each modality (video/state/action/language) under
#     `modality_configs.reachy2` owns the five keys that follow it.
data:
  datasets:
    - dataset_paths:
        - ./datasets/reachy2_100
      embodiment_tag: reachy2
      mix_ratio: 1.0
      dataset_type: physical_embodiment
      val_dataset_path: null
  modality_configs:
    reachy2:
      video:
        delta_indices:
          - 0
        modality_keys:
          - front_cam
        sin_cos_embedding_keys: null
        mean_std_embedding_keys: null
        action_configs: null
      state:
        delta_indices:
          - 0
        modality_keys:
          - arm_joints
        sin_cos_embedding_keys: null
        mean_std_embedding_keys: null
        action_configs: null
      action:
        # 16 consecutive indices (0-15); the model section sets
        # action_horizon: 16.
        delta_indices:
          - 0
          - 1
          - 2
          - 3
          - 4
          - 5
          - 6
          - 7
          - 8
          - 9
          - 10
          - 11
          - 12
          - 13
          - 14
          - 15
        modality_keys:
          - arm_joints
          - gripper
        sin_cos_embedding_keys: null
        mean_std_embedding_keys: null
        # Two entries, presumably one per modality key in order
        # (arm_joints, gripper) — TODO confirm.
        action_configs:
          - rep: RELATIVE
            type: NON_EEF
            format: DEFAULT
            state_key: arm_joints
          - rep: ABSOLUTE
            type: NON_EEF
            format: DEFAULT
            state_key: null
      language:
        delta_indices:
          - 0
        modality_keys:
          - annotation.human.task_description
        sin_cos_embedding_keys: null
        mean_std_embedding_keys: null
        action_configs: null
  download_cache: false
  shard_size: 1024
  episode_sampling_rate: 0.1
  num_shards_per_epoch: 10000
  override_pretraining_statistics: false
  mode: single_turn
  random_chop: 0.0
  mock_dataset_mode: false
  shuffle: true
  seed: 42
  multiprocessing_context: fork
  allow_padding: false
  subsample_ratio: 1.0
  image_crop_size:
    - 244
    - 244
  image_target_size:
    - 224
    - 224
  video_backend: decord
# Training section. Nesting restored from a copy whose indentation was lost;
# every key below is assumed to belong to `training` — TODO confirm against
# the original file.
training:
  output_dir: /tmp/groot_output/reachy2
  experiment_name: null
  max_steps: 30000
  global_batch_size: 64
  batch_size: null
  gradient_accumulation_steps: 1
  # Optimizer and schedule.
  learning_rate: 0.0001
  lr_scheduler_type: cosine
  weight_decay: 1.0e-05
  warmup_ratio: 0.05
  warmup_steps: 0
  max_grad_norm: 1.0
  optim: adamw_torch
  start_from_checkpoint: nvidia/GR00T-N1.6-3B
  # Numeric precision flags.
  tf32: true
  fp16: false
  bf16: true
  eval_bf16: true
  # Logging, checkpointing and upload.
  logging_steps: 10
  save_steps: 3000
  save_total_limit: 5
  save_vl_model: false
  upload_checkpoints: false
  upload_every: 1000
  upload_last_n_checkpoints: 5
  max_concurrent_uploads: 2
  # Evaluation. 'no' stays quoted so it remains a string rather than being
  # read as a boolean by YAML 1.1 parsers.
  eval_strategy: 'no'
  eval_steps: 500
  eval_set_split_ratio: 0.1
  eval_batch_size: 2
  save_best_eval_metric_name: ''
  save_best_eval_metric_greater_is_better: true
  deepspeed_stage: 2
  gradient_checkpointing: false
  transformers_trust_remote_code: true
  transformers_local_files_only: false
  transformers_cache_dir: null
  transformers_access_token: null
  use_ddp: false
  ddp_bucket_cap_mb: 100
  num_gpus: 1
  dataloader_num_workers: 8
  remove_unused_columns: false
  use_wandb: false
  wandb_project: finetune-gr00t-n1d6
  enable_profiling: false
  max_retries: 3
  assert_loss_less_than: null
  add_rl_callback: false
  enable_open_loop_eval: false
  open_loop_eval_traj_ids:
    - 0
  open_loop_eval_steps_per_traj: 100
  open_loop_eval_plot_indices: null
# NOTE(review): these two keys repeat max_steps / save_steps from the
# training section with identical values (30000 / 3000). Indentation was lost
# in this copy, so it is unclear whether they are separate top-level keys or
# duplicates; confirm against the config loader before removing either pair.
max_steps: 30000
save_steps: 3000
|