# RL-Games PPO Configuration for Long Trajectory Assembly
# Copyright (c) 2022-2025, The Isaac Lab Project Developers.
#
# NOTE(review): the nesting below was restored from a whitespace-collapsed
# copy of this file and follows the usual RL-Games layout
# (params -> env / algo / model / network / config -> player).
# Confirm against the consuming runner before relying on it.

params:
  seed: 42

  # Environment wrapper clipping
  env:
    clip_observations: 5.0
    clip_actions: 1.0

  algo:
    name: a2c_continuous

  model:
    name: continuous_a2c_logstd

  network:
    name: actor_critic
    separate: False
    space:
      continuous:
        # A plain `None` scalar loads as the string "None", not YAML null.
        mu_activation: None
        sigma_activation: None

        mu_init:
          name: default
        sigma_init:
          name: const_initializer
          val: 0
        fixed_sigma: True
    mlp:
      units: [512, 256, 128]
      activation: elu
      d2rl: False

      initializer:
        name: default
      regularizer:
        # String "None" (see the note on mu_activation), not YAML null.
        name: None

  load_checkpoint: False
  load_path: ''

  config:
    name: Galaxea-LongTrajectoryAssembly-Direct-v0
    full_experiment_name: LongTrajectoryAssembly
    env_name: rlgpu
    device: 'cuda:0'
    device_name: 'cuda:0'
    multi_gpu: False
    ppo: True
    mixed_precision: False
    normalize_input: True
    normalize_value: True
    # value_bootstrap: True  # Commented out to match Isaac Lab examples
    num_actors: -1  # Will be set by num_envs
    reward_shaper:
      scale_value: 1.0
    normalize_advantage: True
    gamma: 0.99
    tau: 0.95
    # Written with a decimal point so that YAML 1.1 loaders (e.g. PyYAML)
    # resolve the value as a float; a bare `3e-4` is loaded as a string.
    learning_rate: 3.0e-4
    lr_schedule: adaptive
    kl_threshold: 0.008
    score_to_win: 100000
    max_epochs: 5000
    save_best_after: 100
    save_frequency: 100
    print_stats: True
    grad_norm: 1.0
    entropy_coef: 0.001
    truncate_grads: True
    e_clip: 0.2
    clip_value: True

    # PPO specific
    horizon_length: 32
    minibatch_size: 16384
    mini_epochs: 8
    critic_coef: 2
    # Add bounds loss coefficient to prevent b_loss error
    bounds_loss_coef: 0.0001

    # Training
    games_to_track: 100

    player:
      deterministic: True
      games_num: 1000000
      print_stats: True