upload via upload_folder 2025-08-04T13:40:58.066788+00:00
Browse files- README.md +2 -2
- eval_result.json +6 -1
- full_model.pt +2 -2
- params.json +24 -20
- replay.mp4 +0 -0
- state_dict.pt +2 -2
- tensorboard/events.out.tfevents.1754311775.winkindeMacBook-Air.local.97570.0 +3 -0
README.md
CHANGED
|
@@ -20,7 +20,7 @@ model-index:
|
|
| 20 |
type: LunarLander-v3
|
| 21 |
metrics:
|
| 22 |
- type: mean_reward
|
| 23 |
-
value:
|
| 24 |
name: mean_reward
|
| 25 |
verified: false
|
| 26 |
---
|
|
@@ -47,4 +47,4 @@ state, _ = env.reset()
|
|
| 47 |
action = model.action(state)
|
| 48 |
...
|
| 49 |
```
|
| 50 |
-
There is also a state dict version of the model, you can check the corresponding
|
|
|
|
| 20 |
type: LunarLander-v3
|
| 21 |
metrics:
|
| 22 |
- type: mean_reward
|
| 23 |
+
value: 250.02 +/- 55.89
|
| 24 |
name: mean_reward
|
| 25 |
verified: false
|
| 26 |
---
|
|
|
|
| 47 |
action = model.action(state)
|
| 48 |
...
|
| 49 |
```
|
| 50 |
+
There is also a state dict version of the model, you can check the corresponding definition in the repo.
|
eval_result.json
CHANGED
|
@@ -1 +1,6 @@
|
|
| 1 |
-
{
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"mean_reward": 250.0192132640677,
|
| 3 |
+
"std_reward": 55.891006920169296,
|
| 4 |
+
"datetime": "2025-08-04T12:51:26.075627+00:00",
|
| 5 |
+
"train_duration_min": "1.65"
|
| 6 |
+
}
|
full_model.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f6dec154d435a9f76d7a2b1e5640a41dbfb7eedcc51e054352fd4624b064bac2
|
| 3 |
+
size 285941
|
params.json
CHANGED
|
@@ -3,6 +3,7 @@
|
|
| 3 |
"env_id": "LunarLander-v3",
|
| 4 |
"env_kwargs": {},
|
| 5 |
"max_steps": null,
|
|
|
|
| 6 |
"use_image": false,
|
| 7 |
"vector_env_num": 6,
|
| 8 |
"use_multi_processing": true,
|
|
@@ -11,34 +12,37 @@
|
|
| 11 |
"frame_skip": 1,
|
| 12 |
"training_render_mode": null
|
| 13 |
},
|
| 14 |
-
"device": "
|
| 15 |
-
"learning_rate": 0.
|
| 16 |
"gamma": 0.99,
|
| 17 |
"checkpoint_pathname": "",
|
|
|
|
|
|
|
| 18 |
"eval_episodes": 50,
|
| 19 |
"eval_random_seed": 42,
|
| 20 |
"eval_video_num": 10,
|
| 21 |
-
"total_steps":
|
| 22 |
-
"rollout_len":
|
| 23 |
"gae_lambda_or_n_step": 0.97,
|
| 24 |
"entropy_coef": {
|
| 25 |
"_type": "LinearSchedule",
|
| 26 |
"_module": "practice.utils_for_coding.scheduler_utils",
|
| 27 |
-
"
|
| 28 |
-
"
|
| 29 |
-
"
|
|
|
|
| 30 |
},
|
| 31 |
-
"value_loss_coef": 0.
|
| 32 |
-
"
|
| 33 |
-
"
|
| 34 |
-
"
|
| 35 |
-
"
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
}
|
|
|
|
| 3 |
"env_id": "LunarLander-v3",
|
| 4 |
"env_kwargs": {},
|
| 5 |
"max_steps": null,
|
| 6 |
+
"normalize_obs": false,
|
| 7 |
"use_image": false,
|
| 8 |
"vector_env_num": 6,
|
| 9 |
"use_multi_processing": true,
|
|
|
|
| 12 |
"frame_skip": 1,
|
| 13 |
"training_render_mode": null
|
| 14 |
},
|
| 15 |
+
"device": "cpu",
|
| 16 |
+
"learning_rate": 0.0003,
|
| 17 |
"gamma": 0.99,
|
| 18 |
"checkpoint_pathname": "",
|
| 19 |
+
"max_grad_norm": 0.5,
|
| 20 |
+
"log_interval": 10,
|
| 21 |
"eval_episodes": 50,
|
| 22 |
"eval_random_seed": 42,
|
| 23 |
"eval_video_num": 10,
|
| 24 |
+
"total_steps": 900000,
|
| 25 |
+
"rollout_len": 128,
|
| 26 |
"gae_lambda_or_n_step": 0.97,
|
| 27 |
"entropy_coef": {
|
| 28 |
"_type": "LinearSchedule",
|
| 29 |
"_module": "practice.utils_for_coding.scheduler_utils",
|
| 30 |
+
"_start_e": 0.2,
|
| 31 |
+
"_end_e": 0.1,
|
| 32 |
+
"_duration": 600,
|
| 33 |
+
"_start_t": 0
|
| 34 |
},
|
| 35 |
+
"value_loss_coef": 0.5,
|
| 36 |
+
"critic_lr": 0.0003,
|
| 37 |
+
"critic_lr_gamma": 0.995,
|
| 38 |
+
"normalize_returns": false,
|
| 39 |
+
"hidden_sizes": [
|
| 40 |
+
256,
|
| 41 |
+
256
|
| 42 |
+
],
|
| 43 |
+
"reward_clip": [
|
| 44 |
+
-1,
|
| 45 |
+
1
|
| 46 |
+
],
|
| 47 |
+
"value_clip_range": 1.0
|
| 48 |
}
|
replay.mp4
CHANGED
|
Binary files a/replay.mp4 and b/replay.mp4 differ
|
|
|
state_dict.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0dfd5f204e4d8fb31457ab5ccedaee58dc7bda7148099d3c1c26d1e2a3ae9f7e
|
| 3 |
+
size 283957
|
tensorboard/events.out.tfevents.1754311775.winkindeMacBook-Air.local.97570.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:101429b97396a01230cc115fa6571805d63f8a8fd1c1ae4df1ad62eb6da800fc
|
| 3 |
+
size 83191
|