【ManiSkill】StackCube-v1 官方命令训练结果不稳定的研究报告
1. 多次训练日志: 不同随机种子的最终 success_once 为 0.00 ~ 0.69
#!/bin/bash
# Runs the official ManiSkill SAC training command for StackCube-v1 once per
# random seed so the run-to-run spread of the results can be compared.
# Invoke it from the directory that contains sac.py (the path is relative).
#
# Note: there is no `set -e`, so a failed run does not stop the remaining
# seeds; the "Finished training" marker is printed after every run regardless
# of the exit status of the training process.

# Random seeds to sweep over; each seed gets its own experiment name.
SEEDS=(1 2 3 4 5)

# Loop over every seed and launch one training run per seed.
for seed in "${SEEDS[@]}"; do
  echo "=================================================="
  echo "Running training with SEED=${seed}"
  echo "=================================================="

  python sac.py --env_id="StackCube-v1" \
    --num_envs=32 --utd=0.5 --buffer_size=500_000 \
    --total_timesteps=500_000 --eval_freq=50_000 \
    --control-mode="pd_ee_delta_pos" \
    --seed="${seed}" \
    --exp-name="StackCube-v1-sac-stability-test-seed-${seed}"

  echo "Finished training with SEED=${seed}"
done

echo "All experiments completed."
./run_experiments.sh
==================================================
Running training with SEED=1
==================================================
Saving eval trajectories/videos to runs/StackCube-v1-sac-stability-test-seed-1/videos
Running training
success_once: 0.00, return: 2.59: 0%| | 0/500000 [00:04<?, ?it/s]model saved to runs/StackCube-v1-sac-stability-test-seed-1/ckpt_0.pt
success_once: 0.00, return: 22.25: 10%|█████▉ | 50048/500000 [04:14<39:37, 189.24it/s]model saved to runs/StackCube-v1-sac-stability-test-seed-1/ckpt_50048.pt
success_once: 0.00, return: 23.50: 20%|███████████▌ | 100032/500000 [08:44<34:39, 192.31it/s]model saved to runs/StackCube-v1-sac-stability-test-seed-1/ckpt_100032.pt
success_once: 0.00, return: 25.88: 30%|█████████████████▍ | 150016/500000 [13:11<31:52, 183.00it/s]model saved to runs/StackCube-v1-sac-stability-test-seed-1/ckpt_150016.pt
success_once: 0.00, return: 30.63: 40%|███████████████████████▏ | 200000/500000 [17:42<26:00, 192.21it/s]model saved to runs/StackCube-v1-sac-stability-test-seed-1/ckpt_200000.pt
success_once: 0.00, return: 30.41: 50%|█████████████████████████████ | 250048/500000 [22:08<23:25, 177.82it/s]model saved to runs/StackCube-v1-sac-stability-test-seed-1/ckpt_250048.pt
success_once: 0.06, return: 31.34: 60%|██████████████████████████████████▊ | 300032/500000 [26:37<17:45, 187.74it/s]model saved to runs/StackCube-v1-sac-stability-test-seed-1/ckpt_300032.pt
success_once: 0.19, return: 33.52: 70%|████████████████████████████████████████▌ | 350016/500000 [31:02<12:53, 193.99it/s]model saved to runs/StackCube-v1-sac-stability-test-seed-1/ckpt_350016.pt
success_once: 0.56, return: 35.08: 80%|██████████████████████████████████████████████▍ | 400000/500000 [35:28<08:47, 189.56it/s]model saved to runs/StackCube-v1-sac-stability-test-seed-1/ckpt_400000.pt
success_once: 0.69, return: 36.54: 90%|████████████████████████████████████████████████████▏ | 450048/500000 [39:54<04:12, 197.61it/s]model saved to runs/StackCube-v1-sac-stability-test-seed-1/ckpt_450048.pt
success_once: 0.69, return: 36.54: : 500032it [44:12, 191.93it/s] model saved to runs/StackCube-v1-sac-stability-test-seed-1/final_ckpt.pt
Finished training with SEED=1
==================================================
Running training with SEED=2
==================================================
Saving eval trajectories/videos to runs/StackCube-v1-sac-stability-test-seed-2/videos
Running training
success_once: 0.00, return: 1.67: 0%| | 0/500000 [00:04<?, ?it/s]model saved to runs/StackCube-v1-sac-stability-test-seed-2/ckpt_0.pt
success_once: 0.00, return: 22.81: 10%|██████▊ | 50048/500000 [04:02<37:34, 199.54it/s]model saved to runs/StackCube-v1-sac-stability-test-seed-2/ckpt_50048.pt
success_once: 0.00, return: 24.48: 20%|█████████████▍ | 100032/500000 [08:19<34:12, 194.82it/s]model saved to runs/StackCube-v1-sac-stability-test-seed-2/ckpt_100032.pt
success_once: 0.00, return: 26.24: 30%|████████████████████ | 150016/500000 [12:38<29:31, 197.60it/s]model saved to runs/StackCube-v1-sac-stability-test-seed-2/ckpt_150016.pt
success_once: 0.00, return: 29.55: 40%|██████████████████████████▊ | 200000/500000 [16:57<25:51, 193.37it/s]model saved to runs/StackCube-v1-sac-stability-test-seed-2/ckpt_200000.pt
success_once: 0.00, return: 31.27: 50%|█████████████████████████████████▌ | 250048/500000 [21:18<21:25, 194.39it/s]model saved to runs/StackCube-v1-sac-stability-test-seed-2/ckpt_250048.pt
success_once: 0.06, return: 33.84: 60%|████████████████████████████████████████▏ | 300032/500000 [25:39<17:05, 194.99it/s]model saved to runs/StackCube-v1-sac-stability-test-seed-2/ckpt_300032.pt
success_once: 0.00, return: 33.06: 70%|██████████████████████████████████████████████▉ | 350016/500000 [29:57<12:38, 197.67it/s]model saved to runs/StackCube-v1-sac-stability-test-seed-2/ckpt_350016.pt
success_once: 0.00, return: 33.60: 80%|█████████████████████████████████████████████████████▌ | 400000/500000 [34:16<08:42, 191.36it/s]model saved to runs/StackCube-v1-sac-stability-test-seed-2/ckpt_400000.pt
success_once: 0.00, return: 35.10: 90%|████████████████████████████████████████████████████████████▎ | 450048/500000 [38:37<04:15, 195.67it/s]model saved to runs/StackCube-v1-sac-stability-test-seed-2/ckpt_450048.pt
success_once: 0.00, return: 35.10: : 500032it [42:52, 197.34it/s] model saved to runs/StackCube-v1-sac-stability-test-seed-2/final_ckpt.pt
Finished training with SEED=2
==================================================
Running training with SEED=3
==================================================
Saving eval trajectories/videos to runs/StackCube-v1-sac-stability-test-seed-3/videos
Running training
success_once: 0.00, return: 2.82: 0%| | 0/500000 [00:04<?, ?it/s]model saved to runs/StackCube-v1-sac-stability-test-seed-3/ckpt_0.pt
success_once: 0.00, return: 24.34: 10%|██████▊ | 50048/500000 [04:03<38:02, 197.15it/s]model saved to runs/StackCube-v1-sac-stability-test-seed-3/ckpt_50048.pt
success_once: 0.00, return: 25.16: 20%|█████████████▍ | 100032/500000 [08:18<33:32, 198.77it/s]model saved to runs/StackCube-v1-sac-stability-test-seed-3/ckpt_100032.pt
success_once: 0.00, return: 25.14: 30%|████████████████████ | 150016/500000 [12:36<29:23, 198.48it/s]model saved to runs/StackCube-v1-sac-stability-test-seed-3/ckpt_150016.pt
success_once: 0.00, return: 26.26: 40%|██████████████████████████▊ | 200000/500000 [16:55<25:50, 193.48it/s]model saved to runs/StackCube-v1-sac-stability-test-seed-3/ckpt_200000.pt
success_once: 0.00, return: 29.69: 50%|█████████████████████████████████▌ | 250048/500000 [21:14<20:54, 199.21it/s]model saved to runs/StackCube-v1-sac-stability-test-seed-3/ckpt_250048.pt
success_once: 0.00, return: 32.74: 60%|████████████████████████████████████████▏ | 300032/500000 [25:33<17:03, 195.30it/s]model saved to runs/StackCube-v1-sac-stability-test-seed-3/ckpt_300032.pt
success_once: 0.00, return: 34.06: 70%|██████████████████████████████████████████████▉ | 350016/500000 [29:51<12:40, 197.21it/s]model saved to runs/StackCube-v1-sac-stability-test-seed-3/ckpt_350016.pt
success_once: 0.06, return: 35.08: 80%|█████████████████████████████████████████████████████▌ | 400000/500000 [34:10<08:32, 194.94it/s]model saved to runs/StackCube-v1-sac-stability-test-seed-3/ckpt_400000.pt
success_once: 0.00, return: 32.85: 90%|████████████████████████████████████████████████████████████▎ | 450048/500000 [38:29<04:11, 198.46it/s]model saved to runs/StackCube-v1-sac-stability-test-seed-3/ckpt_450048.pt
success_once: 0.00, return: 32.85: : 500032it [42:44, 192.70it/s] model saved to runs/StackCube-v1-sac-stability-test-seed-3/final_ckpt.pt
Finished training with SEED=3
==================================================
Running training with SEED=4
==================================================
Saving eval trajectories/videos to runs/StackCube-v1-sac-stability-test-seed-4/videos
Running training
success_once: 0.00, return: 3.52: 0%| | 0/500000 [00:04<?, ?it/s]model saved to runs/StackCube-v1-sac-stability-test-seed-4/ckpt_0.pt
success_once: 0.00, return: 23.66: 10%|██████▊ | 50048/500000 [04:04<37:59, 197.41it/s]model saved to runs/StackCube-v1-sac-stability-test-seed-4/ckpt_50048.pt
success_once: 0.00, return: 24.11: 20%|█████████████▍ | 100032/500000 [08:25<34:31, 193.07it/s]model saved to runs/StackCube-v1-sac-stability-test-seed-4/ckpt_100032.pt
success_once: 0.00, return: 25.82: 30%|████████████████████ | 150016/500000 [12:44<29:32, 197.42it/s]model saved to runs/StackCube-v1-sac-stability-test-seed-4/ckpt_150016.pt
success_once: 0.00, return: 25.73: 40%|██████████████████████████▊ | 200000/500000 [17:05<25:48, 193.70it/s]model saved to runs/StackCube-v1-sac-stability-test-seed-4/ckpt_200000.pt
success_once: 0.00, return: 29.89: 50%|█████████████████████████████████▌ | 250048/500000 [21:26<21:32, 193.31it/s]model saved to runs/StackCube-v1-sac-stability-test-seed-4/ckpt_250048.pt
success_once: 0.00, return: 32.04: 60%|████████████████████████████████████████▏ | 300032/500000 [25:46<16:58, 196.26it/s]model saved to runs/StackCube-v1-sac-stability-test-seed-4/ckpt_300032.pt
success_once: 0.00, return: 31.27: 70%|██████████████████████████████████████████████▉ | 350016/500000 [30:05<12:39, 197.36it/s]model saved to runs/StackCube-v1-sac-stability-test-seed-4/ckpt_350016.pt
success_once: 0.12, return: 32.71: 80%|█████████████████████████████████████████████████████▌ | 400000/500000 [34:26<08:49, 188.77it/s]model saved to runs/StackCube-v1-sac-stability-test-seed-4/ckpt_400000.pt
success_once: 0.19, return: 35.28: 90%|████████████████████████████████████████████████████████████▎ | 450048/500000 [38:48<04:12, 198.13it/s]model saved to runs/StackCube-v1-sac-stability-test-seed-4/ckpt_450048.pt
success_once: 0.19, return: 35.28: : 500032it [43:03, 196.74it/s] model saved to runs/StackCube-v1-sac-stability-test-seed-4/final_ckpt.pt
Finished training with SEED=4
==================================================
Running training with SEED=5
==================================================
Saving eval trajectories/videos to runs/StackCube-v1-sac-stability-test-seed-5/videos
Running training
success_once: 0.00, return: 4.23: 0%| | 0/500000 [00:04<?, ?it/s]model saved to runs/StackCube-v1-sac-stability-test-seed-5/ckpt_0.pt
success_once: 0.00, return: 24.10: 10%|██████▊ | 50048/500000 [04:02<37:45, 198.64it/s]model saved to runs/StackCube-v1-sac-stability-test-seed-5/ckpt_50048.pt
success_once: 0.00, return: 23.54: 20%|█████████████▍ | 100032/500000 [08:19<33:25, 199.39it/s]model saved to runs/StackCube-v1-sac-stability-test-seed-5/ckpt_100032.pt
success_once: 0.00, return: 26.54: 30%|████████████████████ | 150016/500000 [12:35<29:15, 199.42it/s]model saved to runs/StackCube-v1-sac-stability-test-seed-5/ckpt_150016.pt
success_once: 0.00, return: 26.79: 40%|██████████████████████████▊ | 200000/500000 [16:54<25:51, 193.37it/s]model saved to runs/StackCube-v1-sac-stability-test-seed-5/ckpt_200000.pt
success_once: 0.00, return: 30.06: 50%|█████████████████████████████████▌ | 250048/500000 [21:13<21:01, 198.13it/s]model saved to runs/StackCube-v1-sac-stability-test-seed-5/ckpt_250048.pt
success_once: 0.06, return: 32.48: 60%|████████████████████████████████████████▏ | 300032/500000 [25:33<17:08, 194.36it/s]model saved to runs/StackCube-v1-sac-stability-test-seed-5/ckpt_300032.pt
success_once: 0.06, return: 32.70: 70%|██████████████████████████████████████████████▉ | 350016/500000 [29:50<12:32, 199.19it/s]model saved to runs/StackCube-v1-sac-stability-test-seed-5/ckpt_350016.pt
success_once: 0.12, return: 33.48: 80%|█████████████████████████████████████████████████████▌ | 400000/500000 [34:09<08:33, 194.91it/s]model saved to runs/StackCube-v1-sac-stability-test-seed-5/ckpt_400000.pt
success_once: 0.06, return: 35.01: 90%|████████████████████████████████████████████████████████████▎ | 450048/500000 [38:28<04:05, 203.20it/s]model saved to runs/StackCube-v1-sac-stability-test-seed-5/ckpt_450048.pt
success_once: 0.06, return: 35.01: : 500032it [42:43, 194.44it/s] model saved to runs/StackCube-v1-sac-stability-test-seed-5/final_ckpt.pt
Finished training with SEED=5
All experiments completed.
可以看到训练结果不稳定: 5 个随机种子 (1~5) 的最终 success_once 分别为 0.69、0.00、0.00、0.19、0.06,差异较大;