Update QRDQN defaults (#225)

This commit is contained in:
Antonin RAFFIN 2024-01-12 16:17:44 +01:00 committed by Andreas Schaarschmidt
parent 1553b66ee4
commit 3f0c5088b3
4 changed files with 37 additions and 4 deletions

View File

@ -3,6 +3,38 @@
Changelog Changelog
========== ==========
Release 2.3.0a1 (WIP)
--------------------------
Breaking Changes:
^^^^^^^^^^^^^^^^^
- Upgraded to Stable-Baselines3 >= 2.3.0
- The default ``learning_starts`` parameter of ``QRDQN`` has been changed to be consistent with the other off-policy algorithms
.. code-block:: python
# SB3 < 2.3.0 default hyperparameters, 50_000 corresponded to Atari default hyperparameters
# model = QRDQN("MlpPolicy", env, learning_starts=50_000)
# SB3 >= 2.3.0:
model = QRDQN("MlpPolicy", env, learning_starts=100)
New Features:
^^^^^^^^^^^^^
Bug Fixes:
^^^^^^^^^^
Deprecations:
^^^^^^^^^^^^^
Others:
^^^^^^^
Documentation:
^^^^^^^^^^^^^^
Release 2.2.1 (2023-11-17) Release 2.2.1 (2023-11-17)
-------------------------- --------------------------

View File

@ -20,7 +20,8 @@ class QRDQN(OffPolicyAlgorithm):
""" """
Quantile Regression Deep Q-Network (QR-DQN) Quantile Regression Deep Q-Network (QR-DQN)
Paper: https://arxiv.org/abs/1710.10044 Paper: https://arxiv.org/abs/1710.10044
Default hyperparameters are taken from the paper and are tuned for Atari games. Default hyperparameters are taken from the paper and are tuned for Atari games
(except for the ``learning_starts`` parameter).
:param policy: The policy model to use (MlpPolicy, CnnPolicy, ...) :param policy: The policy model to use (MlpPolicy, CnnPolicy, ...)
:param env: The environment to learn from (if registered in Gym, can be str) :param env: The environment to learn from (if registered in Gym, can be str)
@ -77,7 +78,7 @@ class QRDQN(OffPolicyAlgorithm):
env: Union[GymEnv, str], env: Union[GymEnv, str],
learning_rate: Union[float, Schedule] = 5e-5, learning_rate: Union[float, Schedule] = 5e-5,
buffer_size: int = 1000000, # 1e6 buffer_size: int = 1000000, # 1e6
learning_starts: int = 50000, learning_starts: int = 100,
batch_size: int = 32, batch_size: int = 32,
tau: float = 1.0, tau: float = 1.0,
gamma: float = 0.99, gamma: float = 0.99,

View File

@ -1 +1 @@
2.2.1 2.3.0a1

View File

@ -65,7 +65,7 @@ setup(
packages=[package for package in find_packages() if package.startswith("sb3_contrib")], packages=[package for package in find_packages() if package.startswith("sb3_contrib")],
package_data={"sb3_contrib": ["py.typed", "version.txt"]}, package_data={"sb3_contrib": ["py.typed", "version.txt"]},
install_requires=[ install_requires=[
"stable_baselines3>=2.2.1,<3.0", "stable_baselines3>=2.3.0a0,<3.0",
], ],
description="Contrib package of Stable Baselines3, experimental code.", description="Contrib package of Stable Baselines3, experimental code.",
author="Antonin Raffin", author="Antonin Raffin",