Update QRDQN defaults (#225)
This commit is contained in:
parent
1553b66ee4
commit
3f0c5088b3
|
|
@ -3,6 +3,38 @@
|
||||||
Changelog
|
Changelog
|
||||||
==========
|
==========
|
||||||
|
|
||||||
|
Release 2.3.0a1 (WIP)
|
||||||
|
--------------------------
|
||||||
|
|
||||||
|
Breaking Changes:
|
||||||
|
^^^^^^^^^^^^^^^^^
|
||||||
|
- Upgraded to Stable-Baselines3 >= 2.3.0
|
||||||
|
- The default ``learning_starts`` parameter of ``QRDQN`` has been changed to be consistent with the other off-policy algorithms
|
||||||
|
|
||||||
|
|
||||||
|
.. code-block:: python
|
||||||
|
|
||||||
|
# SB3 < 2.3.0 default hyperparameters; 50_000 corresponded to the default Atari hyperparameters
|
||||||
|
# model = QRDQN("MlpPolicy", env, learning_starts=50_000)
|
||||||
|
# SB3 >= 2.3.0:
|
||||||
|
model = QRDQN("MlpPolicy", env, learning_starts=100)
|
||||||
|
|
||||||
|
|
||||||
|
New Features:
|
||||||
|
^^^^^^^^^^^^^
|
||||||
|
|
||||||
|
Bug Fixes:
|
||||||
|
^^^^^^^^^^
|
||||||
|
|
||||||
|
Deprecations:
|
||||||
|
^^^^^^^^^^^^^
|
||||||
|
|
||||||
|
Others:
|
||||||
|
^^^^^^^
|
||||||
|
|
||||||
|
Documentation:
|
||||||
|
^^^^^^^^^^^^^^
|
||||||
|
|
||||||
|
|
||||||
Release 2.2.1 (2023-11-17)
|
Release 2.2.1 (2023-11-17)
|
||||||
--------------------------
|
--------------------------
|
||||||
|
|
|
||||||
|
|
@ -20,7 +20,8 @@ class QRDQN(OffPolicyAlgorithm):
|
||||||
"""
|
"""
|
||||||
Quantile Regression Deep Q-Network (QR-DQN)
|
Quantile Regression Deep Q-Network (QR-DQN)
|
||||||
Paper: https://arxiv.org/abs/1710.10044
|
Paper: https://arxiv.org/abs/1710.10044
|
||||||
Default hyperparameters are taken from the paper and are tuned for Atari games.
|
Default hyperparameters are taken from the paper and are tuned for Atari games
|
||||||
|
(except for the ``learning_starts`` parameter).
|
||||||
|
|
||||||
:param policy: The policy model to use (MlpPolicy, CnnPolicy, ...)
|
:param policy: The policy model to use (MlpPolicy, CnnPolicy, ...)
|
||||||
:param env: The environment to learn from (if registered in Gym, can be str)
|
:param env: The environment to learn from (if registered in Gym, can be str)
|
||||||
|
|
@ -77,7 +78,7 @@ class QRDQN(OffPolicyAlgorithm):
|
||||||
env: Union[GymEnv, str],
|
env: Union[GymEnv, str],
|
||||||
learning_rate: Union[float, Schedule] = 5e-5,
|
learning_rate: Union[float, Schedule] = 5e-5,
|
||||||
buffer_size: int = 1000000, # 1e6
|
buffer_size: int = 1000000, # 1e6
|
||||||
learning_starts: int = 50000,
|
learning_starts: int = 100,
|
||||||
batch_size: int = 32,
|
batch_size: int = 32,
|
||||||
tau: float = 1.0,
|
tau: float = 1.0,
|
||||||
gamma: float = 0.99,
|
gamma: float = 0.99,
|
||||||
|
|
|
||||||
|
|
@ -1 +1 @@
|
||||||
2.2.1
|
2.3.0a1
|
||||||
|
|
|
||||||
2
setup.py
2
setup.py
|
|
@ -65,7 +65,7 @@ setup(
|
||||||
packages=[package for package in find_packages() if package.startswith("sb3_contrib")],
|
packages=[package for package in find_packages() if package.startswith("sb3_contrib")],
|
||||||
package_data={"sb3_contrib": ["py.typed", "version.txt"]},
|
package_data={"sb3_contrib": ["py.typed", "version.txt"]},
|
||||||
install_requires=[
|
install_requires=[
|
||||||
"stable_baselines3>=2.2.1,<3.0",
|
"stable_baselines3>=2.3.0a0,<3.0",
|
||||||
],
|
],
|
||||||
description="Contrib package of Stable Baselines3, experimental code.",
|
description="Contrib package of Stable Baselines3, experimental code.",
|
||||||
author="Antonin Raffin",
|
author="Antonin Raffin",
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue