Fix `train_freq` type annotation for TQC and QR-DQN (#229)

* fix train_freq type for tqc and qrdqn

* fix typo

* Update changelog

---------

Co-authored-by: Antonin Raffin <antonin.raffin@dlr.de>
This commit is contained in:
Armand du Parc Locmaria 2024-01-24 10:44:38 +01:00 committed by Andreas Schaarschmidt
parent bc3c0a9595
commit cd31e89e26
3 changed files with 5 additions and 3 deletions

View File

@@ -32,6 +32,8 @@ Deprecations:
Others: Others:
^^^^^^^ ^^^^^^^
- Fixed ``train_freq`` type annotation for tqc and qrdqn (@Armandpl)
Documentation: Documentation:
^^^^^^^^^^^^^^ ^^^^^^^^^^^^^^
- Add some additional notes about ``MaskablePPO`` (evaluation and multi-process) (@icheered) - Add some additional notes about ``MaskablePPO`` (evaluation and multi-process) (@icheered)
@@ -545,4 +547,4 @@ Contributors:
------------- -------------
@ku2482 @guyk1971 @minhlong94 @ayeright @kronion @glmcdona @cyprienc @sgillen @Gregwar @rnederstigt @qgallouedec @ku2482 @guyk1971 @minhlong94 @ayeright @kronion @glmcdona @cyprienc @sgillen @Gregwar @rnederstigt @qgallouedec
@mlodel @CppMaster @burakdmb @honglu2875 @ZikangXiong @AlexPasqua @jonasreiher @icheered @mlodel @CppMaster @burakdmb @honglu2875 @ZikangXiong @AlexPasqua @jonasreiher @icheered @Armandpl

View File

@@ -82,7 +82,7 @@ class QRDQN(OffPolicyAlgorithm):
batch_size: int = 32, batch_size: int = 32,
tau: float = 1.0, tau: float = 1.0,
gamma: float = 0.99, gamma: float = 0.99,
train_freq: int = 4, train_freq: Union[int, Tuple[int, str]] = 4,
gradient_steps: int = 1, gradient_steps: int = 1,
replay_buffer_class: Optional[Type[ReplayBuffer]] = None, replay_buffer_class: Optional[Type[ReplayBuffer]] = None,
replay_buffer_kwargs: Optional[Dict[str, Any]] = None, replay_buffer_kwargs: Optional[Dict[str, Any]] = None,

View File

@@ -88,7 +88,7 @@ class TQC(OffPolicyAlgorithm):
batch_size: int = 256, batch_size: int = 256,
tau: float = 0.005, tau: float = 0.005,
gamma: float = 0.99, gamma: float = 0.99,
train_freq: int = 1, train_freq: Union[int, Tuple[int, str]] = 1,
gradient_steps: int = 1, gradient_steps: int = 1,
action_noise: Optional[ActionNoise] = None, action_noise: Optional[ActionNoise] = None,
replay_buffer_class: Optional[Type[ReplayBuffer]] = None, replay_buffer_class: Optional[Type[ReplayBuffer]] = None,