Fix `train_freq` type annotation for TQC and QR-DQN (#229)

* fix train_freq type for tqc and qrdqn * fix typo * Update changelog --------- Co-authored-by: Antonin Raffin <antonin.raffin@dlr.de>
2024-01-24 10:44:38 +01:00 · 2024-01-24 10:44:38 +01:00 · cd31e89e26
parent bc3c0a9595
commit cd31e89e26
3 changed files with 5 additions and 3 deletions
--- a/docs/misc/changelog.rst
+++ b/docs/misc/changelog.rst
@ -32,6 +32,8 @@ Deprecations:
 Others:
 ^^^^^^^
+- Fixed ``train_freq`` type annotation for tqc and qrdqn (@Armandpl)
 Documentation:
 ^^^^^^^^^^^^^^
 - Add some additional notes about ``MaskablePPO`` (evaluation and multi-process) (@icheered)
@ -545,4 +547,4 @@ Contributors:
 -------------
@ku2482 @guyk1971 @minhlong94 @ayeright @kronion @glmcdona @cyprienc @sgillen @Gregwar @rnederstigt @qgallouedec
-@mlodel @CppMaster @burakdmb @honglu2875 @ZikangXiong @AlexPasqua @jonasreiher @icheered
+@mlodel @CppMaster @burakdmb @honglu2875 @ZikangXiong @AlexPasqua @jonasreiher @icheered @Armandpl
--- a/sb3_contrib/qrdqn/qrdqn.py
+++ b/sb3_contrib/qrdqn/qrdqn.py
@ -82,7 +82,7 @@ class QRDQN(OffPolicyAlgorithm):
        batch_size: int = 32,
        tau: float = 1.0,
        gamma: float = 0.99,
-        train_freq: int = 4,
+        train_freq: Union[int, Tuple[int, str]] = 4,
        gradient_steps: int = 1,
        replay_buffer_class: Optional[Type[ReplayBuffer]] = None,
        replay_buffer_kwargs: Optional[Dict[str, Any]] = None,
--- a/sb3_contrib/tqc/tqc.py
+++ b/sb3_contrib/tqc/tqc.py
@ -88,7 +88,7 @@ class TQC(OffPolicyAlgorithm):
        batch_size: int = 256,
        tau: float = 0.005,
        gamma: float = 0.99,
-        train_freq: int = 1,
+        train_freq: Union[int, Tuple[int, str]] = 1,
        gradient_steps: int = 1,
        action_noise: Optional[ActionNoise] = None,
        replay_buffer_class: Optional[Type[ReplayBuffer]] = None,