Fix `train_freq` type annotation for TQC and QR-DQN (#229)
* Fix `train_freq` type for TQC and QR-DQN
* Fix typo
* Update changelog

---------

Co-authored-by: Antonin Raffin <antonin.raffin@dlr.de>
This commit is contained in:
parent
bc3c0a9595
commit
cd31e89e26
|
|
@@ -32,6 +32,8 @@ Deprecations:
|
|||
Others:
|
||||
^^^^^^^
|
||||
|
||||
- Fixed ``train_freq`` type annotation for TQC and QR-DQN (@Armandpl)
|
||||
|
||||
Documentation:
|
||||
^^^^^^^^^^^^^^
|
||||
- Add some additional notes about ``MaskablePPO`` (evaluation and multi-process) (@icheered)
|
||||
|
|
@@ -545,4 +547,4 @@ Contributors:
|
|||
-------------
|
||||
|
||||
@ku2482 @guyk1971 @minhlong94 @ayeright @kronion @glmcdona @cyprienc @sgillen @Gregwar @rnederstigt @qgallouedec
|
||||
@mlodel @CppMaster @burakdmb @honglu2875 @ZikangXiong @AlexPasqua @jonasreiher @icheered
|
||||
@mlodel @CppMaster @burakdmb @honglu2875 @ZikangXiong @AlexPasqua @jonasreiher @icheered @Armandpl
|
||||
|
|
|
|||
|
|
@@ -82,7 +82,7 @@ class QRDQN(OffPolicyAlgorithm):
|
|||
batch_size: int = 32,
|
||||
tau: float = 1.0,
|
||||
gamma: float = 0.99,
|
||||
train_freq: int = 4,
|
||||
train_freq: Union[int, Tuple[int, str]] = 4,
|
||||
gradient_steps: int = 1,
|
||||
replay_buffer_class: Optional[Type[ReplayBuffer]] = None,
|
||||
replay_buffer_kwargs: Optional[Dict[str, Any]] = None,
|
||||
|
|
|
|||
|
|
@@ -88,7 +88,7 @@ class TQC(OffPolicyAlgorithm):
|
|||
batch_size: int = 256,
|
||||
tau: float = 0.005,
|
||||
gamma: float = 0.99,
|
||||
train_freq: int = 1,
|
||||
train_freq: Union[int, Tuple[int, str]] = 1,
|
||||
gradient_steps: int = 1,
|
||||
action_noise: Optional[ActionNoise] = None,
|
||||
replay_buffer_class: Optional[Type[ReplayBuffer]] = None,
|
||||
|
|
|
|||
Loading…
Reference in New Issue