Fix `train_freq` type annotation for TQC and QR-DQN (#229)
* Fix `train_freq` type for TQC and QR-DQN
* Fix typo
* Update changelog
---------
Co-authored-by: Antonin Raffin <antonin.raffin@dlr.de>
This commit is contained in:
parent
bc3c0a9595
commit
cd31e89e26
|
|
@ -32,6 +32,8 @@ Deprecations:
|
||||||
Others:
|
Others:
|
||||||
^^^^^^^
|
^^^^^^^
|
||||||
|
|
||||||
|
- Fixed ``train_freq`` type annotation for TQC and QR-DQN (@Armandpl)
|
||||||
|
|
||||||
Documentation:
|
Documentation:
|
||||||
^^^^^^^^^^^^^^
|
^^^^^^^^^^^^^^
|
||||||
- Add some additional notes about ``MaskablePPO`` (evaluation and multi-process) (@icheered)
|
- Add some additional notes about ``MaskablePPO`` (evaluation and multi-process) (@icheered)
|
||||||
|
|
@ -545,4 +547,4 @@ Contributors:
|
||||||
-------------
|
-------------
|
||||||
|
|
||||||
@ku2482 @guyk1971 @minhlong94 @ayeright @kronion @glmcdona @cyprienc @sgillen @Gregwar @rnederstigt @qgallouedec
|
@ku2482 @guyk1971 @minhlong94 @ayeright @kronion @glmcdona @cyprienc @sgillen @Gregwar @rnederstigt @qgallouedec
|
||||||
@mlodel @CppMaster @burakdmb @honglu2875 @ZikangXiong @AlexPasqua @jonasreiher @icheered
|
@mlodel @CppMaster @burakdmb @honglu2875 @ZikangXiong @AlexPasqua @jonasreiher @icheered @Armandpl
|
||||||
|
|
|
||||||
|
|
@ -82,7 +82,7 @@ class QRDQN(OffPolicyAlgorithm):
|
||||||
batch_size: int = 32,
|
batch_size: int = 32,
|
||||||
tau: float = 1.0,
|
tau: float = 1.0,
|
||||||
gamma: float = 0.99,
|
gamma: float = 0.99,
|
||||||
train_freq: int = 4,
|
train_freq: Union[int, Tuple[int, str]] = 4,
|
||||||
gradient_steps: int = 1,
|
gradient_steps: int = 1,
|
||||||
replay_buffer_class: Optional[Type[ReplayBuffer]] = None,
|
replay_buffer_class: Optional[Type[ReplayBuffer]] = None,
|
||||||
replay_buffer_kwargs: Optional[Dict[str, Any]] = None,
|
replay_buffer_kwargs: Optional[Dict[str, Any]] = None,
|
||||||
|
|
|
||||||
|
|
@ -88,7 +88,7 @@ class TQC(OffPolicyAlgorithm):
|
||||||
batch_size: int = 256,
|
batch_size: int = 256,
|
||||||
tau: float = 0.005,
|
tau: float = 0.005,
|
||||||
gamma: float = 0.99,
|
gamma: float = 0.99,
|
||||||
train_freq: int = 1,
|
train_freq: Union[int, Tuple[int, str]] = 1,
|
||||||
gradient_steps: int = 1,
|
gradient_steps: int = 1,
|
||||||
action_noise: Optional[ActionNoise] = None,
|
action_noise: Optional[ActionNoise] = None,
|
||||||
replay_buffer_class: Optional[Type[ReplayBuffer]] = None,
|
replay_buffer_class: Optional[Type[ReplayBuffer]] = None,
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue