From 812648e6cd062aa3c51760ddbe9354851d7c2d9e Mon Sep 17 00:00:00 2001 From: Antonin RAFFIN Date: Tue, 12 Apr 2022 12:50:35 +0200 Subject: [PATCH] Rename QRDQN logger key (#67) --- docs/misc/changelog.rst | 1 + sb3_contrib/qrdqn/qrdqn.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/misc/changelog.rst b/docs/misc/changelog.rst index d2b0136..6c38155 100644 --- a/docs/misc/changelog.rst +++ b/docs/misc/changelog.rst @@ -11,6 +11,7 @@ Breaking Changes: - Upgraded to Stable-Baselines3 >= 1.5.1a1 - Changed the way policy "aliases" are handled ("MlpPolicy", "CnnPolicy", ...), removing the former ``register_policy`` helper, ``policy_base`` parameter and using ``policy_aliases`` static attributes instead (@Gregwar) +- Renamed ``rollout/exploration rate`` key to ``rollout/exploration_rate`` for QRDQN (to be consistent with SB3 DQN) New Features: ^^^^^^^^^^^^^ diff --git a/sb3_contrib/qrdqn/qrdqn.py b/sb3_contrib/qrdqn/qrdqn.py index 24f286c..50aea1c 100644 --- a/sb3_contrib/qrdqn/qrdqn.py +++ b/sb3_contrib/qrdqn/qrdqn.py @@ -159,7 +159,7 @@ class QRDQN(OffPolicyAlgorithm): polyak_update(self.quantile_net.parameters(), self.quantile_net_target.parameters(), self.tau) self.exploration_rate = self.exploration_schedule(self._current_progress_remaining) - self.logger.record("rollout/exploration rate", self.exploration_rate) + self.logger.record("rollout/exploration_rate", self.exploration_rate) def train(self, gradient_steps: int, batch_size: int = 100) -> None: # Switch to train mode (this affects batch norm / dropout)