From 812648e6cd062aa3c51760ddbe9354851d7c2d9e Mon Sep 17 00:00:00 2001
From: Antonin RAFFIN <antonin.raffin@ensta.org>
Date: Tue, 12 Apr 2022 12:50:35 +0200
Subject: [PATCH] Rename QRDQN logger key (#67)

---
 docs/misc/changelog.rst    | 1 +
 sb3_contrib/qrdqn/qrdqn.py | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/docs/misc/changelog.rst b/docs/misc/changelog.rst
index d2b0136..6c38155 100644
--- a/docs/misc/changelog.rst
+++ b/docs/misc/changelog.rst
@@ -11,6 +11,7 @@ Breaking Changes:
 - Upgraded to Stable-Baselines3 >= 1.5.1a1
 - Changed the way policy "aliases" are handled ("MlpPolicy", "CnnPolicy", ...), removing the former
   ``register_policy`` helper, ``policy_base`` parameter and using ``policy_aliases`` static attributes instead (@Gregwar)
+- Renamed the ``rollout/exploration rate`` logger key to ``rollout/exploration_rate`` for QRDQN (to be consistent with SB3 DQN)
 
 New Features:
 ^^^^^^^^^^^^^
diff --git a/sb3_contrib/qrdqn/qrdqn.py b/sb3_contrib/qrdqn/qrdqn.py
index 24f286c..50aea1c 100644
--- a/sb3_contrib/qrdqn/qrdqn.py
+++ b/sb3_contrib/qrdqn/qrdqn.py
@@ -159,7 +159,7 @@ class QRDQN(OffPolicyAlgorithm):
             polyak_update(self.quantile_net.parameters(), self.quantile_net_target.parameters(), self.tau)
 
         self.exploration_rate = self.exploration_schedule(self._current_progress_remaining)
-        self.logger.record("rollout/exploration rate", self.exploration_rate)
+        self.logger.record("rollout/exploration_rate", self.exploration_rate)
 
     def train(self, gradient_steps: int, batch_size: int = 100) -> None:
         # Switch to train mode (this affects batch norm / dropout)