Fix returned type in predict (#88)

* actions[0] -> actions.squeeze(0)
* Update changelog
* Update changelog
* Update version

Co-authored-by: Antonin Raffin <antonin.raffin@ensta.org>
2022-07-18 11:49:03 +02:00 · 2022-07-18 11:49:03 +02:00 · 3cbd2429be
parent c9d621b816
commit 3cbd2429be
4 changed files with 20 additions and 4 deletions
--- a/docs/misc/changelog.rst
+++ b/docs/misc/changelog.rst
@@ -3,6 +3,22 @@
 Changelog
 ==========

+Release 1.6.1a0 (WIP)
+-------------------------------
+
+Breaking Changes:
+^^^^^^^^^^^^^^^^^
+- Fixed the issue that ``predict`` does not always return action as ``np.ndarray`` (@qgallouedec)
+
+New Features:
+^^^^^^^^^^^^^
+
+Bug Fixes:
+^^^^^^^^^^
+
+Deprecations:
+^^^^^^^^^^^^^
+
 Release 1.6.0 (2022-07-11)
 -------------------------------

@@ -276,4 +292,4 @@ Stable-Baselines3 is currently maintained by `Antonin Raffin`_ (aka `@araffin`_)
 Contributors:
 -------------

-@ku2482 @guyk1971 @minhlong94 @ayeright @kronion @glmcdona @cyprienc @sgillen @Gregwar @rnederstigt
+@ku2482 @guyk1971 @minhlong94 @ayeright @kronion @glmcdona @cyprienc @sgillen @Gregwar @rnederstigt @qgallouedec
--- a/sb3_contrib/common/maskable/policies.py
+++ b/sb3_contrib/common/maskable/policies.py
@@ -254,7 +254,7 @@ class MaskableActorCriticPolicy(BasePolicy):
        if not vectorized_env:
            if state is not None:
                raise ValueError("Error: The environment must be vectorized when using recurrent policies.")
-            actions = actions[0]
+            actions = actions.squeeze(axis=0)

        return actions, None

--- a/sb3_contrib/common/recurrent/policies.py
+++ b/sb3_contrib/common/recurrent/policies.py
@@ -416,7 +416,7 @@ class RecurrentActorCriticPolicy(ActorCriticPolicy):

        # Remove batch dimension if needed
        if not vectorized_env:
-            actions = actions[0]
+            actions = actions.squeeze(axis=0)

        return actions, states

--- a/sb3_contrib/version.txt
+++ b/sb3_contrib/version.txt
@@ -1 +1 @@
-1.6.0
+1.6.1a0
@@ -1 +1 @@
-1.6.0
+1.6.1a0