From 3b007ae93b6177a4ee712f9f1af5dc1183b0abcb Mon Sep 17 00:00:00 2001
From: Antonin Raffin
Date: Wed, 29 Dec 2021 15:03:51 +0100
Subject: [PATCH] Fix TRPO doc

---
 docs/guide/examples.rst | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/docs/guide/examples.rst b/docs/guide/examples.rst
index 3af5561..c39ce26 100644
--- a/docs/guide/examples.rst
+++ b/docs/guide/examples.rst
@@ -45,15 +45,15 @@ Train a PPO with invalid action masking agent on a toy environment.
   model.learn(5000)
   model.save("qrdqn_cartpole")
 
- TRPO
- ----
+TRPO
+----
 
- Train a Trust Region Policy Optimization (TRPO) agent on the Pendulum environment.
+Train a Trust Region Policy Optimization (TRPO) agent on the Pendulum environment.
 
- .. code-block:: python
+.. code-block:: python
 
-   from sb3_contrib import TRPO
+  from sb3_contrib import TRPO
 
-   model = TRPO("MlpPolicy", "Pendulum-v0", gamma=0.9, verbose=1)
-   model.learn(total_timesteps=100_000, log_interval=4)
-   model.save("trpo_pendulum")
+  model = TRPO("MlpPolicy", "Pendulum-v0", gamma=0.9, verbose=1)
+  model.learn(total_timesteps=100_000, log_interval=4)
+  model.save("trpo_pendulum")