diff --git a/docs/guide/examples.rst b/docs/guide/examples.rst index 3af5561..c39ce26 100644 --- a/docs/guide/examples.rst +++ b/docs/guide/examples.rst @@ -45,15 +45,15 @@ Train a PPO with invalid action masking agent on a toy environment. model.learn(5000) model.save("qrdqn_cartpole") - TRPO - ---- +TRPO +---- - Train a Trust Region Policy Optimization (TRPO) agent on the Pendulum environment. +Train a Trust Region Policy Optimization (TRPO) agent on the Pendulum environment. - .. code-block:: python +.. code-block:: python - from sb3_contrib import TRPO + from sb3_contrib import TRPO - model = TRPO("MlpPolicy", "Pendulum-v0", gamma=0.9, verbose=1) - model.learn(total_timesteps=100_000, log_interval=4) - model.save("trpo_pendulum") + model = TRPO("MlpPolicy", "Pendulum-v0", gamma=0.9, verbose=1) + model.learn(total_timesteps=100_000, log_interval=4) + model.save("trpo_pendulum")