Fix TRPO doc
This commit is contained in:
parent
59be198da0
commit
3b007ae93b
|
|
@@ -45,15 +45,15 @@ Train a PPO with invalid action masking agent on a toy environment.
|
||||||
model.learn(5000)
|
model.learn(5000)
|
||||||
model.save("qrdqn_cartpole")
|
model.save("qrdqn_cartpole")
|
||||||
|
|
||||||
TRPO
|
TRPO
|
||||||
----
|
----
|
||||||
|
|
||||||
Train a Trust Region Policy Optimization (TRPO) agent on the Pendulum environment.
|
Train a Trust Region Policy Optimization (TRPO) agent on the Pendulum environment.
|
||||||
|
|
||||||
.. code-block:: python
|
.. code-block:: python
|
||||||
|
|
||||||
from sb3_contrib import TRPO
|
from sb3_contrib import TRPO
|
||||||
|
|
||||||
model = TRPO("MlpPolicy", "Pendulum-v0", gamma=0.9, verbose=1)
|
model = TRPO("MlpPolicy", "Pendulum-v0", gamma=0.9, verbose=1)
|
||||||
model.learn(total_timesteps=100_000, log_interval=4)
|
model.learn(total_timesteps=100_000, log_interval=4)
|
||||||
model.save("trpo_pendulum")
|
model.save("trpo_pendulum")
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue