From 467ffee554908af7c3637e41abc5ed5a9bbf9ab3 Mon Sep 17 00:00:00 2001 From: "Anssi \"Miffyli\" Kanervisto" Date: Mon, 5 Oct 2020 21:15:30 +0300 Subject: [PATCH 01/13] Add initial version of contributing --- CONTRIBUTING.md | 159 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 159 insertions(+) create mode 100644 CONTRIBUTING.md diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000..24cd8fa --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,159 @@ +## Contributing to Stable-Baselines3 - Contrib + +This contrib repository is designed for experimental implementations of various +training algorithms so that others may make use of them. This includes full +training algorithms, different tools (e.g. new environment wrappers, +callbacks) and extending algorithms implemented in stable-baselines3. + +**Before opening a pull request**, open an issue discussing the contribution. +Once we agree that the plan looks good, go ahead and implement it. + +Contributions and review focuses on following three parts: +1) **Implementation quality** + - Performance of the training algorithms should match what proposed authors reported. + - This is ensured by including a code that replicates an experiment from the original + paper or from an established codebase (e.g. the code from authors), as well as + a test to check that implementation works on program level (does not crash). +2) Documentation + - Documentation quality should match that of stable-baselines3, with each algorithm + containing its own README file, changelog, in-code documentation to clarify the flow + of logic and report the expected results. +3) Consistency with stable-baselines3 + - To ease readibility, all contributions need to follow the code style (see below) and + ideoms used in stable-baselines3. + +The implementation quality is a strict requirements with little room for changes, because +otherwise the implementation can do more harm than good (wrong results). 
Parts two and three +are taken into account during review but being a repository for more experimental code, these +are not very strict. + +## How to implement your suggestion + +All code will go under `sb3_contrib/[feature_name]` directory, regardless of what they implement. +The idea is to keep different contributions separate from each other and only over time combine/mature +them into a shared package. + +Implement your feature/suggestion/algorithm in following ways, using the first one that applies: +1) Environment wrapper: This can be used with any algorithm and even outside stable-baselines3. +2) [Custom callback](https://stable-baselines3.readthedocs.io/en/master/guide/callbacks.html) +3) Following the style/naming of `common` files in the stable-baseline3. If your suggestion is a specific network architecture + for feature extraction from images, place this under `sb3_contrib/[feature_name]/torch_layers.py`, for example. +4) A new learning algorithm. This is the last resort but most applicable solution. + Even if your suggestion is a (trivial) modification to an existing algorithm, create a new algorithm for it + unless otherwise discussed (which inherits the base algorithm). The algorithm should use same API as + stable-baselines3 algorithms (e.g. `learn`, `load`) + +Look over stable-baselines3 code for the general naming of variables and try to keep this style. + +If the algorithm you are implementing involves more complex/uncommon equations, comment each part of these +calculations with references to the relevant parts of the paper. + +## Pull Request (PR) and review + +Before proposing a PR, please open an issue, where the feature will be discussed. +This prevents duplicate PRs from being proposed and also eases the code review process. + +Each PR needs to be reviewed and accepted by at least one of the maintainers. +A PR must pass the Continuous Integration tests to be merged with the master branch. 
+ +Along with the code, PR **must** include the following: +1) `README.md` file for the feature (see template below). This is placed in the algorithm's directory. +2) Results of a replicated experiment from the original paper, **which must match the results from authors** + unless solid arguments can be provided why they did not match. +3) The **exact** code to run the replicated experiment (i.e. it should produce the above results), and inside the + code information about the environment used (Python version, library versions, OS, hardware information). +4) A new test file under `tests/`, which tests the implementation for functional errors. This this is **not** for + testing e.g. training performance of a learning algorithm, and should be relatively quick to run. + +README.md template: + +```markdown +# [Feature/Algorithm name] + +* Non-abreviated name and/or one-sentence description of the method. +* Link and reference to the original publications the present the feature, or other established source(s). +* Links to any codebases that were used for reference (e.g. authors' implementations) + +## Example + +A minimal example on how to use the feature (full, runnable code). + +## Results + +A copy of results reported in the original paper and results obtained by your replicate of the experiments, as well +as an overview of the experiment setup (full details are in the code you will provide). + +## Comments + +Comments regarding the implementation, e.g. missing parts, uncertain parts, differences +to the original implementation. + +## Changelog + +Per-algorithm changelog, in format "dd/mm/yyyy username: comment". 
E.g: +* 05.10.2020 Miffyli: Adding missing instructions for contrib repo +* 04.10.2020 Miffyli: Initial commit +```` + +If you are not familiar with creating a Pull Request, here are some guides: +- http://stackoverflow.com/questions/14680711/how-to-do-a-github-pull-request +- https://help.github.com/articles/creating-a-pull-request/ + + +## Codestyle + +We are using [black codestyle](https://github.com/psf/black) (max line length of 127 characters) together with [isort](https://github.com/timothycrosley/isort) to sort the imports. + +**Please run `make format`** to reformat your code. You can check the codestyle using `make check-codestyle` and `make lint`. + +Please document each function/method and [type](https://google.github.io/pytype/user_guide.html) them using the following template: + +```python + +def my_function(arg1: type1, arg2: type2) -> returntype: + """ + Short description of the function. + + :param arg1: (type1) describe what is arg1 + :param arg2: (type2) describe what is arg2 + :return: (returntype) describe what is returned + """ + ... + return my_variable +``` + +## Tests + +All new features and algorithms must add tests in the `tests/` folder ensuring that everything works fine (on program level). +We use [pytest](https://pytest.org/). +Also, when a bug fix is proposed, tests should be added to avoid regression. + +To run tests with `pytest`: + +``` +make pytest +``` + +Type checking with `pytype`: + +``` +make type +``` + +Codestyle check with `black`, `isort` and `flake8`: + +``` +make check-codestyle +make lint +``` + +To run `pytype`, `format` and `lint` in one command: +``` +make commit-checks +``` + +## Changelog and Documentation + +Please do not forget to update the changelog (`CHANGELOG.rst`). + +Credits: this contributing guide is based on the [PyTorch](https://github.com/pytorch/pytorch/) one. 
From e21ba4b2b124130961c2ab261c629a910f3a8d5c Mon Sep 17 00:00:00 2001 From: "Anssi \"Miffyli\" Kanervisto" Date: Tue, 6 Oct 2020 01:44:11 +0300 Subject: [PATCH 02/13] Remove datatypes from example function --- CONTRIBUTING.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 24cd8fa..44dd08c 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -114,9 +114,9 @@ def my_function(arg1: type1, arg2: type2) -> returntype: """ Short description of the function. - :param arg1: (type1) describe what is arg1 - :param arg2: (type2) describe what is arg2 - :return: (returntype) describe what is returned + :param arg1: describe what is arg1 + :param arg2: describe what is arg2 + :return: describe what is returned """ ... return my_variable From 86508f0694a9fb85e5807ddfc8ed37755cefc8e9 Mon Sep 17 00:00:00 2001 From: "Anssi \"Miffyli\" Kanervisto" Date: Tue, 6 Oct 2020 01:48:42 +0300 Subject: [PATCH 03/13] Replace info about tests --- CONTRIBUTING.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 44dd08c..599db51 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -62,8 +62,9 @@ Along with the code, PR **must** include the following: unless solid arguments can be provided why they did not match. 3) The **exact** code to run the replicated experiment (i.e. it should produce the above results), and inside the code information about the environment used (Python version, library versions, OS, hardware information). -4) A new test file under `tests/`, which tests the implementation for functional errors. This this is **not** for - testing e.g. training performance of a learning algorithm, and should be relatively quick to run. +4) Updated tests in `tests/test_run.py` and `tests/test_save_load.py` to test that algorithms run as expected and serialize + correctly. This this is **not** for testing e.g. 
training performance of a learning algorithm, and + should be relatively quick to run. README.md template: From 383ce59c8567c126d38848eab2fb75ce9b878821 Mon Sep 17 00:00:00 2001 From: "Anssi \"Miffyli\" Kanervisto" Date: Sun, 11 Oct 2020 22:02:45 +0300 Subject: [PATCH 04/13] Update info on where code should be placed --- CONTRIBUTING.md | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 599db51..49f3394 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -29,19 +29,18 @@ are not very strict. ## How to implement your suggestion -All code will go under `sb3_contrib/[feature_name]` directory, regardless of what they implement. -The idea is to keep different contributions separate from each other and only over time combine/mature -them into a shared package. - Implement your feature/suggestion/algorithm in following ways, using the first one that applies: -1) Environment wrapper: This can be used with any algorithm and even outside stable-baselines3. -2) [Custom callback](https://stable-baselines3.readthedocs.io/en/master/guide/callbacks.html) +1) Environment wrapper: This can be used with any algorithm and even outside stable-baselines3. + Place code for these under `sb3_contrib/common/wrappers` directory. +2) [Custom callback](https://stable-baselines3.readthedocs.io/en/master/guide/callbacks.html). + Place code under `sb3_contrib/common/callbacks` directory. 3) Following the style/naming of `common` files in the stable-baseline3. If your suggestion is a specific network architecture - for feature extraction from images, place this under `sb3_contrib/[feature_name]/torch_layers.py`, for example. + for feature extraction from images, place this in `sb3_contrib/common/torch_layers.py`, for example. 4) A new learning algorithm. This is the last resort but most applicable solution. 
Even if your suggestion is a (trivial) modification to an existing algorithm, create a new algorithm for it unless otherwise discussed (which inherits the base algorithm). The algorithm should use same API as - stable-baselines3 algorithms (e.g. `learn`, `load`) + stable-baselines3 algorithms (e.g. `learn`, `load`), and the code should be placed under + `sb3_contrib/[algorithm_name]` directory. Look over stable-baselines3 code for the general naming of variables and try to keep this style. From fbe8e6536fc4384a3792d3a2cecd0e64120d42cd Mon Sep 17 00:00:00 2001 From: "Anssi \"Miffyli\" Kanervisto" Date: Sun, 11 Oct 2020 22:09:48 +0300 Subject: [PATCH 05/13] Add note about experimental tag in sb3 issues --- CONTRIBUTING.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 49f3394..9cdd9f2 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -27,6 +27,9 @@ otherwise the implementation can do more harm than good (wrong results). Parts t are taken into account during review but being a repository for more experimental code, these are not very strict. +See [issues with "experimental" tag](https://github.com/DLR-RM/stable-baselines3/issues?q=is%3Aissue+is%3Aopen+label%3Aexperimental) +for suggestions of the community for new possible features to include in contrib. + ## How to implement your suggestion Implement your feature/suggestion/algorithm in following ways, using the first one that applies: From eed8a2970541e3f2fc5488ae212968903cd5506a Mon Sep 17 00:00:00 2001 From: "Anssi \"Miffyli\" Kanervisto" Date: Sun, 11 Oct 2020 22:31:40 +0300 Subject: [PATCH 06/13] Update docs on sharing results/code to replicate. 
--- CONTRIBUTING.md | 62 ++++++++++++++++++++++++++++++++----------------- 1 file changed, 41 insertions(+), 21 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 9cdd9f2..dabe82c 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -16,8 +16,8 @@ Contributions and review focuses on following three parts: a test to check that implementation works on program level (does not crash). 2) Documentation - Documentation quality should match that of stable-baselines3, with each algorithm - containing its own README file, changelog, in-code documentation to clarify the flow - of logic and report the expected results. + containing its own documentation page, changelog, in-code documentation to clarify the flow + of logic and report of the expected results. 3) Consistency with stable-baselines3 - To ease readibility, all contributions need to follow the code style (see below) and ideoms used in stable-baselines3. @@ -59,43 +59,57 @@ Each PR need to be reviewed and accepted by at least one of the maintainers. A PR must pass the Continuous Integration tests to be merged with the master branch. Along with the code, PR **must** include the following: -1) `README.md` file for the feature (see template below). This is placed in the algorithm's directory. -2) Results of a replicated experiment from the original paper, **which must match the results from authors** +1) Update to documentation to include a description of the feature (see template below). +2) Results of a replicated experiment from the original paper in the documentation, **which must match the results from authors** unless solid arguments can be provided why they did not match. 3) The **exact** code to run the replicated experiment (i.e. it should produce the above results), and inside the - code information about the environment used (Python version, library versions, OS, hardware information). 
-4) Updated tests in `tests/test_run.py` and `tests/test_save_load.py` to test that algorithms run as expected and serialize + code information about the environment used (Python version, library versions, OS, hardware information). If small enough, + include this in the documentation. If applicable, use [rl-baselines3-zoo](https://github.com/DLR-RM/rl-baselines3-zoo) to + run the agent performance comparison experiments (fork repository, implement experiment in a new branch and share link to + that branch). If above do not apply, create new code to replicate the experiment and include link to it. +4) Updated tests in `tests/test_run.py` and `tests/test_save_load.py` to test that features run as expected and serialize correctly. This this is **not** for testing e.g. training performance of a learning algorithm, and should be relatively quick to run. -README.md template: +Template for the feature documentation -```markdown -# [Feature/Algorithm name] +```rst +[Feature/Algorithm name] +======================== -* Non-abreviated name and/or one-sentence description of the method. -* Link and reference to the original publications the present the feature, or other established source(s). -* Links to any codebases that were used for reference (e.g. authors' implementations) +- Non-abreviated name and/or one-sentence description of the method. +- Link and reference to the original publications the present the feature, or other established source(s). +- Links to any codebases that were used for reference (e.g. authors' implementations) -## Example +Example +------- A minimal example on how to use the feature (full, runnable code). -## Results +Results +------- -A copy of results reported in the original paper and results obtained by your replicate of the experiments, as well -as an overview of the experiment setup (full details are in the code you will provide). +A description and comparison of results (e.g. 
how the change improved results over the non-changed algorithm), if +applicable. -## Comments +Include the expected results from the work that originally proposed the method (e.g. original paper). + +Include the code to replicate these results or a link to repository/branch where the code can be found. +Use `rl-baselines3-zoo <https://github.com/DLR-RM/rl-baselines3-zoo>`_ if possible, fork it, create a new branch +and share the code to replicate results there. + +Comments +-------- Comments regarding the implementation, e.g. missing parts, uncertain parts, differences to the original implementation. -## Changelog +Changelog +--------- -Per-algorithm changelog, in format "dd/mm/yyyy username: comment". E.g: -* 05.10.2020 Miffyli: Adding missing instructions for contrib repo -* 04.10.2020 Miffyli: Initial commit +Per-feature changelog, in format "dd/mm/yyyy username: comment". E.g: +- 05.10.2020 arrafin: Adding missing instructions for contrib repo +- 04.10.2020 Miffyli: Initial commit ```` If you are not familiar with creating a Pull Request, here are some guides: @@ -155,6 +169,12 @@ To run `pytype`, `format` and `lint` in one command: make commit-checks ``` +Build the documentation: + +``` +make doc +``` + ## Changelog and Documentation Please do not forget to update the changelog (`CHANGELOG.rst`). From 31da024e96609c4b8d8b0895b86f2c0db4f24316 Mon Sep 17 00:00:00 2001 From: "Anssi \"Miffyli\" Kanervisto" Date: Mon, 12 Oct 2020 00:47:43 +0300 Subject: [PATCH 07/13] Update info on docs. Remove changelog part of per-algo docs --- CONTRIBUTING.md | 29 ++++++++++++----------------- 1 file changed, 12 insertions(+), 17 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index dabe82c..d4a54ac 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -1,7 +1,7 @@ ## Contributing to Stable-Baselines3 - Contrib This contrib repository is designed for experimental implementations of various -training algorithms so that others may make use of them. 
This includes full +parts of reinforcement training so that others may make use of them. This includes full training algorithms, different tools (e.g. new environment wrappers, callbacks) and extending algorithms implemented in stable-baselines3. @@ -10,14 +10,14 @@ Once we agree that the plan looks good, go ahead and implement it. Contributions and review focuses on following three parts: 1) **Implementation quality** - - Performance of the training algorithms should match what proposed authors reported. + - Performance of the training algorithms should match what proposed authors reported (if applicable). - This is ensured by including a code that replicates an experiment from the original paper or from an established codebase (e.g. the code from authors), as well as a test to check that implementation works on program level (does not crash). 2) Documentation - - Documentation quality should match that of stable-baselines3, with each algorithm - containing its own documentation page, changelog, in-code documentation to clarify the flow - of logic and report of the expected results. + - Documentation quality should match that of stable-baselines3, with each feature covered + in the documentation, in-code documentation to clarify the flow + of logic and report of the expected results, where applicable. 3) Consistency with stable-baselines3 - To ease readibility, all contributions need to follow the code style (see below) and ideoms used in stable-baselines3. @@ -59,10 +59,12 @@ Each PR need to be reviewed and accepted by at least one of the maintainers. A PR must pass the Continuous Integration tests to be merged with the master branch. Along with the code, PR **must** include the following: -1) Update to documentation to include a description of the feature (see template below). 
-2) Results of a replicated experiment from the original paper in the documentation, **which must match the results from authors** - unless solid arguments can be provided why they did not match. -3) The **exact** code to run the replicated experiment (i.e. it should produce the above results), and inside the +1) Update to documentation to include a description of the feature. If feature is a simple tool (e.g. wrapper, callback), + this goes under respective pages in documentation. If full training algorithm, this goes under a new page with template below + (`docs/modules/[algo_name]`). +2) If a training algorithm/improvement: results of a replicated experiment from the original paper in the documentation, + **which must match the results from authors** unless solid arguments can be provided why they did not match. +3) If above holds: The **exact** code to run the replicated experiment (i.e. it should produce the above results), and inside the code information about the environment used (Python version, library versions, OS, hardware information). If small enough, include this in the documentation. If applicable, use [rl-baselines3-zoo](https://github.com/DLR-RM/rl-baselines3-zoo) to run the agent performance comparison experiments (fork repository, implement experiment in a new branch and share link to @@ -71,7 +73,7 @@ Along with the code, PR **must** include the following: correctly. This this is **not** for testing e.g. training performance of a learning algorithm, and should be relatively quick to run. -Template for the feature documentation +Below is a template for documentation for full training algorithms. ```rst [Feature/Algorithm name] @@ -103,13 +105,6 @@ Comments Comments regarding the implementation, e.g. missing parts, uncertain parts, differences to the original implementation. - -Changelog ---------- - -Per-feature changelog, in format "dd/mm/yyyy username: comment". 
E.g: -- 05.10.2020 arrafin: Adding missing instructions for contrib repo -- 04.10.2020 Miffyli: Initial commit ```` If you are not familiar with creating a Pull Request, here are some guides: From 79fcf54e1e82ef871a2d1610dcd379680332a390 Mon Sep 17 00:00:00 2001 From: "Anssi \"Miffyli\" Kanervisto" Date: Thu, 15 Oct 2020 02:17:36 +0300 Subject: [PATCH 08/13] Review docs and update changelog --- docs/README.md | 4 ++-- docs/index.rst | 4 ---- docs/misc/changelog.rst | 2 ++ 3 files changed, 4 insertions(+), 6 deletions(-) diff --git a/docs/README.md b/docs/README.md index 1427a79..e75267f 100644 --- a/docs/README.md +++ b/docs/README.md @@ -1,6 +1,6 @@ -## Stable Baselines3 Documentation +## Stable Baselines3 Contrib Documentation -This folder contains documentation for the RL baselines. +This folder contains documentation for the RL baselines contribution repository. ### Build the Documentation diff --git a/docs/index.rst b/docs/index.rst index b77a85f..79c5f5d 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -64,10 +64,6 @@ To cite this project in publications: Contributing ------------ -To any interested in making the rl baselines better, there are still some improvements -that need to be done. -You can check issues in the `repo `_. - If you want to contribute, please read `CONTRIBUTING.md `_ first. 
Indices and tables diff --git a/docs/misc/changelog.rst b/docs/misc/changelog.rst index 6cb9a38..db44b6d 100644 --- a/docs/misc/changelog.rst +++ b/docs/misc/changelog.rst @@ -24,6 +24,8 @@ Others: Documentation: ^^^^^^^^^^^^^^ +- Added initial documentation +- Added contribution guide and related PR templates Maintainers From 95892f31e65511987cc2c485531c1b9ba3d6a626 Mon Sep 17 00:00:00 2001 From: "Anssi \"Miffyli\" Kanervisto" Date: Thu, 15 Oct 2020 02:27:22 +0300 Subject: [PATCH 09/13] Update PR template --- .github/PULL_REQUEST_TEMPLATE.md | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 2b35d61..197aabf 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -1,13 +1,11 @@ - + ## Description - + -## Motivation and Context - - - -- [ ] I have raised an issue to propose this change ([required](https://github.com/DLR-RM/stable-baselines3/blob/master/CONTRIBUTING.md) for new features and bug fixes) +## Context + +- [ ] I have raised an issue to propose this change ([required](https://github.com/Stable-Baselines-Team/stable-baselines3-contrib/blob/master/CONTRIBUTING.md)) ## Types of changes @@ -19,11 +17,13 @@ ## Checklist: -- [ ] I've read the [CONTRIBUTION](https://github.com/DLR-RM/stable-baselines3/blob/master/CONTRIBUTING.md) guide (**required**) -- [ ] I have updated the changelog accordingly (**required**). -- [ ] My change requires a change to the documentation. +- [ ] I've read the [CONTRIBUTION](https://github.com/Stable-Baselines-Team/stable-baselines3-contrib/blob/master/CONTRIBUTING.md) guide (**required**) +- [ ] The functionality/performance matches that of the source (**required** for new training algorithms or training-related features). - [ ] I have updated the tests accordingly (*required for a bug fix or a new feature*). 
+- [ ] I have included an example of using the feature (*required for new features*). +- [ ] I have included baseline results (**required** for new training algorithms or training-related features). - [ ] I have updated the documentation accordingly. +- [ ] I have updated the changelog accordingly (**required**). - [ ] I have reformatted the code using `make format` (**required**) - [ ] I have checked the codestyle using `make check-codestyle` and `make lint` (**required**) - [ ] I have ensured `make pytest` and `make type` both pass. (**required**) From 4a1695cf7b2fb2d6b88b7b8c0160506786a47070 Mon Sep 17 00:00:00 2001 From: "Anssi \"Miffyli\" Kanervisto" Date: Thu, 15 Oct 2020 02:46:29 +0300 Subject: [PATCH 10/13] Update README --- README.md | 33 +++++++++++++++++++++++++++------ 1 file changed, 27 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 7280244..bd26110 100644 --- a/README.md +++ b/README.md @@ -2,19 +2,40 @@ # Stable-Baselines3 - Contrib -Contrib package for [Stable-Baselines3](https://github.com/DLR-RM/stable-baselines3) - Experimental code +Contrib package for [Stable-Baselines3](https://github.com/DLR-RM/stable-baselines3) - Experimental code. +"sb3-contrib" for short. -**You need the master version of Stable Baselines3**: -``` -pip install git+https://github.com/DLR-RM/stable-baselines3 -``` +A place for training algorithms and tools that are considered experimental, e.g. implementations of the latest +publications. Goal is to keep the simplicity, documentation and style of stable-baselines3 but for less matured +implementations. -Implemented: +Why create this repository? Over the span of stable-baselines and stable-baselines3, the community has been eager +to contribute in form of better logging utilities, environment wrappers, extended support (e.g. different action spaces) +and learning algorithms. 
However sometimes these utilities were too niche to be considered for stable-baselines or +proved to be too difficult to integrate well into existing code without a mess. sb3-contrib aims to fix this by +not requiring the neatest code integration with existing code and not setting limits on what is too niche: almost everything +remotely useful goes! We hope this allows to extend the known quality of stable-baselines style and documentation beyond +the relatively small scope of utilities of the main repository. + + +## Features + +See documentation for the full list of included features. + +**Training algorithms**: - [Truncated Quantile Critics (TQC)](https://arxiv.org/abs/2005.04269) ## Installation +**Note:** You need the `master` version of [Stable Baselines3](https://github.com/DLR-RM/stable-baselines3/). + +To install Stable Baselines3 `master` version: +``` +pip install git+https://github.com/DLR-RM/stable-baselines3 +``` + +Install Stable Baselines3 - Contrib using pip: ``` pip install git+https://github.com/Stable-Baselines-Team/stable-baselines3-contrib ``` From 00f9d26d55a5625704cd3a0ee30f6cb7ede4f6d5 Mon Sep 17 00:00:00 2001 From: "Anssi \"Miffyli\" Kanervisto" Date: Thu, 15 Oct 2020 02:50:16 +0300 Subject: [PATCH 11/13] Spell-checking and small update to issue template --- .github/ISSUE_TEMPLATE/issue-template.md | 1 + CONTRIBUTING.md | 6 +++--- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/issue-template.md b/.github/ISSUE_TEMPLATE/issue-template.md index 1268278..b8189aa 100644 --- a/.github/ISSUE_TEMPLATE/issue-template.md +++ b/.github/ISSUE_TEMPLATE/issue-template.md @@ -32,6 +32,7 @@ for both code and stack traces. ```python from stable_baselines3 import ... +from sb3_contrib import ... 
``` diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index d4a54ac..decf076 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -19,8 +19,8 @@ Contributions and review focuses on following three parts: in the documentation, in-code documentation to clarify the flow of logic and report of the expected results, where applicable. 3) Consistency with stable-baselines3 - - To ease readibility, all contributions need to follow the code style (see below) and - ideoms used in stable-baselines3. + - To ease readability, all contributions need to follow the code style (see below) and + idioms used in stable-baselines3. The implementation quality is a strict requirements with little room for changes, because otherwise the implementation can do more harm than good (wrong results). Parts two and three @@ -79,7 +79,7 @@ Below is a template for documentation for full training algorithms. [Feature/Algorithm name] ======================== -- Non-abreviated name and/or one-sentence description of the method. +- Non-abbreviated name and/or one-sentence description of the method. - Link and reference to the original publications the present the feature, or other established source(s). - Links to any codebases that were used for reference (e.g. authors' implementations) From 926e4881969d621a21112ff1fae93ff80c1d3f8e Mon Sep 17 00:00:00 2001 From: Antonin RAFFIN Date: Sat, 17 Oct 2020 17:04:00 +0200 Subject: [PATCH 12/13] Update wording and links --- .github/ISSUE_TEMPLATE/issue-template.md | 1 + CHANGELOG.md | 2 +- CONTRIBUTING.md | 16 +++++----- Makefile | 2 +- README.md | 37 +++++++++++++++--------- docs/index.rst | 4 +-- 6 files changed, 37 insertions(+), 25 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/issue-template.md b/.github/ISSUE_TEMPLATE/issue-template.md index b8189aa..4c3ddaa 100644 --- a/.github/ISSUE_TEMPLATE/issue-template.md +++ b/.github/ISSUE_TEMPLATE/issue-template.md @@ -44,6 +44,7 @@ Traceback (most recent call last): File ... 
**System Info** Describe the characteristic of your environment: * Describe how the library was installed (pip, docker, source, ...) + * Stable-Baselines3 and sb3-contrib versions * GPU models and configuration * Python version * PyTorch version diff --git a/CHANGELOG.md b/CHANGELOG.md index c14db75..40573a5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,4 @@ -## Release 0.9.0a2 (WIP) +## Release 0.10.0a0 (WIP) ### Breaking Changes diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index decf076..8ebb1ec 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -2,7 +2,7 @@ This contrib repository is designed for experimental implementations of various parts of reinforcement training so that others may make use of them. This includes full -training algorithms, different tools (e.g. new environment wrappers, +RL algorithms, different tools (e.g. new environment wrappers, callbacks) and extending algorithms implemented in stable-baselines3. **Before opening a pull request**, open an issue discussing the contribution. @@ -10,9 +10,9 @@ Once we agree that the plan looks good, go ahead and implement it. Contributions and review focuses on following three parts: 1) **Implementation quality** - - Performance of the training algorithms should match what proposed authors reported (if applicable). + - Performance of the RL algorithms should match the one reported by the original authors (if applicable). - This is ensured by including a code that replicates an experiment from the original - paper or from an established codebase (e.g. the code from authors), as well as + paper or from an established codebase (e.g. the code from authors), as well as a test to check that implementation works on program level (does not crash). 2) Documentation - Documentation quality should match that of stable-baselines3, with each feature covered @@ -20,7 +20,7 @@ Contributions and review focuses on following three parts: of logic and report of the expected results, where applicable. 
3) Consistency with stable-baselines3 - To ease readability, all contributions need to follow the code style (see below) and - idioms used in stable-baselines3. + idioms used in stable-baselines3. The implementation quality is a strict requirements with little room for changes, because otherwise the implementation can do more harm than good (wrong results). Parts two and three @@ -33,7 +33,7 @@ for suggestions of the community for new possible features to include in contrib ## How to implement your suggestion Implement your feature/suggestion/algorithm in following ways, using the first one that applies: -1) Environment wrapper: This can be used with any algorithm and even outside stable-baselines3. +1) Environment wrapper: This can be used with any algorithm and even outside stable-baselines3. Place code for these under `sb3_contrib/common/wrappers` directory. 2) [Custom callback](https://stable-baselines3.readthedocs.io/en/master/guide/callbacks.html). Place code under `sb3_contrib/common/callbacks` directory. @@ -63,17 +63,17 @@ Along with the code, PR **must** include the following: this goes under respective pages in documentation. If full training algorithm, this goes under a new page with template below (`docs/modules/[algo_name]`). 2) If a training algorithm/improvement: results of a replicated experiment from the original paper in the documentation, - **which must match the results from authors** unless solid arguments can be provided why they did not match. + **which must match the results from authors** unless solid arguments can be provided why they did not match. 3) If above holds: The **exact** code to run the replicated experiment (i.e. it should produce the above results), and inside the code information about the environment used (Python version, library versions, OS, hardware information). If small enough, include this in the documentation. 
If applicable, use [rl-baselines3-zoo](https://github.com/DLR-RM/rl-baselines3-zoo) to - run the agent performance comparison experiments (fork repository, implement experiment in a new branch and share link to + run the agent performance comparison experiments (fork repository, implement experiment in a new branch and share link to that branch). If above do not apply, create new code to replicate the experiment and include link to it. 4) Updated tests in `tests/test_run.py` and `tests/test_save_load.py` to test that features run as expected and serialize correctly. This this is **not** for testing e.g. training performance of a learning algorithm, and should be relatively quick to run. -Below is a template for documentation for full training algorithms. +Below is a template for documentation for full RL algorithms. ```rst [Feature/Algorithm name] diff --git a/Makefile b/Makefile index d740b60..a8f34af 100644 --- a/Makefile +++ b/Makefile @@ -5,7 +5,7 @@ pytest: ./scripts/run_tests.sh type: - pytype + pytype -j auto lint: # stop the build if there are Python syntax errors or undefined names diff --git a/README.md b/README.md index bd26110..cdcc9fe 100644 --- a/README.md +++ b/README.md @@ -1,31 +1,38 @@ + + [![CI](https://github.com/Stable-Baselines-Team/stable-baselines3-contrib/workflows/CI/badge.svg)](https://github.com/Stable-Baselines-Team/stable-baselines3-contrib/actions) [![codestyle](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black) -# Stable-Baselines3 - Contrib +# Stable-Baselines3 - Contrib (SB3-Contrib) -Contrib package for [Stable-Baselines3](https://github.com/DLR-RM/stable-baselines3) - Experimental code. +Contrib package for [Stable-Baselines3](https://github.com/DLR-RM/stable-baselines3) - Experimental reinforcement learning (RL) code. "sb3-contrib" for short. -A place for training algorithms and tools that are considered experimental, e.g. implementations of the latest -publications. 
Goal is to keep the simplicity, documentation and style of stable-baselines3 but for less matured -implementations. +### What is SB3-Contrib? -Why create this repository? Over the span of stable-baselines and stable-baselines3, the community has been eager -to contribute in form of better logging utilities, environment wrappers, extended support (e.g. different action spaces) -and learning algorithms. However sometimes these utilities were too niche to be considered for stable-baselines or -proved to be too difficult to integrate well into existing code without a mess. sb3-contrib aims to fix this by -not requiring the neatest code integration with existing code and not setting limits on what is too niche: almost everything -remotely useful goes! We hope this allows to extend the known quality of stable-baselines style and documentation beyond -the relatively small scope of utilities of the main repository. +A place for RL algorithms and tools that are considered experimental, e.g. implementations of the latest publications. The goal is to keep the simplicity, documentation and style of stable-baselines3 but for less mature implementations. + +### Why create this repository? + +Over the span of stable-baselines and stable-baselines3, the community has been eager to contribute in the form of better logging utilities, environment wrappers, extended support (e.g. different action spaces) and learning algorithms. + +However, sometimes these utilities were too niche to be considered for stable-baselines or +proved to be too difficult to integrate well into existing code without a mess. sb3-contrib aims to fix this by not requiring the neatest code integration with existing code and not setting limits on what is too niche: almost everything remotely useful goes! We hope this allows us to extend the known quality of stable-baselines style and documentation beyond the relatively small scope of utilities of the main repository.
## Features See documentation for the full list of included features. -**Training algorithms**: +**RL Algorithms**: - [Truncated Quantile Critics (TQC)](https://arxiv.org/abs/2005.04269) + + + + ## Installation **Note:** You need the `master` version of [Stable Baselines3](https://github.com/DLR-RM/stable-baselines3/). @@ -40,6 +47,10 @@ Install Stable Baselines3 - Contrib using pip: pip install git+https://github.com/Stable-Baselines-Team/stable-baselines3-contrib ``` +## How To Contribute + +If you want to contribute, please read the [**CONTRIBUTING.md**](./CONTRIBUTING.md) guide first. + ## Citing the Project diff --git a/docs/index.rst b/docs/index.rst index 79c5f5d..d590f25 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -6,7 +6,7 @@ Welcome to Stable Baselines3 Contrib docs! ========================================== -Contrib package for `Stable Baselines3 <https://github.com/DLR-RM/stable-baselines3>`_ - Experimental code. +Contrib package for `Stable Baselines3 (SB3) <https://github.com/DLR-RM/stable-baselines3>`_ - Experimental code. Github repository: https://github.com/Stable-Baselines-Team/stable-baselines3-contrib @@ -64,7 +64,7 @@ To cite this project in publications: Contributing ------------ -If you want to contribute, please read `CONTRIBUTING.md `_ first. +If you want to contribute, please read `CONTRIBUTING.md `_ first.
Indices and tables ------------------- From 72fe9a2072293341964252583ed9237c5da7c4d7 Mon Sep 17 00:00:00 2001 From: Antonin RAFFIN Date: Sat, 17 Oct 2020 17:06:11 +0200 Subject: [PATCH 13/13] Faster tests --- tests/test_run.py | 6 +++--- tests/test_save_load.py | 8 ++++---- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/tests/test_run.py b/tests/test_run.py index c2bc941..8d59976 100644 --- a/tests/test_run.py +++ b/tests/test_run.py @@ -14,7 +14,7 @@ def test_tqc(ent_coef): create_eval_env=True, ent_coef=ent_coef, ) - model.learn(total_timesteps=500, eval_freq=250) + model.learn(total_timesteps=300, eval_freq=250) @pytest.mark.parametrize("n_critics", [1, 3]) @@ -23,7 +23,7 @@ def test_n_critics(n_critics): model = TQC( "MlpPolicy", "Pendulum-v0", policy_kwargs=dict(net_arch=[64], n_critics=n_critics), learning_starts=100, verbose=1 ) - model.learn(total_timesteps=500) + model.learn(total_timesteps=300) def test_sde(): @@ -35,6 +35,6 @@ def test_sde(): learning_starts=100, verbose=1, ) - model.learn(total_timesteps=500) + model.learn(total_timesteps=300) model.policy.reset_noise() model.policy.actor.get_std() diff --git a/tests/test_save_load.py b/tests/test_save_load.py index 9a73c77..396bc62 100644 --- a/tests/test_save_load.py +++ b/tests/test_save_load.py @@ -43,7 +43,7 @@ def test_save_load(tmp_path, model_class): # create model model = model_class("MlpPolicy", env, policy_kwargs=dict(net_arch=[16]), verbose=1) - model.learn(total_timesteps=500) + model.learn(total_timesteps=300) env.reset() observations = np.concatenate([env.step([env.action_space.sample()])[0] for _ in range(10)], axis=0) @@ -152,7 +152,7 @@ def test_save_load(tmp_path, model_class): assert np.allclose(selected_actions, new_selected_actions, 1e-4) # check if learn still works - model.learn(total_timesteps=500) + model.learn(total_timesteps=300) del model @@ -224,7 +224,7 @@ def test_save_load_replay_buffer(tmp_path, model_class): path = pathlib.Path(tmp_path / 
"logs/replay_buffer.pkl") path.parent.mkdir(exist_ok=True, parents=True) # to not raise a warning model = model_class("MlpPolicy", select_env(model_class), buffer_size=1000) - model.learn(500) + model.learn(300) old_replay_buffer = deepcopy(model.replay_buffer) model.save_replay_buffer(path) model.replay_buffer = None @@ -268,7 +268,7 @@ def test_save_load_policy(tmp_path, model_class, policy_str): # create model model = model_class(policy_str, env, policy_kwargs=dict(net_arch=[16]), verbose=1, **kwargs) - model.learn(total_timesteps=500) + model.learn(total_timesteps=300) env.reset() observations = np.concatenate([env.step([env.action_space.sample()])[0] for _ in range(10)], axis=0)