From 161dc8f4f07d73b61f951c4de7f5951cf7598419 Mon Sep 17 00:00:00 2001 From: morand-g Date: Fri, 8 Nov 2024 15:52:35 +0100 Subject: [PATCH 1/8] Created regression_system and updated check_loss modified: malpolon/models/standard_prediction_systems.py modified: malpolon/models/utils.py --- .../models/standard_prediction_systems.py | 68 +++++++++++++++++++ malpolon/models/utils.py | 6 +- 2 files changed, 72 insertions(+), 2 deletions(-) diff --git a/malpolon/models/standard_prediction_systems.py b/malpolon/models/standard_prediction_systems.py index 0a0f0d24..1234ff22 100644 --- a/malpolon/models/standard_prediction_systems.py +++ b/malpolon/models/standard_prediction_systems.py @@ -449,3 +449,71 @@ def __init__( } super().__init__(model, loss, optimizer, metrics=metrics) + + +class RegressionSystem(GenericPredictionSystem): + """Regression task class.""" + def __init__( + self, + model: Union[torch.nn.Module, Mapping], + loss: Union[torch.nn.modules.loss._Loss, str], + optimizer: Union[torch.nn.Module, Mapping] = None, + lr: float = 1e-2, + weight_decay: float = 0, + metrics: Optional[dict[str, Callable]] = None, + task: str = 'regression_multilabel', + loss_kwargs: Optional[dict] = {}, + hparams_preprocess: bool = True, + checkpoint_path: Optional[str] = None + ): + """Class constructor. + Parameters + ---------- + model : dict + model to use + lr : float + learning rate + weight_decay : float + weight decay + momentum : float + value of momentum + nesterov : bool + if True, uses Nesterov's momentum + metrics : dict + dictionnary containing the metrics to compute. + Keys must match metrics' names and have a subkey with each + metric's functional methods as value. This subkey is either + created from the `malpolon.models.utils.FMETRICS_CALLABLES` + constant or supplied, by the user directly. + task : str, optional + Machine learning task (used to format labels accordingly), + by default 'classification_multiclass'. The value determines + the loss to be selected. 
if 'multilabel' or 'binary' is + in the task, the BCEWithLogitsLoss is selected, otherwise + the CrossEntropyLoss is used. + hparams_preprocess : bool, optional + if True performs preprocessing operations on the hyperparameters, + by default True + """ + if hparams_preprocess: + task = task.split('regression_')[1] + metrics = check_metric(metrics) + + self.lr = lr + self.weight_decay = weight_decay + + self.checkpoint_path = checkpoint_path + model = check_model(model) + + if optimizer is None: + print(f'[INFO] No optimizer provided: using AdamW with lr={lr}, weight_decay={weight_decay}') + optimizer = torch.optim.AdamW( + model.parameters(), + lr=self.lr, + weight_decay=self.weight_decay + ) + + + loss = check_loss(loss)(**loss_kwargs) + + super().__init__(model, loss, optimizer, metrics=metrics) \ No newline at end of file diff --git a/malpolon/models/utils.py b/malpolon/models/utils.py index 12f28439..00054b60 100644 --- a/malpolon/models/utils.py +++ b/malpolon/models/utils.py @@ -104,7 +104,7 @@ def check_metric(metrics: OmegaConf) -> OmegaConf: return metrics -def check_loss(loss: nn.modules.loss._Loss) -> nn.modules.loss._Loss: +def check_loss(loss: Union[nn.modules.loss._Loss, str]) -> nn.modules.loss._Loss: """Ensure input loss is a pytorch loss. Args: @@ -118,7 +118,9 @@ def check_loss(loss: nn.modules.loss._Loss) -> nn.modules.loss._Loss: """ if isinstance(loss, nn.modules.loss._Loss): # pylint: disable=protected-access # noqa return loss - raise ValueError(f"Loss must be of type nn.modules.loss. 
" + elif isinstance(loss, str): + return eval(loss) + raise ValueError(f"Loss must be of type nn.modules.loss or callable string" f"Loss given type {type(loss)} instead") From c5579437f0c62816afada152064254bb3c34bc87 Mon Sep 17 00:00:00 2001 From: morand-g Date: Tue, 12 Nov 2024 12:05:05 +0100 Subject: [PATCH 2/8] Updated code for pull request #71 modified: malpolon/models/standard_prediction_systems.py modified: malpolon/models/utils.py --- malpolon/models/standard_prediction_systems.py | 11 ++++++----- malpolon/models/utils.py | 10 ++++++++-- 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/malpolon/models/standard_prediction_systems.py b/malpolon/models/standard_prediction_systems.py index 1234ff22..bc28457f 100644 --- a/malpolon/models/standard_prediction_systems.py +++ b/malpolon/models/standard_prediction_systems.py @@ -462,7 +462,6 @@ def __init__( weight_decay: float = 0, metrics: Optional[dict[str, Callable]] = None, task: str = 'regression_multilabel', - loss_kwargs: Optional[dict] = {}, hparams_preprocess: bool = True, checkpoint_path: Optional[str] = None ): @@ -471,14 +470,14 @@ def __init__( ---------- model : dict model to use + loss : Union[torch.nn.modules.loss._Loss, str] + loss or string from the predifined LOSS_CALLABLES. + optimizer : Union[torch.nn.Module, Mapping] + optional custom optimizer to use for training lr : float learning rate weight_decay : float weight decay - momentum : float - value of momentum - nesterov : bool - if True, uses Nesterov's momentum metrics : dict dictionnary containing the metrics to compute. Keys must match metrics' names and have a subkey with each @@ -494,6 +493,8 @@ def __init__( hparams_preprocess : bool, optional if True performs preprocessing operations on the hyperparameters, by default True + checkpoint_path : str + Checkpoint to use for training or inference. 
""" if hparams_preprocess: task = task.split('regression_')[1] diff --git a/malpolon/models/utils.py b/malpolon/models/utils.py index 00054b60..45e2d51d 100644 --- a/malpolon/models/utils.py +++ b/malpolon/models/utils.py @@ -35,6 +35,11 @@ 'reduce_lr_on_plateau': lr_scheduler.ReduceLROnPlateau, 'cosine_annealing_lr': lr_scheduler.CosineAnnealingLR, } +LOSS_CALLABLES = {'huber_loss': nn.HuberLoss, + 'mse_loss': nn.MSELoss, + 'cross_entropy_loss': nn.CrossEntropyLoss, + 'bce_loss': nn.BCELoss, } + class CrashHandler(): """Saves the model in case of unexpected crash or user interruption.""" @@ -108,7 +113,7 @@ def check_loss(loss: Union[nn.modules.loss._Loss, str]) -> nn.modules.loss._Loss """Ensure input loss is a pytorch loss. Args: - loss (nn.modules.loss._Loss): input loss. + loss (Union[nn.modules.loss._Loss, str]): input loss. Raises: ValueError: if input loss isn't a pytorch loss object. @@ -119,7 +124,8 @@ def check_loss(loss: Union[nn.modules.loss._Loss, str]) -> nn.modules.loss._Loss if isinstance(loss, nn.modules.loss._Loss): # pylint: disable=protected-access # noqa return loss elif isinstance(loss, str): - return eval(loss) + if loss in LOSS_CALLABLES: + return(LOSS_CALLABLES[loss]) raise ValueError(f"Loss must be of type nn.modules.loss or callable string" f"Loss given type {type(loss)} instead") From a666126b38f0205c01174cb164a45d4e66ffaf3b Mon Sep 17 00:00:00 2001 From: tlarcher Date: Fri, 8 Nov 2024 16:53:12 +0100 Subject: [PATCH 3/8] Added malpolon arxiv paper citation --- README.md | 67 ++++++++++++++++++++++++++++++++++++++----------------- 1 file changed, 46 insertions(+), 21 deletions(-) diff --git a/README.md b/README.md index 2db2b543..1544d43f 100644 --- a/README.md +++ b/README.md @@ -69,19 +69,22 @@ Before proceeding, please make sure the following packages are installed on your The following instructions show installation commands for Python 3.10, but can be adapted for any of the compatible Python versions mentioned above by simply 
changing the version number. ### Install from `PyPi` + The backend side of malpolon is distributed as a package on `PyPi`. To install it, simply run the following command: ```script pip install malpolon ``` -However, versions available on PyPi are non-experimental and possibly behind the repository's `main` and `dev` branches. To know which version you want download, please refer to the *tags* section of the repository and match it with PyPi. +However, versions available on PyPi are non-experimental and possibly behind the repository's `main` and `dev` branches. To know which version you want download, please refer to the _tags_ section of the repository and match it with PyPi. Furthermore, the PyPi package does not include the examples and the documentation. If you want to install the full repository, follow the next steps. ### Install from `GitHub` + #### 1. Clone the repository Clone the Malpolon repository using `git` in the directory of your choice: + ```script git clone https://github.com/plantnet/malpolon.git ``` @@ -102,6 +105,7 @@ source ./malpolon_3.10/bin/activate ``` Once the env is activated, install the python packages listed in `requirements_python3.10.txt`: + ```script pip install --upgrade setuptools pip install -r requirements_python3.10.txt @@ -175,11 +179,12 @@ make -C docs html The result can be found in `docs/_build/html`. - ## ⚒️ Troubleshooting + Commonly encountered errors when using the framework are compiled [here](examples/README.md#⚒️-troubleshooting). ## 🚀 Contributing + ### **Guidelines** Issues and PR templates are provided to help you start a contribution to the project. @@ -191,6 +196,7 @@ A checking script is also provided and can run checks relative to the 2 next sec ``` ### **Unit tests** +
Click here to expand instructions @@ -199,6 +205,7 @@ When submitting, make sure the unit tests all pass without errors. These tests a ```bash ./checkMyCode.sh t # or `pytest malpolon/tests/` ``` + Specify a file path as argument to run a single test file: ```bash @@ -233,6 +240,7 @@ Run linters on non-test file(s) : ```bash ./checkMyCode.sh ``` +
## 🚆 Roadmap @@ -244,13 +252,13 @@ This roadmap outlines the planned features and milestones for the project. Pleas
- [ ] Data support - - [x] Images (pre-extracted patches) - - [x] Rasters - - [ ] Time series - - [x] Via GLC23 loaders (.csv) - - [ ] Via generic loader - - [ ] Shapefiles - - [ ] Fuse several data types in one training + - [x] Images (pre-extracted patches) + - [x] Rasters + - [ ] Time series + - [x] Via GLC23 loaders (.csv) + - [ ] Via generic loader + - [ ] Shapefiles + - [ ] Fuse several data types in one training - [ ] Deep learning tasks - [x] Binary classification - [x] Multi-class classification @@ -273,15 +281,16 @@ This roadmap outlines the planned features and milestones for the project. Pleas ## Libraries -Here is an overview of the main Python libraries used in this project. -* [![PyTorch](https://img.shields.io/badge/PyTorch-%23ee4c2c.svg?logo=pytorch&logoColor=white)](https://pytorch.org/) - To handle deep learning loops and dataloaders -* [![PyTorch Lightning](https://img.shields.io/badge/PyTorch%20Lightning-%23792EE5.svg?logo=lightning&logoColor=white)](https://lightning.ai/docs/pytorch/stable/) - Deep learning framework which simplifies the usage of PyTorch elements -* [![Numpy](https://img.shields.io/badge/Numpy-%234D77CF.svg?logo=numpy&logoColor=white)](https://numpy.org/) - For common computational operations -* [![Torchgeo](https://img.shields.io/badge/Torchgeo-%23EE4C2C.svg?logo=torchgeo&logoColor=white)](https://torchgeo.readthedocs.io/en/stable/) - To handle data rasters -* [![Matplotlib](https://img.shields.io/badge/Matplotlib-%2311557C.svg?logo=matplotlib&logoColor=white)](https://matplotlib.org/) - For displaying purposes -* [![Hydra](https://img.shields.io/badge/Hydra-%23729DB1.svg?logo=hydra&logoColor=white)](https://hydra.cc/docs/intro/) - To handle models' hyperparameters -* [![Cartopy](https://img.shields.io/badge/Cartopy-%2300A1D9.svg?logo=cartopy&logoColor=white)](https://scitools.org.uk/cartopy/docs/latest/) - To handle geographical data +Here is an overview of the main Python libraries used in this project. 
+ +- [![PyTorch](https://img.shields.io/badge/PyTorch-%23ee4c2c.svg?logo=pytorch&logoColor=white)](https://pytorch.org/) - To handle deep learning loops and dataloaders +- [![PyTorch Lightning](https://img.shields.io/badge/PyTorch%20Lightning-%23792EE5.svg?logo=lightning&logoColor=white)](https://lightning.ai/docs/pytorch/stable/) - Deep learning framework which simplifies the usage of PyTorch elements +- [![Numpy](https://img.shields.io/badge/Numpy-%234D77CF.svg?logo=numpy&logoColor=white)](https://numpy.org/) - For common computational operations +- [![Torchgeo](https://img.shields.io/badge/Torchgeo-%23EE4C2C.svg?logo=torchgeo&logoColor=white)](https://torchgeo.readthedocs.io/en/stable/) - To handle data rasters +- [![Matplotlib](https://img.shields.io/badge/Matplotlib-%2311557C.svg?logo=matplotlib&logoColor=white)](https://matplotlib.org/) - For displaying purposes +- [![Hydra](https://img.shields.io/badge/Hydra-%23729DB1.svg?logo=hydra&logoColor=white)](https://hydra.cc/docs/intro/) - To handle models' hyperparameters +- [![Cartopy](https://img.shields.io/badge/Cartopy-%2300A1D9.svg?logo=cartopy&logoColor=white)](https://scitools.org.uk/cartopy/docs/latest/) - To handle geographical data ## Acknowledgments @@ -303,15 +312,31 @@ This work is currently under development and maintained by the [Pl@ntNet](https: ## Licensing -This framework is distributed under the [MIT license](https://opensource.org/license/mit/), as is the Pl@ntNet project. See LICENSE.md for more information. + +This framework is distributed under the [MIT license](https://opensource.org/license/mit/), as is the Pl@ntNet project. See LICENSE.md for more information. ## Citation & credits -Malpolon is a project developed by the [Pl@ntNet](https://plantnet.org/) team within the [INRIA](https://www.inria.fr/en) research institute. If you use this framework in your research, please cite this repository in your paper. 
+ +Malpolon is a project developed by the [Pl@ntNet](https://plantnet.org/) team within the [INRIA](https://www.inria.fr/en) research institute. If you use this framework in your research, please cite our software paper: + +```latex +@misc{larcher2024malpolonframeworkdeepspecies, + title={MALPOLON: A Framework for Deep Species Distribution Modeling}, + author={Theo Larcher and Lukas Picek and Benjamin Deneu and Titouan Lorieul and Maximilien Servajean and Alexis Joly}, + year={2024}, + eprint={2409.18102}, + archivePrefix={arXiv}, + primaryClass={cs.LG}, + url={https://arxiv.org/abs/2409.18102}, +} +``` Authors include : -- [Théo Larcher](https://github.com/tlarcher) (current lead developer) ([email](mailto:theo.larcher@inria.fr)) + +- [Théo Larcher](https://github.com/tlarcher) (current lead developer) ([email](mailto:theo.larcher@inria.fr)) - [Maximilien Servajean](https://github.com/maximiliense) - [Alexis Joly](https://github.com/alexisjoly) Former developpers include : -- [Titouan Lorieul](https://github.com/tlorieul) (former lead developer) ([email](mailto:titouan.lorieul@gmail.com)) + +- [Titouan Lorieul](https://github.com/tlorieul) (former lead developer) ([email](mailto:titouan.lorieul@gmail.com)) From 748054891af321dcca4f710d820a9cbbd3dd83a4 Mon Sep 17 00:00:00 2001 From: morand-g Date: Tue, 12 Nov 2024 14:21:31 +0100 Subject: [PATCH 4/8] modified: malpolon/models/standard_prediction_systems.py modified: malpolon/models/utils.py --- malpolon/models/standard_prediction_systems.py | 3 +++ malpolon/models/utils.py | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/malpolon/models/standard_prediction_systems.py b/malpolon/models/standard_prediction_systems.py index bc28457f..7caf1f96 100644 --- a/malpolon/models/standard_prediction_systems.py +++ b/malpolon/models/standard_prediction_systems.py @@ -462,6 +462,7 @@ def __init__( weight_decay: float = 0, metrics: Optional[dict[str, Callable]] = None, task: str = 'regression_multilabel', + 
loss_kwargs: Optional[dict] = {}, hparams_preprocess: bool = True, checkpoint_path: Optional[str] = None ): @@ -490,6 +491,8 @@ def __init__( the loss to be selected. if 'multilabel' or 'binary' is in the task, the BCEWithLogitsLoss is selected, otherwise the CrossEntropyLoss is used. + loss_kwargs: Optional[dict] = {} + Arguments to be passed to loss constructor. hparams_preprocess : bool, optional if True performs preprocessing operations on the hyperparameters, by default True diff --git a/malpolon/models/utils.py b/malpolon/models/utils.py index 45e2d51d..ecda1bfc 100644 --- a/malpolon/models/utils.py +++ b/malpolon/models/utils.py @@ -126,7 +126,7 @@ def check_loss(loss: Union[nn.modules.loss._Loss, str]) -> nn.modules.loss._Loss elif isinstance(loss, str): if loss in LOSS_CALLABLES: return(LOSS_CALLABLES[loss]) - raise ValueError(f"Loss must be of type nn.modules.loss or callable string" + raise ValueError(f"Loss must be of type nn.modules.loss or string from LOSS_CALLABLES. " f"Loss given type {type(loss)} instead") From 6c1f48869fe5f046c3695572d200cb39b98116b7 Mon Sep 17 00:00:00 2001 From: morand-g Date: Tue, 12 Nov 2024 14:29:05 +0100 Subject: [PATCH 5/8] modified: malpolon/models/standard_prediction_systems.py --- malpolon/models/standard_prediction_systems.py | 16 ++-------------- 1 file changed, 2 insertions(+), 14 deletions(-) diff --git a/malpolon/models/standard_prediction_systems.py b/malpolon/models/standard_prediction_systems.py index 7caf1f96..f36ec8dc 100644 --- a/malpolon/models/standard_prediction_systems.py +++ b/malpolon/models/standard_prediction_systems.py @@ -461,9 +461,7 @@ def __init__( lr: float = 1e-2, weight_decay: float = 0, metrics: Optional[dict[str, Callable]] = None, - task: str = 'regression_multilabel', loss_kwargs: Optional[dict] = {}, - hparams_preprocess: bool = True, checkpoint_path: Optional[str] = None ): """Class constructor. @@ -485,23 +483,13 @@ def __init__( metric's functional methods as value. 
This subkey is either created from the `malpolon.models.utils.FMETRICS_CALLABLES` constant or supplied, by the user directly. - task : str, optional - Machine learning task (used to format labels accordingly), - by default 'classification_multiclass'. The value determines - the loss to be selected. if 'multilabel' or 'binary' is - in the task, the BCEWithLogitsLoss is selected, otherwise - the CrossEntropyLoss is used. loss_kwargs: Optional[dict] = {} Arguments to be passed to loss constructor. - hparams_preprocess : bool, optional - if True performs preprocessing operations on the hyperparameters, - by default True checkpoint_path : str Checkpoint to use for training or inference. """ - if hparams_preprocess: - task = task.split('regression_')[1] - metrics = check_metric(metrics) + + metrics = check_metric(metrics) self.lr = lr self.weight_decay = weight_decay From 422f4c4684e08b9ea8271cde7d00f1f155e6f4ec Mon Sep 17 00:00:00 2001 From: morand-g Date: Wed, 4 Dec 2024 11:51:56 +0100 Subject: [PATCH 6/8] Remove unused "checkpoint_path" argument in RegressionSystem --- malpolon/models/standard_prediction_systems.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/malpolon/models/standard_prediction_systems.py b/malpolon/models/standard_prediction_systems.py index f36ec8dc..229d0c38 100644 --- a/malpolon/models/standard_prediction_systems.py +++ b/malpolon/models/standard_prediction_systems.py @@ -462,7 +462,6 @@ def __init__( weight_decay: float = 0, metrics: Optional[dict[str, Callable]] = None, loss_kwargs: Optional[dict] = {}, - checkpoint_path: Optional[str] = None ): """Class constructor. Parameters @@ -485,8 +484,6 @@ def __init__( constant or supplied, by the user directly. loss_kwargs: Optional[dict] = {} Arguments to be passed to loss constructor. - checkpoint_path : str - Checkpoint to use for training or inference. 
""" metrics = check_metric(metrics) @@ -494,7 +491,6 @@ def __init__( self.lr = lr self.weight_decay = weight_decay - self.checkpoint_path = checkpoint_path model = check_model(model) if optimizer is None: From 62913ae63ff6601fd06c1c8f73d1c89ff9d702f8 Mon Sep 17 00:00:00 2001 From: morand-g Date: Wed, 4 Dec 2024 16:24:14 +0100 Subject: [PATCH 7/8] Added RegressionSystem to models.__init__ --- malpolon/models/__init__.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/malpolon/models/__init__.py b/malpolon/models/__init__.py index 49e322ff..62da981c 100644 --- a/malpolon/models/__init__.py +++ b/malpolon/models/__init__.py @@ -1,7 +1,9 @@ from .standard_prediction_systems import (ClassificationSystem, + RegressionSystem, GenericPredictionSystem) __all__ = [ # noqa: F405 "GenericPredictionSystem", "ClassificationSystem", + "RegressionSystem", ] From 88f51dfae8ffebc42fa80147ad5a7058604456d0 Mon Sep 17 00:00:00 2001 From: morand-g Date: Mon, 9 Dec 2024 17:09:17 +0100 Subject: [PATCH 8/8] modified: malpolon/models/standard_prediction_systems.py Updated loss check in RegressionSystem.__init__ to allow custom losses --- malpolon/models/standard_prediction_systems.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/malpolon/models/standard_prediction_systems.py b/malpolon/models/standard_prediction_systems.py index 229d0c38..9e40c56f 100644 --- a/malpolon/models/standard_prediction_systems.py +++ b/malpolon/models/standard_prediction_systems.py @@ -501,7 +501,10 @@ def __init__( weight_decay=self.weight_decay ) - - loss = check_loss(loss)(**loss_kwargs) + if isinstance(loss, torch.nn.modules.loss._Loss): + # If loss is already instantiated, no need to provide kwargs + loss = check_loss(loss) + else: + loss = check_loss(loss)(**loss_kwargs) super().__init__(model, loss, optimizer, metrics=metrics) \ No newline at end of file