From 0160dcfabaea420e98d8051228bd8a62c51312aa Mon Sep 17 00:00:00 2001 From: tlarcher Date: Fri, 22 Nov 2024 15:20:52 +0100 Subject: [PATCH 01/20] Updated setup.py for v2.1.2 --- setup.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index a867da7a..5c2fe56b 100644 --- a/setup.py +++ b/setup.py @@ -1,8 +1,8 @@ from setuptools import find_packages, setup setup(name="malpolon", - version="2.1.1", - description="Malpolon v2.1.1", + version="2.1.2", + description="Malpolon v2.1.2", author="Theo Larcher, Titouan Lorieul, Benjamin Deneu, Lukas Picek", author_email="theo.larcher@inria.fr, titouan.lorieul@gmail.com, benjamin.deneu@wsl.ch, lukas.picek@inria.fr", url="https://github.com/plantnet/malpolon", From 6fce0421acbfabed6609017f8c18e8a772ca45ec Mon Sep 17 00:00:00 2001 From: tlarcher Date: Mon, 7 Apr 2025 18:04:18 +0200 Subject: [PATCH 02/20] Added GLC25 related stuff --- .../geolifeclef2025_pre_extracted/.gitignore | 8 + .../config/glc24_cnn_multimodal_ensemble.yaml | 91 +++ .../evaluate_inference_MME.py | 91 +++ .../evaluate_inference_MME_habitat.py | 134 +++++ .../glc25_cnn_multimodal_ensemble.py | 106 ++++ .../datasets/geolifeclef2025_pre_extracted.py | 518 ++++++++++++++++++ ...ompute_mean_std_iteratively_from_sample.py | 36 +- 7 files changed, 978 insertions(+), 6 deletions(-) create mode 100644 examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/.gitignore create mode 100644 examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/config/glc24_cnn_multimodal_ensemble.yaml create mode 100644 examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/evaluate_inference_MME.py create mode 100644 examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/evaluate_inference_MME_habitat.py create mode 100644 examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/glc25_cnn_multimodal_ensemble.py create mode 100644 malpolon/data/datasets/geolifeclef2025_pre_extracted.py diff --git 
a/examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/.gitignore b/examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/.gitignore new file mode 100644 index 00000000..5e3111a1 --- /dev/null +++ b/examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/.gitignore @@ -0,0 +1,8 @@ +# Data +dataset/geolifeclef-2025/BioclimTimeSeries +dataset/geolifeclef-2025/EnvironmentalValues +dataset/geolifeclef-2025/SatellitePatches +dataset/geolifeclef-2025/SatelliteTimeSeries-Landsat +dataset/geolifeclef-2025/*.csv + +dataset/geolifeclef-2025/stats/fps* diff --git a/examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/config/glc24_cnn_multimodal_ensemble.yaml b/examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/config/glc24_cnn_multimodal_ensemble.yaml new file mode 100644 index 00000000..47db8020 --- /dev/null +++ b/examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/config/glc24_cnn_multimodal_ensemble.yaml @@ -0,0 +1,91 @@ +hydra: + run: + dir: outputs/${hydra.job.name}/${now:%Y-%m-%d_%H-%M-%S} + +run: + predict: false + checkpoint_path: # "outputs/glc25_cnn_multimodal_ensemble/???/last.ckpt" + +data: + root: "dataset/geolifeclef-2025/" + data_paths: + train: + landsat_data_dir: "${data.root}SateliteTimeSeries-Landsat/cubes/PA-train/" + bioclim_data_dir: "${data.root}BioclimTimeSeries/cubes/PA-train/" + sentinel_data_dir: "${data.root}SatelitePatches/PA-train/" + test: + landsat_data_dir: "${data.root}SateliteTimeSeries-Landsat/cubes/PA-test/" + bioclim_data_dir: "${data.root}BioclimTimeSeries/cubes/PA-test/" + sentinel_data_dir: "${data.root}SatelitePatches/PA-test/" + metadata_paths: + train: "${data.root}GLC25_PA_metadata_train_train-10.0min.csv" + val: "${data.root}GLC25_PA_metadata_train_val-10.0min.csv" + test: "${data.root}GLC25_PA_metadata_test.csv" + num_classes: &num_classes 11255 + download_data: True + train_batch_size: 64 + inference_batch_size: 16 + num_workers: 16 + +task: + task: 
"classification_multilabel" # ['classification_binary', 'classification_multiclass', 'classification_multilabel'] + +trainer: + # gpus: 1 # Deprecated since pytorchlightning 1.7, removed in 2.0. Replaced by the 2 next attributes + accelerator: "gpu" + devices: 'auto' + max_epochs: 20 + val_check_interval: 100 + check_val_every_n_epoch: 1 + # log_every_n_steps: 100 + +model: + provider_name: "malpolon" # choose from ["malpolon", "timm", "torchvision"] + model_name: "glc24_multimodal_ensemble" + model_kwargs: + pretrained: true # Deprecated in torchvision since 0.13 (replaced by "weights") but used by timm + modifiers: + change_last_layer: + num_outputs: *num_classes + +optim: + loss_kwargs: + pos_weight: 10.0 + optimizer: + adamw: + kwargs: + lr: 0.00025 + scheduler: + cosine_annealing_lr: + kwargs: + T_max: 25 + verbose: True + metrics: + multilabel_accuracy: + # callable: 'Fmetrics.classification.multilabel_accuracy' + kwargs: + num_labels: *num_classes + # threshold: 0.1 + average: micro + multilabel_recall: + callable: 'Fmetrics.classification.multilabel_recall' + kwargs: + num_labels: *num_classes + # threshold: 0.1 + average: micro + multilabel_precision: + callable: 'Fmetrics.classification.multilabel_precision' + kwargs: + num_labels: *num_classes + # threshold: 0.1 + average: micro + multilabel_f1-score: + callable: 'Fmetrics.classification.multilabel_f1_score' + kwargs: + num_labels: *num_classes + # threshold: 0.1 + average: micro + +loggers: + exp_name: "GLC25_MME" # Name of your experiment + log_dir_name: "tensorboard_logs/" # Name of the logs directory diff --git a/examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/evaluate_inference_MME.py b/examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/evaluate_inference_MME.py new file mode 100644 index 00000000..2fd2db98 --- /dev/null +++ b/examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/evaluate_inference_MME.py @@ -0,0 +1,91 @@ +"""This script computes metrics off of model 
inference predictions. + +It computes the Precision, Recall, F1-score (micro, samples and macro) +for the top-25 predictions of a model inference predictions (in a CSV); +as well as the AUC (micro, samples and macro) for all the probabilities +(not just the top-25). + +Author: Theo Larcher + Alexis Joly +""" +from copy import deepcopy + +import numpy as np +import pandas as pd +from sklearn.metrics import precision_recall_fscore_support, roc_auc_score +from tqdm import tqdm + +# 0. Load data +df_gt = pd.read_csv('predictions_and_evaluation/GLC24_SOLUTION_FILE.csv') +df_preds = pd.read_csv('predictions_and_evaluation/predictions_GLC24_SOLUTION_FILE.csv', sep=';') +for rowi, row in deepcopy(df_gt).iterrows(): + tsi = np.array(row['target_species_ids'].split()).astype(int) # Split the predictions string by space and convert to int + inds = np.where(tsi > 11254)[0] + vals = tsi[inds] + if inds.size > 0: + df_gt = df_gt.drop(rowi) + df_preds = df_preds.drop(rowi) + print(f"obs {rowi} of surveyId {row['surveyId']} removed because target_species_ids value {vals} out of range") + + +# 1. 
Convert data to usable types and compute one-hot encodings +res = pd.DataFrame(columns=['Precision_micro', 'Recall_micro', 'F1_micro', + 'Precision_samples', 'Recall_samples', 'F1_samples', + 'Precision_macro', 'Recall_macro', 'F1_macro', + 'AUC_micro', 'AUC_samples', 'AUC_macro']) +obs_id = df_gt['surveyId'] + +targets = df_gt['target_species_ids'] +targets = [list(map(int, x.split())) for x in targets] + +preds = df_preds['predictions'] +preds = np.array([list(map(int, x.split())) for x in preds]) + +probas = df_preds['probas'] +probas = np.array([list(map(float, x.split())) for x in probas]) + +all_targets_oh = np.zeros((len(df_gt), 11255)) +all_probas = np.zeros_like(probas) +all_predictions_top25_oh = np.zeros((len(df_preds), 11255)) + +for k, (p, t) in tqdm(enumerate(zip(preds, targets)), total=len(targets)): + all_probas[k] = probas[k][np.argsort(p)] + for t2 in t: + all_targets_oh[k, t2] = 1 + for p2 in p[:25]: + all_predictions_top25_oh[k, p2] = 1 + +# 2. Compute Precision / Recall / F1-score +print('\nComputing Precision, Recall, F1-scores...') +prfs = {} +for avg in ['micro', 'samples', 'macro']: + prf = precision_recall_fscore_support(all_targets_oh, all_predictions_top25_oh, average=avg, zero_division=np.nan)[:3] + prfs[f'Precision_{avg}'] = prf[0] + prfs[f'Recall_{avg}'] = prf[1] + prfs[f'F1_{avg}'] = prf[2] + print(f"{avg.upper()}: Precision, Recall, F1", prf) + + +# 3. 
Compute AUCs +print('\nComputing AUCs...') +# Find rows and columns with all zeros in both arrays, that is to say +# species that are never observed in any plot according to the ground truth +zero_cols_targets = np.all(all_targets_oh == 0, axis=0) +ones_cols_targets = np.all(all_targets_oh == 1, axis=0) +zero_cols = zero_cols_targets | ones_cols_targets +# Filter out rows and columns containing only zeros +filtered_targets = all_targets_oh[:][:, ~zero_cols] +filtered_probas = all_probas[:][:, ~zero_cols] +filtered_predictions_top25 = all_predictions_top25_oh[:][:, ~zero_cols] + +aucs = {} +for avg in ['micro', 'samples', 'macro']: + auc = roc_auc_score(filtered_targets, filtered_probas, average=avg) + aucs[f'AUC_{avg}'] = auc + print(f"{avg.upper()}: AUC", auc) + + +# 4. Save results +res.loc[0] = prfs | aucs +res.to_csv('Inference_PRC-AUC.csv', index=False) +print('\nResults saved to Inference_PRC-AUC.csv') diff --git a/examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/evaluate_inference_MME_habitat.py b/examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/evaluate_inference_MME_habitat.py new file mode 100644 index 00000000..dd6c7427 --- /dev/null +++ b/examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/evaluate_inference_MME_habitat.py @@ -0,0 +1,134 @@ +"""This script computes metrics off of model inference predictions. + +It computes the Precision, Recall, F1-score (micro, samples and macro) +for the top-k predictions of a model inference predictions (in a CSV); +as well as the AUC (micro, samples and macro) for all the probabilities +(not just the top-k). 
+ +Author: Theo Larcher + Alexis Joly +""" +from copy import deepcopy + +import numpy as np +import pandas as pd +from sklearn.metrics import (accuracy_score, precision_recall_fscore_support, + roc_auc_score, top_k_accuracy_score) + +# Constant variables +N_CLS = 174 +TOP_K = [1, 3, 5] +TASK = 'multiclass' + +# Print colors +INFO = '\033[93m' +RESET = '\033[0m' +LINK = '\033[94m' +BOLD = "\033[1m" + +# 0. Load data +df = pd.read_csv('predictions_and_evaluation/predictions_GLC24_SOLUTION_FILE.csv') +df['target_habitat_id'] = df['target_habitat_id'].astype(str) +df_gt = df.copy() +df_preds = df.copy() + +for rowi, row in deepcopy(df_gt).iterrows(): + tsi = np.array(row['target_habitat_id'].split()).astype(int) # Split the predictions string by space and convert to int + inds = np.where(tsi > (N_CLS - 1))[0] + vals = tsi[inds] + if inds.size > 0: + df_gt = df_gt.drop(rowi) + df_preds = df_preds.drop(rowi) + print(f"obs {rowi} of surveyId {row['surveyId']} removed because target_habitat_id value {vals} out of range") + +targets = df_gt['target_habitat_id'] +targets = [list(map(int, str(x).split())) for x in targets] + +preds = df_preds['predictions'] +preds = np.array([list(map(int, str(x).split())) for x in preds]) + +probas = df_preds['probas'] +probas = np.array([list(map(float, str(x).split())) for x in probas]) + +all_targets_oh = np.zeros((len(df_gt), N_CLS)) +all_probas = np.zeros_like(probas) +all_predictions_topk_oh = np.zeros((len(df_preds), N_CLS)) + + +if TASK == 'multilabel': + # 1. 
Convert data to usable types and compute one-hot encodings + res = pd.DataFrame(columns=['Precision_micro', 'Recall_micro', 'F1_micro', + 'Precision_samples', 'Recall_samples', 'F1_samples', + 'Precision_macro', 'Recall_macro', 'F1_macro', + 'AUC_micro', 'AUC_samples', 'AUC_macro']) + + idx = np.arange(len(all_predictions_topk_oh)).reshape(-1, 1) + all_targets_oh[idx, targets] = 1 # One-hot encode the targets + all_probas = probas[idx, np.argsort(preds, axis=1)] # Sort the probabilities in class order + + # 2. Compute Precision / Recall / F1-score + print('\nComputing Precision, Recall, F1-scores...') + prfs = {} + for topk in TOP_K: + for avg in ['micro', 'samples', 'macro']: + prf = precision_recall_fscore_support(all_targets_oh, all_predictions_topk_oh[:, :topk], average=avg, zero_division=np.nan)[:3] + prfs[f'Precision_{avg}_top-{topk}'] = prf[0] + prfs[f'Recall_{avg}_top-{topk}'] = prf[1] + prfs[f'F1_{avg}_top-{topk}'] = prf[2] + print(f"Top-{topk} {avg.upper()}: Precision, Recall, F1", prf) + + print('\nComputing Accuracy...') + acc = accuracy_score(targets, preds) + + # 3. Compute AUCs + print('\nComputing AUCs...') + # Find rows and columns with all zeros in both arrays, that is to say + # species that are never observed in any plot according to the ground truth + zero_cols_targets = np.all(all_targets_oh == 0, axis=0) + ones_cols_targets = np.all(all_targets_oh == 1, axis=0) + zero_cols = zero_cols_targets | ones_cols_targets + # Filter out rows and columns containing only zeros + filtered_targets = all_targets_oh[:][:, ~zero_cols] + filtered_probas = all_probas[:][:, ~zero_cols] + + aucs = {} + for avg in ['micro', 'samples', 'macro']: + auc = roc_auc_score(filtered_targets, filtered_probas, average=avg) + aucs[f'AUC_{avg}'] = auc + print(f"{avg.upper()}: AUC", auc) + + # 4. Save results + res.loc[0] = prfs | aucs + res.to_csv('Inference_PRC-AUC.csv', index=False) + print('\nResults saved to Inference_PRC-AUC.csv') + +elif TASK == 'multiclass': + # 1. 
Convert data to usable types and compute one-hot encodings + idx = np.arange(len(all_predictions_topk_oh)).reshape(-1, 1) + all_targets_oh[idx, targets] = 1 # One-hot encode the targets + all_probas = probas[idx, np.argsort(preds, axis=1)] # Sort the probabilities in class order + + # 2. Compute Precision / Recall / F1-score + print(f'{INFO}{BOLD}\nComputing Precision, Recall, F1-scores...{RESET}') + prfs = {} + for topk in TOP_K: + for avg in ['micro', 'samples', 'macro']: + all_predictions_topk_oh[idx, preds[:, :topk]] = 1 # One-hot encode the top-k predictions + prf = precision_recall_fscore_support(all_targets_oh, all_predictions_topk_oh, average=avg, zero_division=np.nan)[:3] + prfs[f'Precision_{avg}_top-{topk}'] = prf[0] + prfs[f'Recall_{avg}_top-{topk}'] = prf[1] + prfs[f'F1_{avg}_top-{topk}'] = prf[2] + print(f"Top-{topk} {avg.upper()}: Precision, Recall, F1", prf) + print("") + + print(f'{INFO}{BOLD}\nComputing Top-k Accuracy...{RESET}') + accs = {} + for topk in TOP_K: + acc = top_k_accuracy_score(targets, all_probas, k=topk, labels=np.arange(N_CLS)) + accs[f'Accuracy_multiclass_top-{topk}'] = acc + print(f"Top-{topk} Accuracy_multiclass: {acc}") + + # 4. Save results + res = pd.DataFrame({k: [v] for k, v in (prfs | accs).items()}) + res.to_csv('Inference_PRC-ACC.csv', index=False) + print('\nResults saved to Inference_PRC-ACC.csv') diff --git a/examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/glc25_cnn_multimodal_ensemble.py b/examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/glc25_cnn_multimodal_ensemble.py new file mode 100644 index 00000000..d575277e --- /dev/null +++ b/examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/glc25_cnn_multimodal_ensemble.py @@ -0,0 +1,106 @@ +"""Main script to run training or inference on GLC24 pre_extracted dataset. + +This script runs the GeoLifeCLEF2024 pre-extracted dataset to predict +species distribution using the Multi-Modal Ensemble model (MME). 
def set_seed(seed):
    """Seed the random number generators used during a run.

    Seeds PyTorch, NumPy and everything covered by Lightning's
    ``seed_everything``; when CUDA is available, also seeds all CUDA
    devices and switches cuDNN to deterministic, non-benchmarking mode.

    Parameters
    ----------
    seed : int
        seed value shared by all generators.
    """
    # Use the `pytorch_lightning` package already imported at module level
    # (the Trainer below comes from it) instead of a second, function-local
    # `lightning.pytorch` import that shadowed the module-level `pl`.
    torch.manual_seed(seed)  # PyTorch generator
    np.random.seed(seed)  # NumPy global generator
    pl.seed_everything(seed, workers=True)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)
        # Trade speed for reproducibility in cuDNN kernels
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False


@hydra.main(version_base="1.3", config_path="config/", config_name="glc24_cnn_multimodal_ensemble")
def main(cfg: DictConfig) -> None:
    """Run main script used for either training or inference.

    Builds the CSV / TensorBoard loggers, the GLC25 datamodule and the
    classification system, then either predicts on the test set and
    exports a CSV (``cfg.run.predict`` true) or fits and validates.

    Parameters
    ----------
    cfg : DictConfig
        hydra config dictionary created from the .yaml config file
        associated with this script.
    """
    set_seed(69)

    # Loggers
    log_dir = hydra.core.hydra_config.HydraConfig.get().runtime.output_dir
    log_dir = log_dir.split(hydra.utils.get_original_cwd())[1][1:]  # Transforming absolute path to relative path
    logger_csv = pl.loggers.CSVLogger(log_dir, name="", version=cfg.loggers.exp_name)
    logger_csv.log_hyperparams(cfg)
    logger_tb = pl.loggers.TensorBoardLogger(log_dir, name=cfg.loggers.log_dir_name, version=cfg.loggers.exp_name)
    logger_tb.log_hyperparams(cfg)
    logger = logging.getLogger("lightning.pytorch.core")
    logger.addHandler(logging.FileHandler(f"{log_dir}/core.log"))

    # Datamodule & Model
    # The file imports GLC25Datamodule; the previous reference to
    # GLC24Datamodule was an undefined name.
    datamodule = GLC25Datamodule(**cfg.data, **cfg.task)
    classif_system = ClassificationSystemGLC24(cfg.model, **cfg.optim,
                                               checkpoint_path=cfg.run.checkpoint_path,
                                               weights_dir=log_dir)  # multilabel

    # Lightning Trainer
    callbacks = [
        Summary(),
        ModelCheckpoint(
            dirpath=log_dir,
            filename="checkpoint-{epoch:02d}-{step}-{loss/val:.4f}",
            monitor="loss/val",
            mode="min",
            save_on_train_epoch_end=True,
            save_last=True,
            every_n_train_steps=100,
        ),
    ]
    trainer = pl.Trainer(logger=[logger_csv, logger_tb], callbacks=callbacks, **cfg.trainer, deterministic=True)

    # Run
    if cfg.run.predict:
        model_loaded = ClassificationSystemGLC24.load_from_checkpoint(classif_system.checkpoint_path,
                                                                      model=classif_system.model,
                                                                      hparams_preprocess=False,
                                                                      strict=False,
                                                                      weights_dir=log_dir)

        predictions = model_loaded.predict(datamodule, trainer)
        preds, probas = datamodule.predict_logits_to_class(predictions,
                                                           np.arange(cfg.data.num_classes),
                                                           activation_fn=torch.nn.Sigmoid())
        datamodule.export_predict_csv(preds, probas,
                                      out_dir=log_dir, out_name='predictions_test_dataset', top_k=25, return_csv=True)
        print('Test dataset prediction (extract) : ', predictions[:1])

    else:
        trainer.fit(classif_system, datamodule=datamodule, ckpt_path=classif_system.checkpoint_path)
        trainer.validate(classif_system, datamodule=datamodule)
datamodule=datamodule) + + +if __name__ == "__main__": + main() diff --git a/malpolon/data/datasets/geolifeclef2025_pre_extracted.py b/malpolon/data/datasets/geolifeclef2025_pre_extracted.py new file mode 100644 index 00000000..aea93d81 --- /dev/null +++ b/malpolon/data/datasets/geolifeclef2025_pre_extracted.py @@ -0,0 +1,518 @@ +"""This module provides Datasets and Datamodule for GeoLifeCLEF2024 data. + +Author: Lukas Picek + Theo Larcher + +License: GPLv3 +Python version: 3.10.6 +""" +import os +import subprocess +from pathlib import Path +from typing import Callable +import rasterio +import numpy as np +import pandas as pd +import torch +from sklearn.preprocessing import LabelEncoder +from torch.utils.data import DataLoader, Dataset +from torchvision import transforms +from torchvision.datasets.utils import (download_and_extract_archive, + extract_archive) +from torchvision.io import read_image + +from malpolon.data.data_module import BaseDataModule +from malpolon.data.utils import split_obs_spatially + + +def construct_patch_path(data_path, survey_id): + """Construct the patch file path. + + File path is reconstructed based on plot_id as './CD/AB/XXXXABCD.jpeg'. 
def quantile_normalize_percentile(band, low=2, high=98):
    """Remap a band onto its [low, high] percentile range, then min-max scale.

    This is the first of the two functions that were both named
    ``quantile_normalize``; it was shadowed by the later definition and
    therefore never reachable. It is kept under a distinct name so it can
    be called explicitly.

    Parameters
    ----------
    band : numpy.ndarray
        input band (any shape).
    low : float, optional
        lower percentile, by default 2
    high : float, optional
        upper percentile, by default 98

    Returns
    -------
    (numpy.ndarray)
        float32 array with the shape of ``band`` and values in [0, 1];
        all zeros when the remapped band is constant.
    """
    flat = band.flatten()
    sorted_band = np.sort(flat)
    # One target quantile per sorted value, evenly spread between low and high
    quantiles = np.percentile(sorted_band, np.linspace(low, high, len(sorted_band)))
    normalized_band = np.interp(flat, sorted_band, quantiles).reshape(band.shape)

    min_val, max_val = np.min(normalized_band), np.max(normalized_band)
    # Prevent division by zero if min_val == max_val
    if max_val == min_val:
        return np.zeros_like(normalized_band, dtype=np.float32)
    return ((normalized_band - min_val) / (max_val - min_val)).astype(np.float32)


def quantile_normalize(band):
    """Min-max scale a band to [0, 1], ignoring NaNs when finding the range.

    This is the definition that was effectively in use (the later of the
    two same-named functions), so callers see no behavior change.

    Parameters
    ----------
    band : array-like
        input band; converted to a float32 numpy array.

    Returns
    -------
    (numpy.ndarray)
        float32 array with the shape of ``band``; all zeros when the band
        is constant. NaN entries of the input stay NaN in the output.
    """
    band = np.array(band, dtype=np.float32)
    min_val = np.nanmin(band)  # nanmin / nanmax skip NaN entries
    max_val = np.nanmax(band)

    if max_val == min_val:
        return np.zeros_like(band)  # constant band: avoid dividing by zero

    return ((band - min_val) / (max_val - min_val)).astype(np.float32)
def load_bioclim(path, transform=None):
    """Load Bioclim pre-extracted time series data.

    Loads a tensor saved with ``torch.save``, replaces NaNs (via
    ``torch.nan_to_num``), moves the first axis to the last position and
    converts the result to a numpy array before applying ``transform``.

    Parameters
    ----------
    path : str
        path to data cube
    transform : callable, optional
        data transform, by default None

    Returns
    -------
    (array)
        numpy array of loaded data, or whatever ``transform`` returns when
        a transform is given
    """
    # `weights_only` is an argument of torch.load, not of torch.nan_to_num:
    # the previous call raised a TypeError on every invocation.
    bioclim_sample = torch.nan_to_num(torch.load(path, weights_only=True))
    bioclim_sample = bioclim_sample.permute(1, 2, 0)  # Change tensor shape from (C, H, W) to (H, W, C)
    bioclim_sample = bioclim_sample.numpy()  # Convert tensor to numpy array
    if transform:
        bioclim_sample = transform(bioclim_sample)
    return bioclim_sample
+ + Parameters + ---------- + path : str + path to data cube + survey_id: str + observation id which identifies the patch to load + transform : callable, optional + data transform, by default None + + Returns + ------- + (array) + numpy array of loaded transformed data + """ + with rasterio.open(path) as dataset: + image = dataset.read(out_dtype=np.float32) # Read all bands + image = np.array([quantile_normalize(band) for band in image]) # Apply quantile normalization + + image = np.transpose(image, (1, 2, 0)) # Convert to HWC format + if transform: + image = transform(image) + return image + + +class TrainDataset(Dataset): + """Train dataset with training transform functions. + + Inherits Dataset. + + Returns + ------- + (tuple) + tuple of data samples (landsat, bioclim, sentinel), label tensor (speciesId) and surveyId + """ + num_classes = 11255 + + def __init__( + self, + metadata: pd.DataFrame, + num_classes: int = 11255, + bioclim_data_dir: str = None, + landsat_data_dir: str = None, + sentinel_data_dir: str = None, + transform: Callable = None, + subset: str = 'train', # train or val + task: str = 'classification_multilabel', + **kwargs, + ): + """Class constructor. + + Parameters + ---------- + metadata : pd.DataFrame + observation dataframe. 
+ num_classes : int, optional + number of unique labels in the dataset, by default 11255 + bioclim_data_dir : str, optional + path to the bioclim dataset directory, by default None + landsat_data_dir : str, optional + path to the landsat dataset directory, by default None + sentinel_data_dir : str, optional + path to the sentinel dataset directory, by default None + transform : Callable, optional + transform function to apply to the data, by default None + task : str, optional + deep learning task to perform, by default 'classification_multilabel' + """ + self.transform = transform if transform else {'landsat': None, 'bioclim': None, 'sentinel': None} + self.subset = subset + self.task = task + self.num_classes = num_classes + self.landsat_data_dir = landsat_data_dir + self.bioclim_data_dir = bioclim_data_dir + self.sentinel_data_dir = sentinel_data_dir + self.metadata = metadata + if 'speciesId' in self.metadata.columns: + self.metadata = self.metadata.dropna(subset="speciesId").reset_index(drop=True) + self.metadata['speciesId'] = self.metadata['speciesId'].astype(int) + else: + self.metadata['speciesId'] = [None] * len(self.metadata) + self.label_dict = self.metadata.groupby('surveyId')['speciesId'].apply(list).to_dict() + self.metadata = self.metadata.drop_duplicates(subset="surveyId").reset_index(drop=True) + + def __len__(self): + return len(self.metadata) + + def __getitem__(self, idx): + survey_id = self.metadata.surveyId.iloc[idx] + data_samples = [] + + # Landsat data (pre-extracted time series) + if self.landsat_data_dir is not None: + landsat_sample = load_landsat(os.path.join(self.landsat_data_dir, f"GLC25-PA-{self.subset}-landsat-time-series_{survey_id}_cube.pt"), + transform=self.transform['landsat']) + data_samples.append(torch.tensor(np.array(landsat_sample), dtype=torch.float32)) + # Bioclim data (pre-extractions time series) + if self.bioclim_data_dir is not None: + bioclim_sample = load_bioclim(os.path.join(self.bioclim_data_dir, 
class TestDataset(TrainDataset):
    """Test dataset with test transform functions.

    Inherits TrainDataset (attributes and ``__len__``). ``__getitem__`` is
    overridden because the Landsat file name pattern used here
    (``landsat_time_series``) differs from the one used by TrainDataset
    (``landsat-time-series``).
    """
    __test__ = False  # name starts with "Test": keep pytest from collecting it

    def __init__(
        self,
        metadata: pd.DataFrame,
        num_classes: int = 11255,
        bioclim_data_dir: str = None,
        landsat_data_dir: str = None,
        sentinel_data_dir: str = None,
        transform: Callable = None,
        subset: str = 'test',  # test or val
        task: str = 'classification_multilabel',
        **kwargs,
    ):
        """Class constructor.

        Parameters
        ----------
        See TrainDataset description. Extra keyword arguments are passed
        through to TrainDataset.
        """
        # Forward num_classes / subset / task: they were previously accepted
        # but dropped, so the parent defaults applied (subset='train') and
        # the file names built in __getitem__ pointed at "train" cubes.
        super().__init__(
            metadata,
            num_classes=num_classes,
            bioclim_data_dir=bioclim_data_dir,
            landsat_data_dir=landsat_data_dir,
            sentinel_data_dir=sentinel_data_dir,
            transform=transform,
            subset=subset,
            task=task,
            **kwargs,
        )
        self.targets = np.array([0] * len(self.metadata))
        # Use the frame prepared by the parent constructor so that ids and
        # `targets` have the same length and follow the iteration order.
        self.observation_ids = self.metadata['surveyId']

    def __getitem__(self, idx):
        """Return the data samples, label and surveyId of observation ``idx``.

        Returns
        -------
        (tuple)
            one float32 tensor per configured modality (landsat, bioclim,
            sentinel, in that order), then the label and the surveyId.
        """
        survey_id = self.metadata.surveyId.iloc[idx]
        data_samples = []

        # Landsat data (pre-extracted time series)
        if self.landsat_data_dir is not None:
            landsat_sample = load_landsat(os.path.join(self.landsat_data_dir, f"GLC25-PA-{self.subset}-landsat_time_series_{survey_id}_cube.pt"),
                                          transform=self.transform['landsat'])
            data_samples.append(torch.tensor(np.array(landsat_sample), dtype=torch.float32))
        # Bioclim data (pre-extracted time series)
        if self.bioclim_data_dir is not None:
            bioclim_sample = load_bioclim(os.path.join(self.bioclim_data_dir, f"GLC25-PA-{self.subset}-bioclimatic_monthly_{survey_id}_cube.pt"),
                                          transform=self.transform['bioclim'])
            data_samples.append(torch.tensor(np.array(bioclim_sample), dtype=torch.float32))
        # Sentinel data (patches)
        if self.sentinel_data_dir is not None:
            sentinel_sample = load_sentinel(construct_patch_path(self.sentinel_data_dir, survey_id),
                                            transform=self.transform['sentinel'])
            data_samples.append(torch.tensor(np.array(sentinel_sample), dtype=torch.float32))

        # Labels
        if 'multiclass' in self.task:
            label = self.metadata.speciesId.iloc[idx]
        else:
            species_ids = self.label_dict.get(survey_id, [])  # Get list of species IDs for the survey ID
            label = torch.zeros(self.num_classes)  # Initialize label tensor
            label[species_ids] = 1  # Set the corresponding class index to 1 for each species

        return tuple(data_samples) + (label, survey_id,)
def __init__(
    self,
    data_paths: dict,
    metadata_paths: dict,
    num_classes: int,
    train_batch_size: int = 64,
    inference_batch_size: int = 16,
    num_workers: int = 16,
    sampler: Callable = None,
    dataset_kwargs: dict = None,
    download_data: bool = False,
    task: str = 'classification_multilabel',
    **kwargs,
):
    """Class constructor.

    Parameters
    ----------
    data_paths : dict
        a 2-level dictionary containing data paths. 1st level keys:
        "train" and "test", each containing another dictionary with
        keys: "landsat_data_dir", "bioclim_data_dir",
        "sentinel_data_dir" and values: the corresponding data paths
        as strings.
    metadata_paths : dict
        a dictionary containing the paths to the observations (or
        "metadata") as values for keys "train", "test", "val"
    num_classes : int
        number of classes to train on.
    train_batch_size : int, optional
        training batch size, by default 64
    inference_batch_size : int, optional
        inference batch size, by default 16
    num_workers : int, optional
        number of PyTorch workers, by default 16
    sampler : Callable, optional
        dataloader sampler to use, by default None (standard
        iteration)
    dataset_kwargs : dict, optional
        additional keyword arguments to pass to the dataset, by default
        None (treated as an empty dictionary)
    download_data : bool, optional
        if true, will offer to download the dataset (see ``download``),
        by default False
    task : str, optional
        Task to perform. Can take values in ['classification_multiclass',
        'classification_multilabel'], by default 'classification_multilabel'
    **kwargs
        extra attributes set verbatim on the instance; a ``root`` entry
        overrides the default dataset root "dataset/".
    """
    super().__init__(train_batch_size, inference_batch_size, num_workers)
    self.data_paths = data_paths
    self.metadata_paths = metadata_paths
    self.sampler = sampler
    # A `{}` default would be one dict object shared by every instance
    self.dataset_kwargs = {} if dataset_kwargs is None else dataset_kwargs
    self.num_classes = num_classes
    self.task = task
    self.root = "dataset/"
    self.__dict__.update(kwargs)  # may replace `root` (and add other attributes)
    self.root = Path(self.root)
    if download_data:
        self.download()
+ + Parameters + ---------- + split : str + dataset split to get, can take values in ['train', 'val', 'test'] + transform : Callable + transformfunctions to apply to the data + + Returns + ------- + Union[TrainDataset, TestDataset] + dataset class to return + """ + match split: + case 'train': + train_metadata = pd.read_csv(self.metadata_paths['train']) + dataset = TrainDataset(train_metadata, self.num_classes, **self.data_paths['train'], transform=transform, task=self.task, **self.dataset_kwargs, subset='train') + self.dataset_train = dataset + case 'val': + val_metadata = pd.read_csv(self.metadata_paths['val']) + dataset = TrainDataset(val_metadata, **self.data_paths['train'], transform=transform, task=self.task, **self.dataset_kwargs, subset='train') + self.dataset_val = dataset + case 'test': + test_metadata = pd.read_csv(self.metadata_paths['test']) + dataset = TestDataset(test_metadata, **self.data_paths['test'], transform=transform, task=self.task, **self.dataset_kwargs, subset='test') + self.dataset_test = dataset + return dataset + + def val_dataloader(self) -> DataLoader: + dataloader = DataLoader( + self.dataset_val, + batch_size=self.inference_batch_size, + num_workers=self.num_workers, + pin_memory=self.pin_memory, + shuffle=True, + ) + return dataloader + + def _check_integrity(self): + """Check if the dataset is already downloaded and split into train and val sets." + + Returns + ------- + (bool) + True if the dataset is already downloaded and split, False otherwise. + """ + paths = ['EnvironmentalValues', 'SateliteTimeSeries-Landsat', + 'SatelitePatches', 'EnvironmentalValues', 'BioclimTimeSeries', + 'GLC25_P0_metadata_train.csv', 'GLC25_PA_metadata_train.csv', + 'GLC25_PA_metadata_test.csv', 'GLC25_SAMPLE_SUBMISSION.csv'] + downloaded = all(map(lambda x: (self.root / x).exists(), paths)) + + split = (self.root / "GLC25_PA_metadata_train_train-10.0min.csv").exists() + if downloaded and not split: + print('Data already downloaded but not split. 
Splitting data spatially into train (90%) & val (10%) sets.') + split_obs_spatially(str(self.root / "GLC25_PA_metadata_train.csv"), val_size=0.10, spacing=0.01) + split = True + return downloaded and split + + def download(self): + """Download the GeolifeClef2025 dataset.""" + if self._check_integrity(): + print("Files already downloaded and verified") + return + + try: + import kaggle # pylint: disable=C0415,W0611 # noqa: F401 + except OSError as error: + raise OSError("Have you properly set up your Kaggle API token ? For more information, please refer to section 'Authentication' of the kaggle documentation : https://www.kaggle.com/docs/api") from error + + answer = input("You are about to download the GeoLifeClef2025 dataset which weighs ~3 GB. Do you want to continue ? [y/n]") + if answer.lower() in ["y", "yes"]: + if 'geolifeclef-2024' in self.root.parts: + self.root = self.root.parent + subprocess.call(f"kaggle competitions download -c geolifeclef-2025 -p {self.root}", shell=True) + print(f"Extracting geolifeclef-2024 to {self.root}") + extract_archive(os.path.join(self.root, "geolifeclef-2025.zip"), os.path.join(self.root, "geolifeclef-2025/"), remove_finished=True) + if self.root.parts[-1] != "geolifeclef-2025": + self.root = self.root / "geolifeclef-2025" + + # Split the dataset spatially + print('Splitting data spatially into train (90%) & val (10%) sets.') + split_obs_spatially(str(self.root / "GLC25_PA_metadata_train.csv"), val_size=0.10, spacing=0.01) + else: + print("Aborting download") + return + + @property + def train_transform(self): + """Return the training transform functions for each data modality. + + The normalization values are computed from the training dataset + (pre-extracted values) for each modality. + + Returns + ------- + (dict) + dictionary of transform functions for each data modality. 
+ """ + all_transforms = [torch.tensor] + landsat_transforms = [transforms.Normalize(mean=[30.071] * 6, + std=[24.860] * 6)] + bioclim_transforms = [transforms.Normalize(mean=[3884.726] * 4, + std=[2939.538] * 4)] + sentinel_transforms = [transforms.Normalize(mean=[78.761, 82.859, 71.288] + [146.082], + std=[26.074, 24.484, 23.275] + [39.518])] + + return {'landsat': transforms.Compose(all_transforms + landsat_transforms), + 'bioclim': transforms.Compose(all_transforms + bioclim_transforms), + 'sentinel': transforms.Compose(all_transforms + sentinel_transforms)} + + @property + def test_transform(self): + """Return the test transform functions for each data modality. + + The normalization values are computed from the test dataset + (pre-extracted values) for each modality. + + Returns + ------- + (dict) + dictionary of transform functions for each data modality. + """ + all_transforms = [torch.tensor] + landsat_transforms = [transforms.Normalize(mean=[30.923] * 6, + std=[25.722] * 6)] + bioclim_transforms = [transforms.Normalize(mean=[4004.812] * 4, + std=[3437.992] * 4)] + sentinel_transforms = [transforms.Normalize(mean=[78.761, 82.859, 71.288] + [143.796], + std=[26.074, 24.484, 23.275] + [43.626])] + return {'landsat': transforms.Compose(all_transforms + landsat_transforms), + 'bioclim': transforms.Compose(all_transforms + bioclim_transforms), + 'sentinel': transforms.Compose(all_transforms + sentinel_transforms)} diff --git a/toolbox/compute_mean_std_iteratively_from_sample.py b/toolbox/compute_mean_std_iteratively_from_sample.py index 565d240b..91b122ad 100644 --- a/toolbox/compute_mean_std_iteratively_from_sample.py +++ b/toolbox/compute_mean_std_iteratively_from_sample.py @@ -9,6 +9,7 @@ import numpy as np import pandas as pd +import rasterio import torch from PIL import Image from tqdm import tqdm @@ -18,6 +19,24 @@ LINK = '\033[94m' +def load_raster(fp: str): + """Load an raster from a file path. 
+ + Parameters + ---------- + fp : str + file path to the image. + + Returns + ------- + (array) + raster as a numpy array. + """ + with rasterio.open(fp) as dataset: + raster = dataset.read(out_dtype=np.float32) + return raster + + def load_img(fp: str): """Load an image from a file path. @@ -194,22 +213,27 @@ def main(paths_file: str, t1 = time() with open(paths_file, 'r', encoding="utf-8") as f: fps = f.read().splitlines() - fps = fps[:max_items] + # fps = fps[:max_items] + fps = np.array(fps)[np.random.choice(len(fps), size=min(len(fps), max_items), replace=False)] ims = iterative_mean_std if per_channel and data_type == 'img': ims = iterative_mean_std_img_per_channel if data_type == 'img': it_mean, it_std = ims(fps, load_img, compare_numpy) - print(f'Processed {INFO}{len(fps)}{RESET} images. Iterative mean: {INFO}{it_mean}{RESET}, Iterative std: {INFO}{it_std}{RESET} in {LINK}{(time() - t1):.3f}{RESET}s') + print(f'Processed {INFO}{len(fps)}{RESET} images.') + if data_type == 'tiff': + it_mean, it_std = ims(fps, load_raster, compare_numpy) + print(f'Processed {INFO}{len(fps)}{RESET} raster.') elif data_type == 'csv': it_mean, it_std = ims(fps, load_csv, compare_numpy) - print(f'Processed {INFO}{len(fps)}{RESET} csv pre-extracted obs files. Iterative mean: {INFO}{it_mean}{RESET}, Iterative std: {INFO}{it_std}{RESET} in {LINK}{(time() - t1):.3f}{RESET}s') + print(f'Processed {INFO}{len(fps)}{RESET} csv pre-extracted obs files.') elif data_type == 'pt': it_mean, it_std = ims(fps, load_pt, compare_numpy) - print(f'Processed {INFO}{len(fps)}{RESET} pytorch cubes. 
Iterative mean: {INFO}{it_mean}{RESET}, Iterative std: {INFO}{it_std}{RESET} in {LINK}{(time() - t1):.3f}{RESET}s') + print(f'Processed {INFO}{len(fps)}{RESET} pytorch cubes.') else: raise ValueError(f"Type {data_type} not recognized.") + print(f'Iterative mean: {INFO}{it_mean}{RESET}, Iterative std: {INFO}{it_std}{RESET} in {LINK}{(time() - t1):.3f}{RESET}s') if output: it_mean = [it_mean] if not isinstance(it_mean, list) else it_mean @@ -231,11 +255,11 @@ def main(paths_file: str, type=str) parser.add_argument("--max_items", help="Max number of items to process. Default is 1000.", - default=None, + default=1000, type=int) parser.add_argument("--type", help="Type of files to process.", - choices=['img', 'csv', 'pt'], + choices=['img', 'tiff', 'csv', 'pt'], type=str) parser.add_argument("--per_channel", help="Compute mean/std over each channel seperately.", From 2a996bc00d6f65aa263cdc5ae88c2232cf78659e Mon Sep 17 00:00:00 2001 From: tlarcher Date: Mon, 7 Apr 2025 18:05:07 +0200 Subject: [PATCH 03/20] Added GLC25 stats and data Readme --- .../dataset/geolifeclef-2025/README.md | 55 +++++++++++++++++++ .../stats/Stats_bioclim_train.csv | 2 + .../stats/Stats_bioclim_val.csv | 2 + .../stats/Stats_landsat_train.csv | 2 + .../stats/Stats_landsat_val.csv | 2 + .../stats/Stats_satellite_train.csv | 2 + .../stats/Stats_satellite_val.csv | 2 + 7 files changed, 67 insertions(+) create mode 100644 examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/dataset/geolifeclef-2025/README.md create mode 100644 examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/dataset/geolifeclef-2025/stats/Stats_bioclim_train.csv create mode 100644 examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/dataset/geolifeclef-2025/stats/Stats_bioclim_val.csv create mode 100644 examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/dataset/geolifeclef-2025/stats/Stats_landsat_train.csv create mode 100644 
examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/dataset/geolifeclef-2025/stats/Stats_landsat_val.csv create mode 100644 examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/dataset/geolifeclef-2025/stats/Stats_satellite_train.csv create mode 100644 examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/dataset/geolifeclef-2025/stats/Stats_satellite_val.csv diff --git a/examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/dataset/geolifeclef-2025/README.md b/examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/dataset/geolifeclef-2025/README.md new file mode 100644 index 00000000..87cbb553 --- /dev/null +++ b/examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/dataset/geolifeclef-2025/README.md @@ -0,0 +1,55 @@ +### A. Spatially splitting the dataset +To split the observation dataset into _train_ and _val_ subsets while avoiding spatial autocorrelation, we use Malpolon's toolbox method `split_obs_spatially.py` based on the library `verde`. The method takes as input an observation CSV file with **lon**, **lat** columns, and evenly splits the data subsets with respect to a spacing radius (by default: 10/60 degrees). + +The radius value can be any real number, but it should be coherent with the CRS of the dataset to split. In the case of GLC25, observation coordinates are registered in WGS84 EPSG:4326. So inputting 10/60 as spacing value corresponds to ~0.17 degrees, or 10 arcminutes. Over France, this corresponds to a spacing of around 17km. + +In this repository, we chose to split with a spacing of 0.01 degrees, or 0.6 arcminutes which, over France, corresponds to a spacing of around 1.1km. + +### B. Computing dataset moments +To compute the mean and standard deviation values of each modality of our dataset, we use the script `compute_mean_std_iteratively_from_sample.py` from Malpolon's toolbox, which approximates the real values of mean & std with an iterative computation based on a list of file paths. + +1. 
Produce text files containing the filepaths to each data element of the dataset for each modality. + +In a Python terminal session: +```python +def construct_patch_path(data_path, survey_id): + path = data_path + for d in (str(survey_id)[-2:], str(survey_id)[-4:-2]): + path = os.path.join(path, d) + path = os.path.join(path, f"{survey_id}.tiff") + return path + +df_train = pd.read_csv('GLC25_PA_metadata_train_train-0.6min.csv') +df_val = pd.read_csv('GLC25_PA_metadata_train_val-0.6min.csv') + +# Example for bioclim rasters +fps_train_bioclim = list(df_train['surveyId'].apply(lambda x: f'BioclimTimeSeries/cubes/PA-train/GLC25-PA-train-bioclimatic_monthly_{x}_cube.pt').values) +with open('fps_bioclim_train_train-0.6min.txt', 'w') as f: + for string in fps_train_bioclim: + f.write(string + '\n') + +# Example for landsat time series +fps_train_landsat = list(df_train['surveyId'].apply(lambda x: f'SatelliteTimeSeries-Landsat/cubes/PA-train/GLC25-PA-train-landsat-time-series_{x}_cube.pt').values) +with open('fps_landsat_train_train-0.6min.txt', 'w') as f: + for string in fps_train_landsat: + f.write(string + '\n') + +# Example for satellite patches +fps_val_satellite = list(df_val['surveyId'].apply(lambda x: construct_patch_path('SatellitePatches/PA-train/', x)).values) +with open('fps_landsat_train_val-0.6min.txt', 'w') as f: + for string in fps_val_satellite: + f.write(string + '\n') +``` + +2. Run the moments computation script. 
+ +```bash +python ../../../../../../toolbox/compute_mean_std_iteratively_from_sample.py -p fps_bioclim_train_val-0.6min.txt -o Stats_bioclim_val.csv --type tiff --max_items 10000 +``` + +### Glossary +- fps: filepaths +- PA: Presence Absence +- PO: Presence Only +- CRS: Coordinate Reference System +- xxx\_train\_[train,val]-\d.\dmin: spatial split (either the train or the validation part) of the observation dataset, with a spatial spacing of \d.\d arcminutes (with respect to the WGS84 CRS) diff --git a/examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/dataset/geolifeclef-2025/stats/Stats_bioclim_train.csv b/examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/dataset/geolifeclef-2025/stats/Stats_bioclim_train.csv new file mode 100644 index 00000000..f719c8de --- /dev/null +++ b/examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/dataset/geolifeclef-2025/stats/Stats_bioclim_train.csv @@ -0,0 +1,2 @@ +mean,std +3914.8479827880924,3080.6445717511765 diff --git a/examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/dataset/geolifeclef-2025/stats/Stats_bioclim_val.csv b/examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/dataset/geolifeclef-2025/stats/Stats_bioclim_val.csv new file mode 100644 index 00000000..7a1d738b --- /dev/null +++ b/examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/dataset/geolifeclef-2025/stats/Stats_bioclim_val.csv @@ -0,0 +1,2 @@ +mean,std +3955.529410424809,3234.002077993207 diff --git a/examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/dataset/geolifeclef-2025/stats/Stats_landsat_train.csv b/examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/dataset/geolifeclef-2025/stats/Stats_landsat_train.csv new file mode 100644 index 00000000..186e8304 --- /dev/null +++ b/examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/dataset/geolifeclef-2025/stats/Stats_landsat_train.csv @@ -0,0 +1,2 @@ +mean,std +30.654699535584506,25.70223457928363 diff --git 
a/examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/dataset/geolifeclef-2025/stats/Stats_landsat_val.csv b/examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/dataset/geolifeclef-2025/stats/Stats_landsat_val.csv new file mode 100644 index 00000000..8b9b00c5 --- /dev/null +++ b/examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/dataset/geolifeclef-2025/stats/Stats_landsat_val.csv @@ -0,0 +1,2 @@ +mean,std +30.269297362566068,25.212980775818476 diff --git a/examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/dataset/geolifeclef-2025/stats/Stats_satellite_train.csv b/examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/dataset/geolifeclef-2025/stats/Stats_satellite_train.csv new file mode 100644 index 00000000..5cb891b5 --- /dev/null +++ b/examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/dataset/geolifeclef-2025/stats/Stats_satellite_train.csv @@ -0,0 +1,2 @@ +mean,std +1184.0608320129434,1176.725015110066 diff --git a/examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/dataset/geolifeclef-2025/stats/Stats_satellite_val.csv b/examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/dataset/geolifeclef-2025/stats/Stats_satellite_val.csv new file mode 100644 index 00000000..35bec4e8 --- /dev/null +++ b/examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/dataset/geolifeclef-2025/stats/Stats_satellite_val.csv @@ -0,0 +1,2 @@ +mean,std +1187.8192469177131,1182.4769087380596 From 7b2f3d55689bae1426c4bece34d935f25aaf9eaf Mon Sep 17 00:00:00 2001 From: tlarcher Date: Mon, 7 Apr 2025 18:34:40 +0200 Subject: [PATCH 04/20] Corrected typos --- .../dataset/geolifeclef-2025/README.md | 11 +++++++---- .../glc25_cnn_multimodal_ensemble.py | 2 +- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/dataset/geolifeclef-2025/README.md 
b/examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/dataset/geolifeclef-2025/README.md index 87cbb553..b46184e3 100644 --- a/examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/dataset/geolifeclef-2025/README.md +++ b/examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/dataset/geolifeclef-2025/README.md @@ -12,6 +12,9 @@ To compute the mean and standard deviation values of each modality of our datase In a Python terminal session: ```python +import os +import pandas as pd + def construct_patch_path(data_path, survey_id): path = data_path for d in (str(survey_id)[-2:], str(survey_id)[-4:-2]): @@ -26,19 +29,19 @@ df_val = pd.read_csv('GLC25_PA_metadata_train_val-0.6min.csv') fps_train_bioclim = list(df_train['surveyId'].apply(lambda x: f'BioclimTimeSeries/cubes/PA-train/GLC25-PA-train-bioclimatic_monthly_{x}_cube.pt').values) with open('fps_bioclim_train_train-0.6min.txt', 'w') as f: for string in fps_train_bioclim: - f.write(string + '\n') + f.write(string + '\n') # Example for landsat time series fps_train_landsat = list(df_train['surveyId'].apply(lambda x: f'SatelliteTimeSeries-Landsat/cubes/PA-train/GLC25-PA-train-landsat-time-series_{x}_cube.pt').values) with open('fps_landsat_train_train-0.6min.txt', 'w') as f: for string in fps_train_landsat: - f.write(string + '\n') + f.write(string + '\n') # Example for satellite patches fps_val_satellite = list(df_val['surveyId'].apply(lambda x: construct_patch_path('SatellitePatches/PA-train/', x)).values) -with open('fps_landsat_train_val-0.6min.txt', 'w') as f: +with open('fps_satellite_train_val-0.6min.txt', 'w') as f: for string in fps_val_satellite: - f.write(string + '\n') + f.write(string + '\n') ``` 2. Run the moments computation script. 
diff --git a/examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/glc25_cnn_multimodal_ensemble.py b/examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/glc25_cnn_multimodal_ensemble.py index d575277e..efb9b1a4 100644 --- a/examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/glc25_cnn_multimodal_ensemble.py +++ b/examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/glc25_cnn_multimodal_ensemble.py @@ -39,7 +39,7 @@ def set_seed(seed): torch.backends.cudnn.benchmark = False -@hydra.main(version_base="1.3", config_path="config/", config_name="glc24_cnn_multimodal_ensemble") +@hydra.main(version_base="1.3", config_path="config/", config_name="glc25_cnn_multimodal_ensemble") def main(cfg: DictConfig) -> None: """Run main script used for either training or inference. From 1c140e2bab1eee57cd2b8555d7cab8faeb250a56 Mon Sep 17 00:00:00 2001 From: tlarcher Date: Mon, 7 Apr 2025 19:42:50 +0200 Subject: [PATCH 05/20] Updated compute_mean_std_iteratively_from_sample.py so it still computes the mean and std even if some data elements are full of NaN --- toolbox/compute_mean_std_iteratively_from_sample.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/toolbox/compute_mean_std_iteratively_from_sample.py b/toolbox/compute_mean_std_iteratively_from_sample.py index 91b122ad..625b2243 100644 --- a/toolbox/compute_mean_std_iteratively_from_sample.py +++ b/toolbox/compute_mean_std_iteratively_from_sample.py @@ -6,7 +6,7 @@ import argparse from time import time from typing import Callable - +import warnings import numpy as np import pandas as pd import rasterio @@ -116,15 +116,23 @@ def iterative_mean_std(fps: list, mean = 0 mean2 = 0 data = [] + n_skips = 0 for k, fp in tqdm(enumerate(fps), total=len(fps)): x = load_fun(fp) # Giving a large type is important to avoid value overflow with mean squared if compare_numpy: data.append(x) - mean += (np.nanmean(x) - mean) / (k + 1) + nanmean = np.nanmean(x) + if 
np.isnan(nanmean): + n_skips += 1 + warnings.warn(f'File {fp} contains only NaN values. Skipping...') + continue + mean += (nanmean - mean) / (k + 1) mean2 += (np.nanmean(x**2) - mean2) / (k + 1) var = mean2 - mean**2 if compare_numpy: print(f'Numpy mean: {INFO}{np.mean(data)}{RESET}, Numpy std: {INFO}{np.std(data)}{RESET}') + if n_skips > 0: + print(f'Skipped {INFO}{n_skips}{RESET} files due to: containing only NaN values.') return mean, np.sqrt(var) def iterative_mean_std_img_per_channel(fps: list, From 3a1d2a3f8c6fcb6e2aca9a208fcb2578b7169408 Mon Sep 17 00:00:00 2001 From: tlarcher Date: Mon, 7 Apr 2025 19:43:34 +0200 Subject: [PATCH 06/20] Added GLC25 test stats --- .../dataset/geolifeclef-2025/stats/Stats_bioclim_test.csv | 2 ++ .../dataset/geolifeclef-2025/stats/Stats_landsat_test.csv | 2 ++ .../dataset/geolifeclef-2025/stats/Stats_satellite_test.csv | 2 ++ 3 files changed, 6 insertions(+) create mode 100644 examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/dataset/geolifeclef-2025/stats/Stats_bioclim_test.csv create mode 100644 examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/dataset/geolifeclef-2025/stats/Stats_landsat_test.csv create mode 100644 examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/dataset/geolifeclef-2025/stats/Stats_satellite_test.csv diff --git a/examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/dataset/geolifeclef-2025/stats/Stats_bioclim_test.csv b/examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/dataset/geolifeclef-2025/stats/Stats_bioclim_test.csv new file mode 100644 index 00000000..920bb48e --- /dev/null +++ b/examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/dataset/geolifeclef-2025/stats/Stats_bioclim_test.csv @@ -0,0 +1,2 @@ +mean,std +3932.149871972656,3490.3687862811103 diff --git a/examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/dataset/geolifeclef-2025/stats/Stats_landsat_test.csv 
b/examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/dataset/geolifeclef-2025/stats/Stats_landsat_test.csv new file mode 100644 index 00000000..03da394c --- /dev/null +++ b/examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/dataset/geolifeclef-2025/stats/Stats_landsat_test.csv @@ -0,0 +1,2 @@ +mean,std +26.188058348891673,29.624102936518728 diff --git a/examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/dataset/geolifeclef-2025/stats/Stats_satellite_test.csv b/examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/dataset/geolifeclef-2025/stats/Stats_satellite_test.csv new file mode 100644 index 00000000..7c5071e1 --- /dev/null +++ b/examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/dataset/geolifeclef-2025/stats/Stats_satellite_test.csv @@ -0,0 +1,2 @@ +mean,std +936.266347143558,1169.9037752672536 From 16921f749de4407af4bdd7149b6eb44fa6cf89fb Mon Sep 17 00:00:00 2001 From: tlarcher Date: Tue, 8 Apr 2025 10:33:19 +0200 Subject: [PATCH 07/20] Added GLC25 satellite test stats. 
- compute_mean_std_iteratively_from_sample.py: modified tiff loading function to permute axis to match img loading function - fixed typos --- .../stats/Stats_satellite_test.csv | 5 +- .../stats/Stats_satellite_train.csv | 5 +- .../stats/Stats_satellite_val.csv | 5 +- .../datasets/geolifeclef2025_pre_extracted.py | 62 +++++++++++++++---- ...ompute_mean_std_iteratively_from_sample.py | 3 +- 5 files changed, 64 insertions(+), 16 deletions(-) diff --git a/examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/dataset/geolifeclef-2025/stats/Stats_satellite_test.csv b/examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/dataset/geolifeclef-2025/stats/Stats_satellite_test.csv index 7c5071e1..5fc9b91e 100644 --- a/examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/dataset/geolifeclef-2025/stats/Stats_satellite_test.csv +++ b/examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/dataset/geolifeclef-2025/stats/Stats_satellite_test.csv @@ -1,2 +1,5 @@ mean,std -936.266347143558,1169.9037752672536 +517.7869262695312,530.5372924804688 +565.6556396484375,497.5302734375 +376.7779541015625,427.4356994628906 +2289.86279296875,1510.10400390625 diff --git a/examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/dataset/geolifeclef-2025/stats/Stats_satellite_train.csv b/examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/dataset/geolifeclef-2025/stats/Stats_satellite_train.csv index 5cb891b5..6871e487 100644 --- a/examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/dataset/geolifeclef-2025/stats/Stats_satellite_train.csv +++ b/examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/dataset/geolifeclef-2025/stats/Stats_satellite_train.csv @@ -1,2 +1,5 @@ mean,std -1184.0608320129434,1176.725015110066 +629.6244506835938,435.9951171875 +691.8153076171875,371.3965759277344 +460.6056823730469,342.8971252441406 +2959.370361328125,925.369140625 diff --git 
a/examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/dataset/geolifeclef-2025/stats/Stats_satellite_val.csv b/examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/dataset/geolifeclef-2025/stats/Stats_satellite_val.csv index 35bec4e8..7b42aa3f 100644 --- a/examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/dataset/geolifeclef-2025/stats/Stats_satellite_val.csv +++ b/examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/dataset/geolifeclef-2025/stats/Stats_satellite_val.csv @@ -1,2 +1,5 @@ mean,std -1187.8192469177131,1182.4769087380596 +633.1102905273438,465.04644775390625 +692.7642211914062,398.9754333496094 +462.1891784667969,370.75921630859375 +2950.603515625,927.0215454101562 diff --git a/malpolon/data/datasets/geolifeclef2025_pre_extracted.py b/malpolon/data/datasets/geolifeclef2025_pre_extracted.py index aea93d81..6a423927 100644 --- a/malpolon/data/datasets/geolifeclef2025_pre_extracted.py +++ b/malpolon/data/datasets/geolifeclef2025_pre_extracted.py @@ -470,6 +470,20 @@ def download(self): print("Aborting download") return + def get_val_dataset(self) -> Dataset: + """Call self.get_dataset to return the validation dataset. + + Returns + ------- + Dataset + validation dataset + """ + dataset = self.get_dataset( + split="val", + transform=self.val_transform, + ) + return dataset + @property def train_transform(self): """Return the training transform functions for each data modality. @@ -483,12 +497,36 @@ def train_transform(self): dictionary of transform functions for each data modality. 
""" all_transforms = [torch.tensor] - landsat_transforms = [transforms.Normalize(mean=[30.071] * 6, - std=[24.860] * 6)] - bioclim_transforms = [transforms.Normalize(mean=[3884.726] * 4, - std=[2939.538] * 4)] - sentinel_transforms = [transforms.Normalize(mean=[78.761, 82.859, 71.288] + [146.082], - std=[26.074, 24.484, 23.275] + [39.518])] + landsat_transforms = [transforms.Normalize(mean=[30.654] * 6, + std=[25.702] * 6)] + bioclim_transforms = [transforms.Normalize(mean=[3914.847] * 4, + std=[3080.644] * 4)] + sentinel_transforms = [transforms.Normalize(mean=[1184.060, 1184.060, 1184.060] + [1184.060], + std=[1176.725, 1176.725, 1176.725] + [1176.725])] + + return {'landsat': transforms.Compose(all_transforms + landsat_transforms), + 'bioclim': transforms.Compose(all_transforms + bioclim_transforms), + 'sentinel': transforms.Compose(all_transforms + sentinel_transforms)} + + @property + def val_transform(self): + """Return the training transform functions for each data modality. + + The normalization values are computed from the training dataset + (pre-extracted values) for each modality. + + Returns + ------- + (dict) + dictionary of transform functions for each data modality. + """ + all_transforms = [torch.tensor] + landsat_transforms = [transforms.Normalize(mean=[30.269] * 6, + std=[25.212] * 6)] + bioclim_transforms = [transforms.Normalize(mean=[3955.529] * 4, + std=[3234.002] * 4)] + sentinel_transforms = [transforms.Normalize(mean=[1187.819, 1187.819, 1187.819] + [1187.819], + std=[1182.476, 1182.476, 1182.476] + [1182.476])] return {'landsat': transforms.Compose(all_transforms + landsat_transforms), 'bioclim': transforms.Compose(all_transforms + bioclim_transforms), @@ -507,12 +545,12 @@ def test_transform(self): dictionary of transform functions for each data modality. 
""" all_transforms = [torch.tensor] - landsat_transforms = [transforms.Normalize(mean=[30.923] * 6, - std=[25.722] * 6)] - bioclim_transforms = [transforms.Normalize(mean=[4004.812] * 4, - std=[3437.992] * 4)] - sentinel_transforms = [transforms.Normalize(mean=[78.761, 82.859, 71.288] + [143.796], - std=[26.074, 24.484, 23.275] + [43.626])] + landsat_transforms = [transforms.Normalize(mean=[26.188] * 6, + std=[29.624] * 6)] + bioclim_transforms = [transforms.Normalize(mean=[3932.149] * 4, + std=[3490.368] * 4)] + sentinel_transforms = [transforms.Normalize(mean=[936.266, 936.266, 936.266] + [936.266], + std=[1169.903, 1169.903, 1169.903] + [1169.903])] return {'landsat': transforms.Compose(all_transforms + landsat_transforms), 'bioclim': transforms.Compose(all_transforms + bioclim_transforms), 'sentinel': transforms.Compose(all_transforms + sentinel_transforms)} diff --git a/toolbox/compute_mean_std_iteratively_from_sample.py b/toolbox/compute_mean_std_iteratively_from_sample.py index 625b2243..547b95f7 100644 --- a/toolbox/compute_mean_std_iteratively_from_sample.py +++ b/toolbox/compute_mean_std_iteratively_from_sample.py @@ -34,6 +34,7 @@ def load_raster(fp: str): """ with rasterio.open(fp) as dataset: raster = dataset.read(out_dtype=np.float32) + raster = np.transpose(raster, (1, 2, 0)) # Move the first axis to the last axis return raster @@ -224,7 +225,7 @@ def main(paths_file: str, # fps = fps[:max_items] fps = np.array(fps)[np.random.choice(len(fps), size=min(len(fps), max_items), replace=False)] ims = iterative_mean_std - if per_channel and data_type == 'img': + if per_channel and data_type in ['img', 'tiff']: ims = iterative_mean_std_img_per_channel if data_type == 'img': From 6a2bb553d8ad2ed728dcc131ab0af15ddd6d98b8 Mon Sep 17 00:00:00 2001 From: tlarcher Date: Tue, 8 Apr 2025 11:41:00 +0200 Subject: [PATCH 08/20] Computed channel-wise satellite patches moments and updated the data transform values accordingly. 
- .gitignore: added data folders with typo in name --- .../geolifeclef2025_pre_extracted/.gitignore | 2 ++ .../dataset/geolifeclef-2025/README.md | 2 ++ .../data/datasets/geolifeclef2025_pre_extracted.py | 12 ++++++------ 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/.gitignore b/examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/.gitignore index 5e3111a1..40b01faf 100644 --- a/examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/.gitignore +++ b/examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/.gitignore @@ -2,7 +2,9 @@ dataset/geolifeclef-2025/BioclimTimeSeries dataset/geolifeclef-2025/EnvironmentalValues dataset/geolifeclef-2025/SatellitePatches +dataset/geolifeclef-2025/SatelitePatches dataset/geolifeclef-2025/SatelliteTimeSeries-Landsat +dataset/geolifeclef-2025/SateliteTimeSeries-Landsat dataset/geolifeclef-2025/*.csv dataset/geolifeclef-2025/stats/fps* diff --git a/examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/dataset/geolifeclef-2025/README.md b/examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/dataset/geolifeclef-2025/README.md index b46184e3..3e5633a8 100644 --- a/examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/dataset/geolifeclef-2025/README.md +++ b/examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/dataset/geolifeclef-2025/README.md @@ -50,6 +50,8 @@ with open('fps_satellite_train_val-0.6min.txt', 'w') as f: python ../../../../../../toolbox/compute_mean_std_iteratively_from_sample.py -p fps_bioclim_train_val-0.6min.txt -o Stats_bioclim_val.csv --type tiff --max_items 10000 ``` +For Satellite patches (Sentinel-2A), add the argument `--per_channel` to compute the moments for each of the 4 channels: red, green, blue, nir. The output CSV contains the values for those channels in the same order row-wise. You can verify the order of bands with the command `gdalinfo <path>/<file>.tiff`. 
+ ### Glossary - fps: filepaths - PA: Presence Absence diff --git a/malpolon/data/datasets/geolifeclef2025_pre_extracted.py b/malpolon/data/datasets/geolifeclef2025_pre_extracted.py index 6a423927..c7580998 100644 --- a/malpolon/data/datasets/geolifeclef2025_pre_extracted.py +++ b/malpolon/data/datasets/geolifeclef2025_pre_extracted.py @@ -501,8 +501,8 @@ def train_transform(self): std=[25.702] * 6)] bioclim_transforms = [transforms.Normalize(mean=[3914.847] * 4, std=[3080.644] * 4)] - sentinel_transforms = [transforms.Normalize(mean=[1184.060, 1184.060, 1184.060] + [1184.060], - std=[1176.725, 1176.725, 1176.725] + [1176.725])] + sentinel_transforms = [transforms.Normalize(mean=[629.624, 691.815, 460.605] + [2959.370], + std=[435.995, 371.396, 342.897] + [925.369])] return {'landsat': transforms.Compose(all_transforms + landsat_transforms), 'bioclim': transforms.Compose(all_transforms + bioclim_transforms), @@ -525,8 +525,8 @@ def val_transform(self): std=[25.212] * 6)] bioclim_transforms = [transforms.Normalize(mean=[3955.529] * 4, std=[3234.002] * 4)] - sentinel_transforms = [transforms.Normalize(mean=[1187.819, 1187.819, 1187.819] + [1187.819], - std=[1182.476, 1182.476, 1182.476] + [1182.476])] + sentinel_transforms = [transforms.Normalize(mean=[633.110, 692.764, 462.189] + [2950.603], + std=[465.046, 398.975, 370.759] + [927.021])] return {'landsat': transforms.Compose(all_transforms + landsat_transforms), 'bioclim': transforms.Compose(all_transforms + bioclim_transforms), @@ -549,8 +549,8 @@ def test_transform(self): std=[29.624] * 6)] bioclim_transforms = [transforms.Normalize(mean=[3932.149] * 4, std=[3490.368] * 4)] - sentinel_transforms = [transforms.Normalize(mean=[936.266, 936.266, 936.266] + [936.266], - std=[1169.903, 1169.903, 1169.903] + [1169.903])] + sentinel_transforms = [transforms.Normalize(mean=[517.786, 565.655, 376.777] + [2289.862], + std=[530.537, 497.530, 427.435] + [1510.104])] return {'landsat': transforms.Compose(all_transforms + 
landsat_transforms), 'bioclim': transforms.Compose(all_transforms + bioclim_transforms), 'sentinel': transforms.Compose(all_transforms + sentinel_transforms)} From 99c30311a6cec6b587f28b15b6e368528ed21bfd Mon Sep 17 00:00:00 2001 From: tlarcher Date: Tue, 8 Apr 2025 15:20:35 +0200 Subject: [PATCH 09/20] Confirmed GLC25 is running for training, transfert, inference. - glc25_cnn_multimodal_ensemble.py: added necessary class atributes to allow prediction files export --- ...aml => glc25_cnn_multimodal_ensemble.yaml} | 24 +++++++++---------- .../glc25_cnn_multimodal_ensemble.py | 2 +- .../datasets/geolifeclef2025_pre_extracted.py | 21 ++++++++-------- 3 files changed, 24 insertions(+), 23 deletions(-) rename examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/config/{glc24_cnn_multimodal_ensemble.yaml => glc25_cnn_multimodal_ensemble.yaml} (70%) diff --git a/examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/config/glc24_cnn_multimodal_ensemble.yaml b/examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/config/glc25_cnn_multimodal_ensemble.yaml similarity index 70% rename from examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/config/glc24_cnn_multimodal_ensemble.yaml rename to examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/config/glc25_cnn_multimodal_ensemble.yaml index 47db8020..efc7201c 100644 --- a/examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/config/glc24_cnn_multimodal_ensemble.yaml +++ b/examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/config/glc25_cnn_multimodal_ensemble.yaml @@ -4,22 +4,22 @@ hydra: run: predict: false - checkpoint_path: # "outputs/glc25_cnn_multimodal_ensemble/???/last.ckpt" + checkpoint_path: # "outputs/glc25_cnn_multimodal_ensemble/2025-04-08_14-52-54/last.ckpt" data: root: "dataset/geolifeclef-2025/" data_paths: train: - landsat_data_dir: "${data.root}SateliteTimeSeries-Landsat/cubes/PA-train/" - bioclim_data_dir: 
"${data.root}BioclimTimeSeries/cubes/PA-train/" - sentinel_data_dir: "${data.root}SatelitePatches/PA-train/" + landsat_data_dir: "${data.root}SateliteTimeSeries-Landsat/cubes/PA-train/" + bioclim_data_dir: "${data.root}BioclimTimeSeries/cubes/PA-train/" + sentinel_data_dir: "${data.root}SatelitePatches/PA-train/" test: - landsat_data_dir: "${data.root}SateliteTimeSeries-Landsat/cubes/PA-test/" - bioclim_data_dir: "${data.root}BioclimTimeSeries/cubes/PA-test/" - sentinel_data_dir: "${data.root}SatelitePatches/PA-test/" + landsat_data_dir: "${data.root}SateliteTimeSeries-Landsat/cubes/PA-test/" + bioclim_data_dir: "${data.root}BioclimTimeSeries/cubes/PA-test/" + sentinel_data_dir: "${data.root}SatelitePatches/PA-test/" metadata_paths: - train: "${data.root}GLC25_PA_metadata_train_train-10.0min.csv" - val: "${data.root}GLC25_PA_metadata_train_val-10.0min.csv" + train: "${data.root}GLC25_PA_metadata_train_train-0.6min.csv" + val: "${data.root}GLC25_PA_metadata_train_val-0.6min.csv" test: "${data.root}GLC25_PA_metadata_test.csv" num_classes: &num_classes 11255 download_data: True @@ -34,14 +34,14 @@ trainer: # gpus: 1 # Deprecated since pytorchlightning 1.7, removed in 2.0. 
Replaced by the 2 next attributes accelerator: "gpu" devices: 'auto' - max_epochs: 20 - val_check_interval: 100 + max_epochs: 21 # if resuming training from our pre-trained MME model, needs to be > 19 + # val_check_interval: 100 check_val_every_n_epoch: 1 # log_every_n_steps: 100 model: provider_name: "malpolon" # choose from ["malpolon", "timm", "torchvision"] - model_name: "glc24_multimodal_ensemble" + model_name: "glc24_multimodal_ensemble" # The GLC24 model is used for GLC25 model_kwargs: pretrained: true # Deprecated in torchvision since 0.13 (replaced by "weights") but used by timm modifiers: diff --git a/examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/glc25_cnn_multimodal_ensemble.py b/examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/glc25_cnn_multimodal_ensemble.py index efb9b1a4..fc0a0da8 100644 --- a/examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/glc25_cnn_multimodal_ensemble.py +++ b/examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/glc25_cnn_multimodal_ensemble.py @@ -61,7 +61,7 @@ def main(cfg: DictConfig) -> None: logger.addHandler(logging.FileHandler(f"{log_dir}/core.log")) # Datamodule & Model - datamodule = GLC24Datamodule(**cfg.data, **cfg.task) + datamodule = GLC25Datamodule(**cfg.data, **cfg.task) classif_system = ClassificationSystemGLC24(cfg.model, **cfg.optim, checkpoint_path=cfg.run.checkpoint_path, weights_dir=log_dir) # multilabel diff --git a/malpolon/data/datasets/geolifeclef2025_pre_extracted.py b/malpolon/data/datasets/geolifeclef2025_pre_extracted.py index c7580998..beaded60 100644 --- a/malpolon/data/datasets/geolifeclef2025_pre_extracted.py +++ b/malpolon/data/datasets/geolifeclef2025_pre_extracted.py @@ -91,9 +91,9 @@ def load_landsat(path, transform=None): (array) numpy array of loaded transformed data """ - landsat_sample = torch.nan_to_num(torch.load(path)) + landsat_sample = torch.nan_to_num(torch.load(path, weights_only=True)) if isinstance(landsat_sample, torch.Tensor): - 
landsat_sample = landsat_sample.permute(1, 2, 0) # Change tensor shape from (C, H, W) to (H, W, C) + # landsat_sample = landsat_sample.permute(1, 2, 0) # Change tensor shape from (C, H, W) to (H, W, C) landsat_sample = landsat_sample.numpy() # Convert tensor to numpy array if transform: landsat_sample = transform(landsat_sample) @@ -118,9 +118,9 @@ def load_bioclim(path, transform=None): (array) numpy array of loaded transformed data """ - bioclim_sample = torch.nan_to_num(torch.load(path), weights_only=True) + bioclim_sample = torch.nan_to_num(torch.load(path, weights_only=True)) if isinstance(bioclim_sample, torch.Tensor): - bioclim_sample = bioclim_sample.permute(1, 2, 0) # Change tensor shape from (C, H, W) to (H, W, C) + # bioclim_sample = bioclim_sample.permute(1, 2, 0) # Change tensor shape from (C, H, W) to (H, W, C) bioclim_sample = bioclim_sample.numpy() # Convert tensor to numpy array if transform: bioclim_sample = transform(bioclim_sample) @@ -150,8 +150,7 @@ def load_sentinel(path, transform=None): with rasterio.open(path) as dataset: image = dataset.read(out_dtype=np.float32) # Read all bands image = np.array([quantile_normalize(band) for band in image]) # Apply quantile normalization - - image = np.transpose(image, (1, 2, 0)) # Convert to HWC format + # image = np.transpose(image, (1, 2, 0)) # Convert to HWC format if transform: image = transform(image) return image @@ -214,7 +213,7 @@ def __init__( else: self.metadata['speciesId'] = [None] * len(self.metadata) self.label_dict = self.metadata.groupby('surveyId')['speciesId'].apply(list).to_dict() - self.metadata = self.metadata.drop_duplicates(subset="surveyId").reset_index(drop=True) + self.metadata = self.metadata.drop_duplicates(subset="surveyId").reset_index(drop=True).sample(1000) def __len__(self): return len(self.metadata) @@ -282,7 +281,9 @@ def __init__( self.transform = transform if transform else {'landsat': None, 'bioclim': None, 'sentinel': None} super().__init__(metadata, 
bioclim_data_dir=bioclim_data_dir, landsat_data_dir=landsat_data_dir, sentinel_data_dir=sentinel_data_dir, transform=transform) self.targets = np.array([0] * len(self.metadata)) - self.observation_ids = metadata['surveyId'] + self.observation_ids = self.metadata['surveyId'] + self.coordinates = self.metadata[['lon', 'lat']].values + self.subset = 'test' def __getitem__(self, idx): survey_id = self.metadata.surveyId[idx] @@ -417,7 +418,7 @@ def val_dataloader(self) -> DataLoader: batch_size=self.inference_batch_size, num_workers=self.num_workers, pin_memory=self.pin_memory, - shuffle=True, + shuffle=False, ) return dataloader @@ -435,7 +436,7 @@ def _check_integrity(self): 'GLC25_PA_metadata_test.csv', 'GLC25_SAMPLE_SUBMISSION.csv'] downloaded = all(map(lambda x: (self.root / x).exists(), paths)) - split = (self.root / "GLC25_PA_metadata_train_train-10.0min.csv").exists() + split = (self.root / "GLC25_PA_metadata_train_train-0.6min.csv").exists() if downloaded and not split: print('Data already downloaded but not split. Splitting data spatially into train (90%) & val (10%) sets.') split_obs_spatially(str(self.root / "GLC25_PA_metadata_train.csv"), val_size=0.10, spacing=0.01) From 97c255170121b125ce65150235b1be9ce124fd2f Mon Sep 17 00:00:00 2001 From: tlarcher Date: Tue, 8 Apr 2025 16:06:31 +0200 Subject: [PATCH 10/20] GLC25 config: restored default values. 
--- .../config/glc25_cnn_multimodal_ensemble.yaml | 4 ++-- malpolon/data/datasets/geolifeclef2025_pre_extracted.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/config/glc25_cnn_multimodal_ensemble.yaml b/examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/config/glc25_cnn_multimodal_ensemble.yaml index efc7201c..a4896d6c 100644 --- a/examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/config/glc25_cnn_multimodal_ensemble.yaml +++ b/examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/config/glc25_cnn_multimodal_ensemble.yaml @@ -35,9 +35,9 @@ trainer: accelerator: "gpu" devices: 'auto' max_epochs: 21 # if resuming training from our pre-trained MME model, needs to be > 19 - # val_check_interval: 100 + val_check_interval: 100 check_val_every_n_epoch: 1 - # log_every_n_steps: 100 + log_every_n_steps: 100 model: provider_name: "malpolon" # choose from ["malpolon", "timm", "torchvision"] diff --git a/malpolon/data/datasets/geolifeclef2025_pre_extracted.py b/malpolon/data/datasets/geolifeclef2025_pre_extracted.py index beaded60..47658c0c 100644 --- a/malpolon/data/datasets/geolifeclef2025_pre_extracted.py +++ b/malpolon/data/datasets/geolifeclef2025_pre_extracted.py @@ -213,7 +213,7 @@ def __init__( else: self.metadata['speciesId'] = [None] * len(self.metadata) self.label_dict = self.metadata.groupby('surveyId')['speciesId'].apply(list).to_dict() - self.metadata = self.metadata.drop_duplicates(subset="surveyId").reset_index(drop=True).sample(1000) + self.metadata = self.metadata.drop_duplicates(subset="surveyId").reset_index(drop=True) def __len__(self): return len(self.metadata) From 8bf6424ffdcda72766f89a1ed7938d88d412ed5a Mon Sep 17 00:00:00 2001 From: tlarcher Date: Tue, 8 Apr 2025 20:54:39 +0200 Subject: [PATCH 11/20] Added GLC25 unit tests. - Modified GLC25 to correctly provide multilabel and multiclass behaviors. 
Previously, multilabel would only give 1 one in the one-hot vector (so does GLC24 at the moment git status) --- .../datasets/geolifeclef2025_pre_extracted.py | 6 +- ...A-test-bioclimatic_monthly_5000108_cube.pt | Bin 0 -> 5171 bytes ...-train-bioclimatic_monthly_1027998_cube.pt | 1 + .../PA-test/08/01/5000108.tiff | Bin 0 -> 32104 bytes .../PA-train/98/79/1027998.tiff | 1 + ...A-test-landsat_time_series_5000108_cube.pt | Bin 0 -> 3551 bytes ...-train-landsat-time-series_1027998_cube.pt | 1 + .../data/glc25_pre_extracted/metadata.csv | 101 ++++++++++++ .../test_geolifeclef2025_pre_extracted.py | 153 ++++++++++++++++++ 9 files changed, 261 insertions(+), 2 deletions(-) create mode 100644 malpolon/tests/data/glc25_pre_extracted/BioclimTimeSeries/cubes/PA-test/GLC25-PA-test-bioclimatic_monthly_5000108_cube.pt create mode 120000 malpolon/tests/data/glc25_pre_extracted/BioclimTimeSeries/cubes/PA-train/GLC25-PA-train-bioclimatic_monthly_1027998_cube.pt create mode 100644 malpolon/tests/data/glc25_pre_extracted/SatelitePatches/PA-test/08/01/5000108.tiff create mode 120000 malpolon/tests/data/glc25_pre_extracted/SatelitePatches/PA-train/98/79/1027998.tiff create mode 100644 malpolon/tests/data/glc25_pre_extracted/SateliteTimeSeries-Landsat/cubes/PA-test/GLC25-PA-test-landsat_time_series_5000108_cube.pt create mode 120000 malpolon/tests/data/glc25_pre_extracted/SateliteTimeSeries-Landsat/cubes/PA-train/GLC25-PA-train-landsat-time-series_1027998_cube.pt create mode 100644 malpolon/tests/data/glc25_pre_extracted/metadata.csv create mode 100644 malpolon/tests/test_geolifeclef2025_pre_extracted.py diff --git a/malpolon/data/datasets/geolifeclef2025_pre_extracted.py b/malpolon/data/datasets/geolifeclef2025_pre_extracted.py index 47658c0c..79263ee9 100644 --- a/malpolon/data/datasets/geolifeclef2025_pre_extracted.py +++ b/malpolon/data/datasets/geolifeclef2025_pre_extracted.py @@ -213,7 +213,9 @@ def __init__( else: self.metadata['speciesId'] = [None] * len(self.metadata) 
self.label_dict = self.metadata.groupby('surveyId')['speciesId'].apply(list).to_dict() - self.metadata = self.metadata.drop_duplicates(subset="surveyId").reset_index(drop=True) + if 'multiclass' in self.task: + self.metadata = self.metadata.drop_duplicates(subset="surveyId") + self.metadata = self.metadata.reset_index(drop=True) def __len__(self): return len(self.metadata) @@ -279,7 +281,7 @@ def __init__( See TrainDataset description. """ self.transform = transform if transform else {'landsat': None, 'bioclim': None, 'sentinel': None} - super().__init__(metadata, bioclim_data_dir=bioclim_data_dir, landsat_data_dir=landsat_data_dir, sentinel_data_dir=sentinel_data_dir, transform=transform) + super().__init__(metadata, num_classes=num_classes, bioclim_data_dir=bioclim_data_dir, landsat_data_dir=landsat_data_dir, sentinel_data_dir=sentinel_data_dir, transform=transform, subset=subset, task=task) self.targets = np.array([0] * len(self.metadata)) self.observation_ids = self.metadata['surveyId'] self.coordinates = self.metadata[['lon', 'lat']].values diff --git a/malpolon/tests/data/glc25_pre_extracted/BioclimTimeSeries/cubes/PA-test/GLC25-PA-test-bioclimatic_monthly_5000108_cube.pt b/malpolon/tests/data/glc25_pre_extracted/BioclimTimeSeries/cubes/PA-test/GLC25-PA-test-bioclimatic_monthly_5000108_cube.pt new file mode 100644 index 0000000000000000000000000000000000000000..eab63413cb0e75301658dcc5140da40469237d9b GIT binary patch literal 5171 zcmbuDe{5A}8OPt!Qo4?@!7cC6qrS@J}F1RB`(@nIel0r zDa+L3GKHBqPl6g|(B0^a-KN{cVXSfLna-pl!OOys#l;-6fB0i2#~7V|pz-s)=axa0 zXllvfeV?D-@AJIRIrrY<<{O)gX=yS4_II~=kJ+*716{rC?)LUlVW!l+Z+vQWV*J2J zX?%3}z|>@EY~pao=**$vUdM4do&Mp`gZm1b_K%cCI{Z&f@N{9{!SRXx!==LH%+&Pop{~}VX=@u=1-}_QM;<5?8`6;0+{DyK>BFhA zwYXxa0UxK>IMm2G>K`mN?G77D#g!k*n|xD#U4HeEVspMB@8p|~6jwbuIyyBmQ5Y?a zPfgCW-aEa&FkRR`I6hixEw*gfQ@o*6Y~545vBS-;YFWMe*q$dysmr}?ySACW<+N+x z;Zk8r=(&OJ|D|6`J3295Doqqb^Yia}eSI?&bzPt47PD?SG}xg%>FhyBK^sjN+HrZa0IPd3RwmFU28G?fxBiqE4`g11g#)iL(|07#+#$xrc8S=d`oU7%v#Rs0} 
zH|C53znCik-R)3c}&Z{TkoWhTg<0W*@{E9}A2j8wqspF__+Lmu%RdO77*L9XIw z-;5rvUVqwm^Ktq?E{pogi1upRit z+lfK;ku79rFLg;Kr|hpa=W1tDCVxU6_oL|9dvLaiX?YIr;t!vS`2P|gWVhb`hmSFT z@DHfRf_KYz;Kf=-=i~np^C_;%pCRA;VN#|VC{YlkV9l|H+{h_S^WFY zrT!lAW#r?Jflb~RtPFnV-<>lqcI?(*{D)~?2R!kf`)AtQ1S|)+|3cc|48AsU=5JhZuVDA+_v{no7BAu@ zo701}J}JNYve3c(g-t!wnVN@r-eJ@8QvE!&mw7FS>}BL=Cvn8;jAA$} z*6pAxldtk(nm5FzzP9|HnpiDtn(yy%KP2$5up9K6L)l|-rr+*a`hs2c>AaFN$k@a! zB>w0r;=poya=V@{tQB+-{+An>A6A`B z2yP3{??ryIh3M7~{jz4iRG;4$}n z%2z$M;kR&-8dxK^8TfmjA>U#>8>s&a=!jGQb$`Tb;lREOwql|k=~)Ee`S>jy!ykSz z{497J<@s!L7Mkyfm=_w-`eJmNr=ee@{53!7|E1IPh*iE7@>ySH57)(c(LEW$Q?n4Z zVS}H}L$&GiEIGlYIC5CT1z+NRlnj>opSg3e_IVYEBl_yo)B{#VJ*8h2Wc4nDk4FwC zz>ZDx;*!I=Kdn!)r5@PShuuO}--jM07QK3)^GFUOYsFUM^)j5m)xA-LPe{MS-9>hg zgBD`py*NMSkApQ|o1C%iJ$zo^t9dFTkAWv1;%}6rkNX$?5wiNYdTTDV!7mp7Kc_a$ zk562c?+XE|W=uVcnP=5)(E0GHa9(9rgDwQ`#QV8F;HTbcZmgeRLr%DKHr21g5kJWR z=V3pU))gy;c?zBytUYHOxMc9#yMoVi_PS5hhv*^n>;wAmgBc{m!`1Notis8820v_z zp9k(FHvBbX>hnhWfcW4agYP-;uGT`+bQ9^8tu8ci; zk@*yV{5|BcH_3~id|WslwbJ|7U=!Z_Z@^~FfTei}pCb>wb&z#`dfV6|hw?#g17G#{ zT^{Vh>_f51lW+RWe=oFx`)aSn)aPGYAX&Aa$s^3vx~h+IWBoRu z?nqjP&M+Zg^jBs`*!afC7kR!_kz-`pXV)^Htg1)-_in>pug8aP|DVufS8mN)^%d~g ztGDu>`x@N9*Sy>4w{7M;*}HYVZJ(oG)S~B)=AvJpN5~$s=4tjMdGW8{FMHg~N^S@K zA^9%;gIwTi9yCAk0KRoT7Cevs4t&-Ko%lK5O?w~XAK`DG0-Ji(s}Mi;M(~j>uyk(r zeb~VgCma13-^W#p_}KVl5>{*(_=d>-ui;m(PdQuozLub`qKk=-9!&cYle3H*&VdCN zoxk|bQcsmty^rA=zn04CzYX7H2wzr%-QoVN+@5}4^{{*Lf1RAl3#Nt7+%v3TRp3}r z&2#DZ9~Zuwm+&Soodx%=W@4`Xd7%E6$q}7Td#(Cw{J(`voQtn`XC6!Q+USILndgy% z-VulPtbVyZyZzM-nKj1fy8bWMt>(7n{Bk{1n4aM`es18_r9WM3Kc%}{#o*jGANa@$ zemCp7K7)JAs^u`~sQq}JDNK)#OpHIs+moru;qm>sfkqK>oSweUuI|3R&Mlj{`g;4j zHuE~g>Fn(4ak~33_H_66^!AG79pCu$R2vL+x%mc>z4GSf>X#IE%W%!#|M4DYDbUrg z8Ma*mvWfRE%K*Ca24pGH)nl>c8l(%$LrPy_EQPuH?6+P66I#nEs6KH^ajrhSd#=H` zW$m(X{_|Y9`R3-0_4My(Y`+4uvc6kf*Vo3!n0dhUN1WcVnRDy(zcG55=H^#6Yrm3z I@=tyL1%eUzCjbBd literal 0 HcmV?d00001 diff --git 
a/malpolon/tests/data/glc25_pre_extracted/BioclimTimeSeries/cubes/PA-train/GLC25-PA-train-bioclimatic_monthly_1027998_cube.pt b/malpolon/tests/data/glc25_pre_extracted/BioclimTimeSeries/cubes/PA-train/GLC25-PA-train-bioclimatic_monthly_1027998_cube.pt new file mode 120000 index 00000000..ebb0aabf --- /dev/null +++ b/malpolon/tests/data/glc25_pre_extracted/BioclimTimeSeries/cubes/PA-train/GLC25-PA-train-bioclimatic_monthly_1027998_cube.pt @@ -0,0 +1 @@ +../PA-test/GLC25-PA-test-bioclimatic_monthly_5000108_cube.pt \ No newline at end of file diff --git a/malpolon/tests/data/glc25_pre_extracted/SatelitePatches/PA-test/08/01/5000108.tiff b/malpolon/tests/data/glc25_pre_extracted/SatelitePatches/PA-test/08/01/5000108.tiff new file mode 100644 index 0000000000000000000000000000000000000000..bcca73df66f592f5d226b786bb8fc1250986d286 GIT binary patch literal 32104 zcmb5Vdsq|K_CLJ#&SWxy0Fw|PT*OJZh=>>v5$_XjA~&O=;Mijv>dxZL_oxd zRH<4U@P?OKv}&pK*a1<|Qj6AF>am^?(NeWGURr67^}N%b^L?M+`#j%2-e2Y!)|$

-8=>O2MtW$E^Yxz|Cw_Pdg^ndj~@6jUauRfmg zg~xx}|GdZZzd9@HUjOYw%YFR*k9)`bANpijSIB*XSG!`!EcZ$B2^8Vp;!l{J|0OANj-n z_YnL6&dcX_{Qx$}@~+&;VgjHn0N>C3PhSB1-(&RncuQ*9f)!cWSu3)}I&s|S2`d)7 z$t}rxbHNx>e8QBt$rG#@6DFpcxaC=I7Uau~W)rvUtv{9Q1QRd~ZT=0J}$k6|FkR^G`{%4HQL;k<7=zrc| zApaljf8_tP|L?M;DOrC102>-6{3ZX?(O)*|F8?(D(Pg9hs|{1fH~wkIe;PA_f4_s6 z;~TB=1^(|f{g-dgg;_{mdZ%x^5n z&(AAZ{#IUgk#$+#o3b5_;l}@I6sz;*=N1(K87MAxu-O!++%7I|a__^D$`ANF8#|hM zsT?FOHnWiuRV|(>@fn376*$n+UPbk zhJ>1D*3oCtSbl(YMiKoD8o>t(GqUL~(KtTDKJzyH4>TG~WClZw$WL!o(q@_!ad#iUbo>CQ)AKd*O34ethj7glxyR*1URW7D+x?21RvuY;^ z0%{4XhxhoPH%9wqie#JM!H&AV8JYNS^IAQQfjACxC2_fUG>Ls4t#+22_xY1x-ZT_~ z1Zp4zHk(F~ATt#J0S*%nK}i$`T%BnoF||`>2(+8VKtK}}d4a2q8BSt3T#z6zR}dqp z{TSP%Mk*|ptB3)c$XC5sUKP|`B01>ynn%ycfuIj{6I~9I~ zYz={JgtfzZ^4?fzuE~l~>m+q7QcH|k?a4ViLP>z#G>3%!!kKYUQ5Dh%Oq1HcISn*# z(F^zui0#5j*py2xCJBqm7(K6bAg`x=Tr?TaQj+dL`ot}I;>}lkiN8Lnw%m)WGlv813KvzJvp6Gp?@Z7}}Uzd`tE{kV~>ff2_8$fx8Oe&6@kanhkP&Yz@EwMJW z+J>*P;!GU7Ar}UuYrwV-Ood1pzmA%ir%_+bS8oz=AkQv>fTL<>)vL{@N#{goqu8u+ zO7ooe*pSZzJ=d&>op(O-NWbjz))J^MC`5PTk>=hr7;kq|i<&gTIYF7(NyBTbV0;LP z1x{{)OAfJ7!@)Xz_GA4TurKrmmH|FVxp^bgVqu+``@1WNF%WlucRS#M*+?QvfH2sd z3k!pYC9rYr5tQ|AB^YnTiJ5gUpFxAgSNKAsqTkZYFUTPu;0?ZD$yJ{4j71{51FheG zbrXjvsK}gto^%y^-FmaSsQ_!fJfOK`rSv20^UK&Q!daoGIR+1nV$-O3lrubqIDqN` zE_mmo#Sn0oi5CJw2(u6m-MJ!tZ{e+tzx^AVq<60tcT&N5>xS~K*Rrh|DMh zyZfQZqX+Kx`F#VgvoR#;w~Dvjfv-KC(oO5eD(aq#g}Un4mJ5{*julyHI#8-&D1-x@ z$A)oyb!ep&Knf0@0`;hJ&R+-Gk>f52&EZ4?#iH}JkICOI6ZZ6e?i`ww?&`whQM`5G z=Iof^@Nw$V0CNFaAeBwj7T|js8wXo%D@&eNTY2UT5}MHAlF~6GGYVD})T>#~5aL~3 zdAD7;nWwY%nr9)Q4HY|Rl}oO3uKTOCpq;C5Yq@RUDY{r^2hng1jK`<#vQpv1#ezjf z1rd87u3W32syYZenVK>4S?MLGsXmJMpVXZmi=pd#+l%uPkkJTan%S$W`(;5N+G3DU9#OHtN8JPPX&VJMoATq~b25uv_RdfpT-}W2e;D zyEh8P=@ekfE-yzY^yY**IAm(DFs!d7azW|xwAm) zVa`Y6t|e2mgp&|!hT>0F-0#g4(B1TL5PD+_S)P3Ta$o_G#=^F%AGwNShfcEB$u7@L z0WKqu`&iskpcqjTxB~1WLAvY8h+*GaT9as!83@bC7TW6RH9_m@CxrD+df;4yh~mAE zFZTH;`gs#hqv(hoioxLg)?yRl%@dwC_)Zct&u%GgI+pm{rBZBm_K=vPQCWV9@nLfz 
zE$w5bKsg|dcgFmg5y$PAh1$dL@+6en0uDvYM>rd99n8Orw(9vA00YL#z$% zA8>!mqDNo-J~rw(qZocE)AV-DarJtRou*qVDB?n4W5P+o#fE`)5*RvtpNwG#35jyqTe5I zzzIyZiaB z?jriIB%I$F%NY`jUqB%5`*8G~yd)pHKAdWJeD@fNbrenL_9FV6fVxU)RSrj&U+%sy z7MSwFfO~-i5wb@=mZDJgvVaN!^M>Iq(*_8+sN!|()vIdP>K_Mo|CptFaJkS$^3;Xb zm_$1g8X)Iv5HX>gf>B#c9)gdr=`~oc0#{zhQBUSu6;hzG;kq62SoN#O02-&PtIf;z zAf``#9GMZAjqmLoDL%s?Jy@U(54U5`Z&mCbS~7a%`l$T&swiCQQoeS#@g^ASu(EC2 z@${I4ww!C}KE&j4H1n;Nq}`8ip0|Wz;|&;6<3lxKP4HmlKwoVAa-vB`xCiypsJ)(8 zoohZH=p}LL`Qa5xQ~QPd^AEj^7mr3wdgaPfz9Q8s#9)^`Z$~lxD1G||pS!{hNL%2Y zR6E2{3Dy`L-(Nvq0iSbT;EqbWm(Ynwx(TKpr5!ul{YnKt;Pp$1PvfO9>ijaak8_73$G297Qe6L0{eBL|5P%(50VoXq8IW1_8Nh zYc6VWojf=^gZCLmT}hT0^pVP#l!)wx75A*)+&zG@&3mP2c`ZPjLY8mf{E0DgasSB6 zdqyUma~Jh)9Gx6sWd*%vc`QsiEr$iPNbgM?;l4M`_g)XmnK#?gNjOghM;BQfd0*$k zQo7mlJDMkrnOCY74egVs)kHY?obUQkI)|#-;zVW>czPVUx*ZykuwoXWn;3aRPt8#5o(M1pZUjx77&Ui^}+JEL1p{z3vgHRd|8 z2?yO-+!5u(nee>5CM02#X3=@#bNC`mx;%(C1tnHy2De^Vztmz9N6Ll-rs-c_%r9=n z;brFYHdZiwk#}+z0UQwDQ9fr({U7t+`VEM@uh zPo}(&jB!P+Lhl&2$=f@Qv5}dO6venyP4C$UoC9lV{hJSf3&GF~adqi;Y4q4<==EpL zq@QTdf1*ICIMHCewc`ea8vY4C#0}=JGv5| z+SJgW_d7pHSlzN4iPgXb8tu^kYIS}?1EBAx9^Z*M+@JRjzgz_iG-zSQvZ=X#t&tF! 
zypO+%?#fem(b~tA+>IM4ANi^-$0SP`=m9+r*SZqDPNY99NpcqT1MatNHLgIGC|Gd= z9dw^f=rQv$G4WIm`!b~5hJ=y_-*mc*p3Xd-R#JO*%Ny*x8y=g)a|`l^ah~F3*P5r{ z?1R+#grNKz{SO#4f@l2I*1`N0^kHnSCOwZT9P;%NKhd=G=WX5@Gn@Cne%oTk)2uLU zsu%svg6k)7!awIFt$L!@zzP=tFeZi>=LA&vn$&G;jkf4`5R5 zm&(_R22D)~O&`7RcT7yKTc>*mufp6)&w!|v^REYItiwkINM5#~owXV7aMJxlmHWC?^NU(SIpWTAos)^6bRdV!wiD0AWUI*GoSQmT^cdG`4r% zf}LAUsk%d&?1aZ!{K4EYxVDkf-0=y^Tu z!bV?eE%lI0%WLo#0*B+Q)6WOG@j=7u&s6MN?73c(U(Di3J8Kt2J%NJq2UXwy5;=0y zZoOnWN#~DjJ3JVI)<12=LsN$;$och+c^hJBViK#&&xSuy^K`!lC5cHdW53MDGX_=N zobH=9DLJAeu;S`)Aec*u<7s<)!|3_>zVsEW>=~@MrP}fpf*`cK+<~dr)~V#ARORSey_wRZNdP@avmRfo{b3E;$471AULZ z=wwD;axxsW*f5ks+~5K8>sJfP)yL0P_s;%zY{sUO?`%)&nYmtG8E-?JBeD3wSdq#s zJxIdXgF%PgiC=MRsfx!M;t8c^!Po;R0}Ys6BV~YjS2FhylCNHoYAe+9IwbC=%BbXr zNi%ZK90QERy6NcRtIEHrp~2G?=$H%J-MU7t?{-^XJofW@ zW5LphQXJLaboLYU?t)5Re$G?UyI0shXrG2A3>$HS`_#gR^hGO)*X*O0z3U0$;}$p9 z_=p|>if{E&J6azWbw1w5?)Ncieaq_{_K18}T}VOD$cE$IiM{@uB0w4CE|@~%j7i14 z*j!`Q49GL(E!jVCu`mrL+&H1o&`^SQ7+q=*u3Crtv4|#R;~!^ z*|jbD9On1DKzteMa1cqJ&MJ z5W;aWJO`}gHO6M);#jei1Y}*boX;!`8f$Z zl^n7%`9Q>EeVigYVsi47lRCezxgiVm=qIOwc(aE0JE_<=L)wGxT}kQj=?^|Pq?;es zk3}a|hrT)2gZJsG-JL4^iuPGYpguq%TAKbYzbr-c&gfm;K!VZy7yl$laQS?KZ`B9fBH(s|T@5l_J58@B}dg2Q357C5yn^Om^|udu99dShr7h-y%@z9N@3~TO9lD;J z)ACU!Z=$zMUDEb+>zj`09D`HGSMAUCl|#3@*AW@kz=}lQ${o1_bMs4!edlwLY2u*7 z*!79NDq7bkzGx$gD); z=JY3SM=R|w3-%@qx-|8BO$Wm(Gw$xe0cSNm_(RdOqZIZJ4Qtt(8C$#pUDidHdR&M- znYryj& zKRbB0&V?vdgS;N?&n_!yP$+ZH=Z>5RM*qmLMBA{-z2z^Drk7PHkV28k7V>U$ZI90V1!Wsq{H zF~ZDh=xeaGgtxl)P&Z^RI??8QlKNs-=`O*mN!^M@Q@bTkQJsy(QZvCbM!f>XP#KaZ ztDcF{sVPG6`(}KbsUfMII!R80H|N-c`jnYke^Ms|G%6*nH>nWORhVpc+9~Y;(bD0#1SM-{1sGq8*Geq?NkfY0{9&L zT;Rk~Ghl;I(X|(1`*1)Ba8%?lqCUnbIjt4B3yfK{D~+QDfvM3i_*23}!e+88f8cpu z-KZM!pn$sK&M#eym2GYLk8} z$zo|gVzKK-BuCa!Q8Bz>ENZ0%otb2D^yiz!q#KBE1_61C$zj{5rz@V-kSTXlsly9H zC{KttxO>)d?k@%w^O^KQVgV`@1^I*`g`mwho5bW@bgZRg@9Y_!;0pi>76!e3LXEjJ_gQ~cooJxb>PJfyt6jJddkoET3+Lx9wLFmi$UkyFl? 
z)ZU!>o2);<<2*88*j3mFmDZft!Nl~-PF~BvZhq@r_%N;zuPI59!&dF%Op_N${!a5) zUg4-Dh_hjgjdm)9#FbSswM%`^{!tRPH>_i5%KT&x;NqfTi{QShAL<^XK~BBxe*GZe z>mso74xA_v)Tg95^=SA&f^KEC#OM&_)z?)9f$9BP1~vWfRN7e)cJNCtR*|?-4^)Tt z7AosLkk**McGiK}d%Pw;=KfpdxW=U@iE}zAuknpbltx}ULROaWFw`OlUQ)j7ohH={F7+`wu4oKFd+ zMO!17b|a-#CNDOXyB&*%s>=ty6e|&Aj}M`nKb8PcCF&FVeKm1)zGp+Q9{bW}P}Pka4x)AJPglG4*4xl`p4Ua~Hw*u(!Z*bU$H>!GwM! z19$gsY<4l#+@9Qz_{Gu9_>fef(zYxOS2nu%{p>*HHfk(Ml5N0&>Ykp%7DA~wm#fv^ zzLt~SSzysCl_I0$4TFK7tR@pO-|a8E8e&E#ZH2g`fa=ey-zv51y^aD-COt~}?O-*q z24o4rt`3Ucb9fOH+t9`h8ob^>X?WK~TWmTdaG8*GB@MQ|hSny9+rt&qhEAFmeg5SQ zfkDJPcrQqu3%)^vdRHV(UX_)&nbq z3@0fy+gn(fnb0QPZ|XNk44j=znv}G_SihD+j$32UR(`~S$6xM3mPJncMQSW0ErE(F z)k|!*5Ux{<+j8e@!6zP>B_fr!ub?2dKqj>O)joV+;!Ke?_Sn@<|JZroMbz~J&MSR? zIkjG7s2Uc>j7l3x5z2|To=k1JG#R$ehTn2#c?f}7w}~q7WzyZx1lQCuyFSABefxJ@ z=%LLHYK*I0W94|dQ@r*iPs+%@QU*rr9~8o8&Rq7}^43i3 zlC+APFqX{BJ%-MA!9RDfF?^LRLL=oP)8W!m@2lCIa_(7sx$M1=tH6Yg*?!i;rM_`j zINv$!TUO7r!v=|b%12dI(eO5zDHQ+_e4G!acmP^8IOaXuP=2j_j%?NU zaWwIJk*M$(6WUP+C!4bi7&mwuLTNi&zSe4r#)8RrFQa}h9 zC1%^aB`QZ5&1Zp6426|Zkk#bfPO(ZiQeu-)-6SO^78iH}HB#xvZz1}+5T_Bv{(`ZW zNpgxP1}-9#$#fc!PUf?0k3Qo)ST78S32dfTL8&yr9uz1?86A=-WFSlN#y)8?~663L^=VnmP80N z*I|ELCMMb2DKDiH6x;oaikzYs^7A^`&!Z_s+QcA3i0p=-P2kE0n-w<|B;Cc~@;j!< zdhyd>zgQfg@a2LU)i%T^CUe28`W8x3WRPI1dLIf=6pa10mnzV#hI9yIlwP>2ZDIYLI!l2Ws_c{Gs?WGV{cW0$f12UO7hGYsE_mm%W{SUsAKk)!fTvL6DuY zLV)bnA>hR}T}j}-I%T$49n8GDt?tvHlAuO>s^VIbX#xpsq6T%?i{pksY%d;BLTlzB zwV=*qwqKK|U>IOGEr-BDD)Nd@0YDC&)X=-_>OMJ=Qg4BZ2Kc>qyphC&ciRZlE&BaPe{k;RLklm`#82_Ue$)O0C5C z+$6C!{V>=lQM!X)413sdhqJb0e>=MfA$ItmQge1{BD>k1yy0eN{Z(}SN)*Sjl7+Hq2kM3#EMvjeFZk%4A;x_X$ z8qmS4Bx)pDZ5W~4Z(nk85lLvqV=*n^2@oHG!>~tD!emb%t;k!_c0!q$YegP^hKdB1 zop{RyUp6FkGS%cGC=@+>urX@h$e%kN7bVG%u1T4bwy>47B^Ggh3k% z7xpomSGr*;l8?ji9T{!iR<;1{j1FYky!O%X6j<+Hf%!Wa7 z_#-lURI(eXcbFl;?4*;}Ac!bjO|09Ys%t_Ur=Qw-$i;HJ9McY3{$)=f%PR~ySGAj) zVKvjCq^H4~NyJIRvCBVN7LH5WseXaGw-fsTq#_t?NK&eR@g{~GTUo(CVs3|0yOR^A 
z{dTL+i%U8whG+(V(xmqkxop`NAw|xOuVU-_?~q)D8oXz0Cc&Bqpcy-5>`MK4nqUI$~$F3$cZi)Ao?|BZqKzv7uY0n=*kkbd>qQoJL1i*z?THYmahdB zG*!W^Nhz+xoJj&)qeRAx@kuD>l=?W1H2{^?U&Q^O`~{dK6j~rDzN}xoA;6}h1ZwT~ z1`)N^A~%kT78xgWJsUyff&SvMtKN2Xc(u)^5?^uINcgGO*3?40on<)XTV#xye4cR zh*3hnJrZ(X$-*U41hJq^)5myUu$da}cSm{hme^DeHZ)|;+pcyhxRodb4=!w?of_F# zfJaWLOtP9Bs`6X^3O?NFn!jX@!5Iugfjt9pBc0%r$bm#>btWMzz*q?zUl#Tmr~ajNlvZ$cE#L5>pJ>>#E2t1FMcTw` zKhhC43d}zmhul8ncH~7Uv7A{^DLJ`ttc47FpTlO#CBG0+cD;MR(a(SL zt#L%RCwXHjEQUDm!)Zfn{hH2pXJwsLdQa;82Vtuh)iCYld9A7geQBS`l-%_!LsoFF zA{Hy{gddzn*pDusU=9(RSIG}CqadzOSdK&mkW&V~q?;QFyM#)dO=~5}h&*P7^X%%;V7a|%(MU)%yR8pNnoYEoiVeP} zVPQyCLK78r`}$mT+A-pXZ<9TEXXmE74i(jj_ic1Qe?Ck>w3815gOD&9+K!(CWN=

=;#E1d{m^wB6zS)Z@nX$zb1uMkG0d zrC}DabU^jz>CV8etUdN9JmS+aGEI*WPm}m0C%53t_KUi&>ROYq*B{d?zLxFZ?)WF` z-yDM7a`doq=-+myNG537w+#G}`W>|XS;r73vTms*#V0d=82+52!G<3dTjZ$Df^C8aau_wS3_{(X_5fP$%h12TR|yoW@L(DBMgU zCilvRt6L7-T9Q1NLU4V zSD&V*${^cQ$rUU!5v+x*(_m@ZD6-qG-qzVE5cSWiZ#Nu!{f1N|b7Z0QK0pfL&E()G zb~T~hmOR{jaPE!zRy1UU?k)yP_YWdNcf`V>hL;aAeL*+mc=&d^ADS4vJ-2L*aKU5A zS(?5vyC8qp-p~*3(p$rS{kh@d&-Z_H-;``1`kw+$N0@Hqpw?}ZbRodl(mCggjdR;= zUg}nC|8x0LKT!G`205yFv8wI)LZhJ213zHqKwRXTh(bLJ`WE3rpK8 z>t}cHba0jfxE1K$o$N2KpS0A$Ajj2BRzvoK85Rc~a@gHYu@4{OphXQI(h9-kd*2(U zy*_F01!FV%!NJG{ErU;^&isUUClBKDjzafNmL3Tb7!L#YhJw?N`mwvP*dN)8AdP)T z;?adSD7*Y(bFt1mt*1;ca?^vGz7jdN`@;7EUUs7GA0d9On`XHJRV4T)D$h->_e4J= z)~0nC?EYRqNlAjyS;80iec$rjJG~<(p$X=K)n<{K5`d?10uJ3otIiw>F=>vZ4cXV3 z7++h@KRY#ELKk1G;#x<8A?w-bnXvoG`DGxCB$CdB(0*xLgwG70cWdDNi_@OGj*f_#AtE1+QFlZM$8Wvt9XUU|S>`%jIf|Xo z@NDlfpDAYs207@6f{?glD$QcwW$F?4U*r#L`V7Y_8XWPtUk|sdw2Hb==>-UVz=xo$ z7$ALZqGzb|Z+>1QnxB|k)!_x6o-Wa*Mg5J=yC1k@7Tuj$!DB5krBE4@vJT+L;JArr zyus8j`@t!Z$DCY)DU$`*0GxlQFMqJoH!0C-j6z>_eJRBQHyM?)eoP2((J$Nf$hnA# zcnwKO{%6haG1>8dARB6Z@VduI$@z@w9eJwdHab`*W2Co5t6V(>L&N@A+>4~g5K*0! 
zbrwBqTirNdV(<31vsWn|cOQ3p;`B^_h>mpo8T5k-zqyMg{@bz)F!`_-^fxVjok2dI z{DsqcncMy8`mryfJ&FGxPLDMbK2_CQ`zYE+u0`STkimU=(U1M=;%KEmI(cjf!Nc6v zt&q5SN$+B(=Q!^8*|pJ0U#0Rj2 zXWb_rago^rGiTnF2DZL>{57c=`eKdo+OQw9Azzhs|Hp9&yZdvx)~J~9`N6}Vn!;Cw z5BxD_P*xinBI7D_TC4e)(D}^6=O>50S zAuVr0WQ;HwO?o@JGsVM{F`VKM=sy?kyYCH@LoBFX?S8Fyb4K##+PjJRY=7d_@6hXG zExVU!!(h>4Conv#zhxd@hQ69?=m{QAOdD4&nr`&L#xv=nau$lg0T098ypF&$FC=pO zT?t8gNV=8Em7*VhjS^*QV=nq|S*5WJP#?1N0^9-PX>cxcTWkkDEYjAiIpk|(bW=wq)r{?CkH?3;B z|I*8ggX%+&gRPxD&6D@946>=IndniW@!P91hrVWD`k|I)L>!QRah#DtSuTEddaBVU z($))nWGQjaIdovP@ianhkk2Q`&%*(1jj35d(XooMh#4TMGg40LIm z^Tjz!K8;q@xCUgsp{j|B9@!SnzwtDD-15euWZmbjjfi_^o)N(Ea=z=#lQP2+?8DgU zX%A+<@nq)O<1!Jz_@qX|l;(HmpRVwn`R>wV-L9`ghJ+4iF#lxA2`MqZ@o>=>sh4X& zRg-d}SEEW8bYT8Lga0(cj@XNihO4eOvXN*QNWUHm5}IeD1bW$B0WBJo8&FFVpJ~03 z(>WTwbX`A~R!GL=#MF53#~crn#{{a_nB1q6@7htDu!qY+oI$yR|2A+L%3X5vG z%-Fz}NfTx|TIFI*oWSVALu2iMGf~N;{ezu2#otPICtMtol$E%N>+d#i@X*8A!Hpv; zgb*Z#fF%PghOyuid~#!$rD+V0N8XR|7d`(3u*Tm}+r3g!<*E7#ZP90+|DAz;?Bdrh zcI+tV9WGn!|6BZt(Z;6~hH`Fgf#`lj9n*G5E z$(6Xv&zZXJxPv-z&t+|7@l?-yLV7d*ZmU28bt53%g3Vx8toWDGs%9U0VkEil#wo6fAurZ zGXD1SjdwJ3F=S$^)_y<9A51z) z@&1&={)F2Q81J%Slv?rG49mIHT(OM3>dE2Q$-Dymr+!jM>hn?S_JpJcC(-9fRC+uq zM4ujT11xXi@f3CcIvjPp-YQq}3@+H?2|g)Kk7uHN=wRaKlx!0I>EqWXw^XB^E+#R(^!T>ehaUG%>v`yM*bcEbryD;^y*hjO{MX(bJ;=1yoHZF% zcJszm#5^8yZ_ZZM`-QVp7IxmS{9{kHFIfI{n^RQ*5zHWQ{<;hx3%}-tAb>N%D z$60UDqoQaaf^S>9~pGQRboawFhvE&_qDFIAp>lxt5Ei^oI1MA_}$`z z@eMcqc)gx5+Ckk$$um3lYyDlMef!9lcf2IN6c&~E@*YQxjlTu(s6a97<3xASD{aJc z$a;ttonBXe2Rw4nmFN{aC;$6?_D4?h(2kt?Vwod;g@k`)h=E|_vCCWf-drDdE+MkL zpQ8*$jsCv4c>UP&DI{V~a%i!5$mvRGshzBo@HMKo-*d0GMdjrd4pXnLF@7Jj`&XC# zx@=$1n4;N#I9lFM-tA%U|I^Q&Ft%q(Xy(UR-%NJ8x=O^ZoKK0V{2Nq+TAzLQtWC}L zb6Zh%TNv8G;~C$C(}Lg47izUMCu1*u+n$Yj;}hd^`$>M&gSHk)e>*V$m-n*{pq`kz zkYugs-`{a?X#J5@pH!@qGEuz~EUxUSYG?oTU`hUIT+n8VR@vROWr&?W- z>Ykn+V5b_gG7#GQTX+5n(0lXr^R{e{f#*kjxJNIiQ+CEpq(7_GY`eL5b5x+K$b9e! 
zly3*FatJ#vmiC0vGMX@{jv0^kiW8eA6frTVj-ObN$`&cYU=OKjWKSyspp4(!#6D3( zlTxy;fNfTcfURUd$2Ka4!yay5Pba++QI=d8$dEVF=!+-tT&=vBW*|!8jJM{z(gG?ay zv9U0LH})xYY%t6bwJhbtD#>J#jiC&hF(gssL}jqs1UZKfU@4opg zsNUd&2a#mt5u*&n;oW$k!>Ni&hxcU4NyD8*dTOfR;gEO23@e?^djivi@=84=k2G!F&W5;V9EYhwY=v}GzzkVnqe3Cj!8vN4m@$ikd+{`gb70RBuu)c$ zu-B$?7PTd4oUzX@8WNMJSr91t3@3?wR4xg!`6PV`Y_p8~E+Lb#M5>j;A6S(D5hnF*qMVC!Gr8AGJ}Hz#T=K zIM$)maYh~5#%Xz_mghRqnf0tbnmg1)`4z`?$ynQ)?J`bo=N|4Me3L!~N*wgq6TGz# zYwU(@q=Lz%!^!-1%BeWcAv0ggVs?crX)V+#r zpAssD7gPL!#4WiE-$nuiZelXeTBVYpIz`2LxpJEM+*D&xVpAtxL%2Hp7V2@2 zLW?)@-0%*OT?OxxlGo*tYoAYC^>|;BDdV*35gW6ebGk7!f^6!}xUwNB!1w7-b&r|% zK;4L3L?(o(`?-@E0W!=z=ZxU_AmkiqUVd5*>-Sqfl0+`Om9U3l6ReNBR5@g*1GbCo zC!hVYW!D}ZrfmvK^a9?00R)?=#M5NW6?jjODd>%SQHn+D-{}WT{an}#xzo^yeO)+`bXvhyxk^Q%Lco{|Wravf%rl9+31kl#QN7mm`Nt)Aqprxx zQOVHM375Js{-H@pcxD*yGgKvn7+~l{=W9*O_`^q;twg8$E(V+LW>TBY)Ve$GWzr%$ za8P9RG@h%2dCf>E=_5q02^MxFlKLE^G$;&q=8YlBXH%zHom?LFpz;Styn(BPtXD{B z(+3l7E6n2%V`xAMhhjuSr_z~cSi_Z-1LB)d1xxhx1+SLt*^-epH*;g9LXnQI1ms+Os~k+P{b2owaXGVB zp1I`zx5O^iXGPOlVAo6&n(95&(3Eje z=ELP21giylpvYFE*letp=!z@kdb8x>c6{`t( z|7gu~mYAUZ4(@NWm?SWb^-gQ#9Wx9y zb649Ep^>@gd(PnZA4pDJ{`9`}7Zh8I zw(}WJ`I&Efi;RL8t1->un}<(LspM%ePZ)>d9wQ>r{vzv)_)I(_$yMf<+~@h?5^x~tWMbt8>aqVoqxQnw3CrbQFg;LM*yUk&hKg`&M@#s6Y&lc>r1%(0_<1|HhNwtT%g$HUC_ZM$ zBB1CFa2xB>u3yMUzCrAi>t@mIolh_ITWoZX;aPqsZ#lj1Or8I;f6aWh#-0r9GAQnr zSHV{*5iPPK`yab^y3cd`)sJP`{%cAN{Xyl0S;D}2kXBb zzpiQ}yj&8vC3uJ1k8l0?bic7JKWYD+4S#%mnsO;p59xl)#4w=TU0BD3eco}E)>G`<&r@18%jH2}{~7`C??mER@I zvw zk^)s(H#z204r}zM4jD9E}7GrpI}%qPX|=v_l7SP3CcWPFo5kL#6( zAceR$;qR#r#6==2P>IxDaWTi*sY+@uS!8FM98huX$6s+D9EpuJN42 z^vI+uZI5hX{G67PfAXd-b(?w^Tq6Vel-@krZnqoGpQX~#0Dh2U&6lIPL42q<;W6G1 zTSU%bku4u8$iSuv9k>fB_%Lxs7yb=40>>|l$7;lq5XTxbJ_hAvkRY?7x8+5SlX46^ zZiz$(B<6Y*%2RYmT8`=eA@hiqjJZlP8v`;n&r!sLHTyr>$+2r;2Uk4pDTxcg@p#arMMw?ITBdbF zyRv^dsW3yWg*Ia2FQ`f&XfS_rziz_LtnYGw$1< z5vru(^e+!p_i^(`u*_q_0MS49sGxflfqGaxOis{oUh)1^K*gXHXx45xi*A;7>chx< zpdBjSnuA;fEejh!Xhf`}Kj$SU_H==aPg28+O?Ek4WwO%4fp4PbARI!EfK(Jq 
zh0G73R*;}1mW6qBlt4^fY&co881iLPp?o2uuZbFV$xrtf(FJleC5Kcr3V!~z?Wizg z9qi_q!?0OQv#wW&>XXXO;@VEwAg!@J9+i^mpJw@gDmoLtCXW7(&uliy1_C4yLI?qt z5Doz`AR_f{1ugJWCbrgd0SPii#HLa)?N&MQbf>tuBa&h>CcW+G-=9 zB2pi;JgxPy{r#r@KwhsTJ2T(;9Pg!N_2Ae?Q-uBw4d`z)$t|q77@3}z09~hzRqYWL zNT2}`I}z=dCn{$oW7CKj*F1>c`1%F*iDkK<;6)zM*ah=nf$A+g9mF*Z^^(a2TQ7>m zpLD2$5j{-Wyg*qi)*yP2SY+69EAIu+rKohLS@~;V!)@uYM4>9ep0F6VyNw|k!Qhjm zU|;d`j{o{wz$djtTta_`Lln~0gnVNdDI?U^7Ez8#I6Qg}NDh_}Gzd*0RK%Raf|>jH zJp90K(i6F`d#jxd(zV(}S<#Bz;qQm~20+R0UY($z7D|kfvHNi+4D2b!w~04<7;Q_W z+7HkjlG!igUX)hLtS06>c=wj&=SdwZ!ZqN1_m}KvIG6BYD85w>5_(3q4LA>qXa84o ztGCrGo>_Yj$!MbYCm{&`;WyM#2JTJi-FWwSWvBKi=QGf3i9ccg=`)$Di=iXgyqgLC zC8sge78DqR6B40>)t&KaCa~#~bNyd~yt%V+irq#=J1IsF%r?#=#A_z2m>r6hpz|6Ud!kU|CEEB` zz5SfUr&jJT$oYU+$H(_TIBRCPcX9;C^8^J5egay51;z@%)DnOp*MnQ8GmR{i@xe=h z>_tvFzgcjA@l#ZVrW`aA~bSt3rG+TaCNR=*_&JGwCIIIvnPnWG04I`0qAjs<4!_U_)nJrZdkANP+&>?W-Bm%{z9iXRH5YfPqdpks`-`@NVu|Xo~PCE=XMi)%S$AH^s zUX=t1;hsjissm@GQv&{-EHp+NSTG(F4>`e_ej5QkAkT7Z?jB-JsV~>zCdNNTGPm;G8yfH zsDiJ&Ra8v>v8XU?oL^r1R3h08!Wm*~!K;ifJ_^>|&x1rw5(ULE?=q&R?6|--eAcfi z=FQ`oqsu`xZO2e#gYnZ zv~8EfuXziYa9bzsSOdG-!Re?MO78`9b5gaa(X{8DNM@0V#cjCrg5YU^%Q|jYd{}&N zAxT?=))4V~f!*OPF)HBXN162Wg3pGp5sI-AHUE}$clpIUeu{E1=s~4^-`W)6K8yL` zWy`;=AQV}ruWz(esRgI(=#UHLI~S7m@fE>*` zV9%~xfqgfj^Ma=HF9-^-`}1Sfr;Cha)0s)s);J8^9`ANk-deks^-WhAGC*jeWMO}% z>f%*PoPUC01QINe*+w#)*=(WWTPaQ7>?ueG^!Jv>=lJ_DyUh5R8YhF)PGLUWa4G3z zVi`}*_-yZIvV)gtMjkJCHgDD1Bkg2aU$r<@w5GB zZmd0i!g*0wVE5muXhcE09}7G;1pmMy(StD!7glZdNtf*dF;IV4JNrW$qyLeC?#HnS z1_uF77J&aO20yI)VkX}K=OiWx&YeCL^6e3sk~}A<%Y&`YD046wTK6P3>c*hS#d1bt zBkLow8tcS1G!D*&Rm1Y@69z>uIpgTIAb6IV^K)l4`O92oR7Zn+%ii6o1`0!hu&=vV=@rVa_;OQ@nq= zNj)OWII>@6-f+CJA9lh%e-k6=&xXK9M}Kw++f{mFWLnPbW$cc`UR8Hd7geI)~F2JWFNPoO;Q~*(k0Aei1J| zeFMp6Op((sMKHZQuvV16OYRUg-Dx<(S%09gaEmkeaPEx-G_wu1qCtJzG1PWp5&5 zd#gxZz2%iG0v z!)D-77azZ4wmc~UzJU|>>u+)@+96?eW7F(PpB}Fx`~;l94hkV*9K3HWC5Jw?Hp@BM z6>%I9r~3%2V%>>|RdGVskCwW^i8_r@s9O{-=0zkUI%06dEj%$Rx{7cpZn>TniOcTb 
z2oyk_)m<#a!#v9J8c{=M9ozdvk5+imSbZIUyaxz#_hPc)AZINMdu@W6!w2MyvU##O z4EVk>*>K#ck&t-EU#{z;={bcZ!J%33x2#f>=r36w;(U%knw+&oiFEsCG_hRQ173`@ zop|83vAzCMk8DlLmFVw)(+=2FAUyd($f#8j=u>tKUXNedvufTmRYoyk>!GXZU;J(J z(a?ThTv|+ys*R$9;CRTWiQNz0>SpQ#dEnCC|0u4i_O5zm z&MV(N9^yy?+ppfUYIeYk4_L8z?kbX^HL)$s6Yl@kzD@(NzA|1NF04PsF|cRd)dS}o zf$9z6wXQQDz>V``epuf--Zh%E|$e@9iCDjd_3{HMGIy1|RR(qRNWriM! z9`pj?`z6Re{?Q7+OfNq05AbVQgcif;qa`M9V#>aLZ@|uN)4V(rR}Rj}f(6_e4Pk+yT!{9?hY=b5Z98?lILKfh+x@KK*#@1&2x-W8ua@7dI} z-Hs!}DrV)oSMA_hp+T^d63)vzKQ{Mbd0XW|5u+5s@Hq0?)y#d>Mi zLkL^}I`)y_EqHFM$z&{Zhr%1PXW8<>dp<&FlN~ zY-iKNr%$lQL9RkE{myj7J{>%}Z{9ZC-2{e=9p?;`>1M1e% z>xCs{uFaQo$f0+NhDe|abmM|U&%+;N3jA|#XjhYrV zk0B#{^CS+d!f1V`+} zajvGoAuaUStQc29TohR-J!}~-|Gt|LtVt8Su>Y!c*JGLVX2{M72PIO@6Qngc<-vjn zyJo~$aGIaM=0?sB4)vdpdhuV*teztmZ#pRE2fzmbBxjq{bsxlxh>%KVbM}o<1+xdF zF2yOVCB;!nMwCh`AhRiXY3fqm(ryZY^@*0OEPjD1dRpf#t(@C%2=2_k)-ZxGop465 zA28426-VjqAZ*&w%vkFgBcQdhEfZ1x9XT6n<2sKX)duGn^?&8R`0u_cZ<=iS!!~Df zD7c^WN|=sn$b^Gwxs#k!Z2kF_I_dtkr#5gFjQ!`rsivS2Z}a`$18Ie;r)J$Q$}X>p zXSxEO6)jbrZCnRF8rXmQeE({rW>n(0W+y%O+zbCl5irL8YhqfNoY}Q_$dPeK7aBti zHE@XC8XuWtRA-2ZIB<@?O+?*u}0DJss;&8dXbz{h#VvE?g}NMfT1}AT_xk9`)VnEA-^! 
z6g#R@2^{vrb`WNivknitPV}_gf#YQbk2CS5{JXpSK0_-Z2oCrl*@SbLQxUenDFv>d ztW<>l?6TGOE?GG4?R|eo&bqcu4R2xhFRq#ENXGHoZvNe~hrQ{ra-t4~4uhq0Oh6x8 zT~c+fNCZ^&w8+w%`h!#8`mVLG>u+v z0T(8Fcl86vK@Y<^U7Pp1Jd@71YgujfQw|o~BW(Lp$JY7JP?>McP2x2A%P-&0?LeMo zbH7d8Onps0{QBpoS#ujAPx=^}Xn9U=*t3N-7p_LVlmE{5)7yg?1?)N)GVyIlps>vRqA@owX`E~%XcW)>6nbGA|ME4gO&;|M#*m+|N{TuM zY4R3lQ%zlfkwpiQjeiG`O`X6BtGtfr7e9Hxxs8)`QYq@@eFCTssVBPpKCpbNY=AW= zleWxP{0ZyO8oj(oF^Yri8A!HO@f02a8LSm$>G=g(VFY@2$CaSY2wsyVykrOzu~eF9 zZXr!7zv2;h8!#*Z2rW(6cO#0TTnQYhN3PWlA;@`<)FC%_hX~}%<8F;k?+_kXq+Ol(5z=8A~{0^L$P~|Dj8|hB7lL}lncHO^RN_SY+&`js%WLymc^Qe z2`9-EtNO-CEQ#qIzOpdN0XV>a6ap%FSv#oZHhFsEP8%*6)19=0>v+d!VgWha#tOat5Gf>LJ78 z4hPX3rvtU1nA!A&0z?L%5}$fpMy4Pay(axGCoce{7B8gGa?_hL;1IG}{2_eON^C>% z@A*@9tYie*EQxWhvHd(Fj%q6UpU1)i?L;<|ZsOEsa{1yQ%7T$Rw{-)xceyhyM%OSe zRyGXk(b4uzAlxI!B=V^+#h1v;BQ_$O4G1ApB=yh*4Qty-PL&IlT=o-Kf>atv@a#ia z1Z@8#4Vgxf3rKe-GYk=&NDkP7cY8LatUcOMK+NHhi07o@eH4t4}aKJW~+VKfN;!URW%_3N!5d*=Z)1uiB*66XcL|% zmlPTa^PDkp4SN=RR+b2_Z~e>9iexOFb1dir`^3_#g z%t!87+Vh~4)x29jMRf>@m0iVSn5?E?M65MB?@A9S_T-hlT1?>&)w~|Ep zxDW&+1c6U)saYxj$*ajr9JE7J2#|9mS9ff7171GWgb%Msw&a8Dfa_OQB{DPI$K3^- z>adJ&X5oz|jh9{mGC&}Dng&gH)lS3-cBi)siCjv++ciO>`;{U+N$pLv*qd%`F07`n zhqmGiKL~=NZi{0ybTo!o6=I{M+hyv1@SD+5xJQWHkP9|C>p$0e3nRyXP|-tUl<&y6AA7Q$|@oJbh001)n5f z)_z}Y7!&>CIp2d{n_;~%5tLSIuHq(m)FX`;tEzJt#kUYJa^N%tirw*P!lVMrdn+#0 z%GEyeBF;yg0jpy*mMC0kkcqM*jS9sKKiGV=z{AYv*R{s@ z{#RUS+izatyB6*L2QX&G3(FJAT|00SFEoX94)f%ToH6YnHd=TJ75*SOJJ*;x z;MIG5iWK}g{cr^cv95PPf!<|!2d9qh;WgFbEh6B*;QIwBbpc?l0XP0nVq%;gnH`!# zw~HnU4{@l^3=A<)&i^97b_gaA&-mT4`KBWAJp7zxko!G~*=}9#;khJbf9;POAKqQB z%iRSgEedj1n5ieVHg=QQUmhLLSou{hT+pE+qdZ@M_3e3|QYoO1p@}_-x<49C$dL|T zkk_*UDmBzg=|Y0&1D^~EE5kcv?hnR`NTfaL8ruo@hyp?;iWM<3TT#D{G&q5iV z$tsCP@iE?+mlY8|i|cvkYMB>dCFU3xUQQ7M_!x7cW$lEC_%q%GpQJ>ExDL5?$b*R> zaTDXJm&=G>#5%^UTAok%ikrt-XPQj1hyjVL`{F)_3cxWDgH$!mfg=p=q;}9Jj-$Qj z1`A5Eh&gw=#yhBjoeM>u^RArYWQkcUhy~8Ivou)#-(I%#%^9(HU2R%^%oew*&+hKu`1uZ_^PN2OU85 z47`d=0+Bpn3cEVVI23t}7Zq@_f1q<%pBmR314uVO$7uS#4daPKFF-UB?Xy7p68EC@ 
zp}geF?rae@lv&qRfF2jcAtz3^0XR{jg+R+Xdlb{^DGlM8Lb#xuZ{w)iE$JcPAYXnK z8*8>2@EIx!7J?0l&D58@!#Jy@W1W-Lyyc~3bBa62*}!#>Zn5L!X?Ve7_;Ue%R{UXO zu>*u0js+g?;z+^wXon8-i2erByM^wLCHr0SVtjq{L1?!qkKxlKns4_QW!)|zb^sC%M>}EeX z2&IWQ*wspyWz$rdsI_ZFCVsHi`jz$v-5xNYNDPfL^z7u|oWc(KFt@_%|A^x&KMjj8 z-03`vk1UMikt5T;kJK;ciPVfoD$Q~K>f9Tmu2s+9fTL{skJTzh5-c)s7^|}ArgujS zh?+%;N`Pz(&JA@}TY(6fuv7%r=7Cq2@@3)=FHxilxlO5;;sab*wFj*o0X0%U1j4U+ ze`wG!sZ*9HEOW82o|WwSxqI!=L4>jAI& z8aU@mr1wsN!KuFp4;Z8CNF_@S=rQ^C1A|H{=jsHDE8dHr(U=2nG3AA9Y|No*?F#WQ zAzQ;zryDvp;#r1p`nCgTBLaaT6=yZ^LBEc)AHI`speePX7erIQlJ~g?UWSPo%Y!z6Wsa!oKrY0Hf0H`_t+|UH3?sv zGA%&v)0L(24sfe%WnW@0JkDRYVPXV@9Z-w|#K%fP)R-R}YYg^AGAzk;8C_{IU#`bI zr|b>DyI3qNrLuwLRx4vbxvW<^htEbEpNJOWynu|C%>~YWqT<;JSeJ!13vfVTki6Pm zW{y2<2%EbF%c!@tJzgTu;^S74?xu0a^%qH2U0gUtBnl*hKH@$R*s#z7QDOhiYW7X% zJ_K`alrdyd>A2adhU?V_ave&GVu+W8NdHU9h)pz}>Kok*k?j6ua84a~s>0$%#CnA3g4 za{@UaH$%le?jXC5a1KlW{`ORf5t6KQq|~T=j;1vJS}^|18Bwt<`3-5umpL5?nY`if~kV8{XEVP z0~48JkU^SQ&d>W?Fh!c!WLvD+e8{(Vv)`{>dC;!sxZT1X^FB0R;%3W-SbwEdc!Hg) z-f*9 zkNx`g?lff_W-YjnL0I#A#jDH(RR1+?*tFQ6%LQfC>)`XBemvaqI%2ugK^X}CPHX+j ziqD;Yl%<@Hu$l$+dD4q$&VkCmY5_GxT&LFhsfe7$4h);Y4;JEN-PGGA^ zk+woFI5&M<5LI{^M7~}D?!V5PKO6#Y7Vqv{cJz7VIx9dMMfYbIl-)7rezPP>IHPlC zmCprupln0!;_U?LfWd#Xdw{Pt20^Q3unpt_w;S&=WfK<<1#gBf{!(DSKnfNJ4PPt< z7&rG0kiG|GQ{?B5*C!P4l6Qd$43vq9*r|_!_SX9;L(Q`U^@B(j2l@vq2=7VjbyLs& zl}p-a(%qo^AoUJi{Ub+iNlk7c6FR>Gi<%*{O!~Ni8z#&um%muAd?oe@2HFkOgX$=N zhM~!if( z0>=UJ=*ly{rQJ#lKZSz(kJVe!D1XQajo$))nxyUFZa(cl`gD zy9H70Aj0GqvnXbD^x3amSc$EPUj0oA+#uwG{7T!3+`&gX=iI9CdBh7!%m1JQTGxIK z7`r9!mG3lRLY7s?gn^}cS<{!1RS(Whsu|g0;7o}NJBX%ljekL|26S1s&O#aev)>8} zXExkEw^^kC&fC{sc5R z(|-gmlg?;87K3rVQtm6*!(w=J?PhTL4#0FA0EbBn4n$q(Oo2A!O6$>lVz^8L(sT>J z1NE-k^lHLzL1JbDgAq1jjMm-U^ zJ27Hh3bZ~8(n`7gg!7G$#FgS}~O;@+TIE`$iHtZ59kG17b3s>5YeOOU22i#XjSkSP&u&pS!;ziOQJb)q;>2Gc@|g!hi9l2rz@cO>w}q@b~p3iy)~mfy@9Sg^m>L&W z;}a!{3XO-CQsxY7l^{C3*DGVzSSy*dirYa@(i);p3TRtm1joV7u5Le7FoEI21Vd~K zPb7I;`&&47J!XoCd~LqPzTAy3G9?&Iyr-`Abp1qs{9!s`(-Y>M^tZ4tSrj>J4pc{+ 
zPuViSk(Rl1zkQE;1>=r#@mm#~=GZl&-bn^q{LD>?l(c3`ZoVrML6AzjDqIzJB|NdU zHxn*&EC{ zZ_lXheoy${r7D03f+nwp@?qKp5OenWm zsCk6umFDSHRBepP zaJhLu|B>`K*9R}}Hd&V{Wz>O>OkBP=vy~=qI(t|%njIbk9iAVH)@z6{w=hg$+3MZR z<4{=U=9fP6&t7Pi>vc630>>&eyue#DY01-H;?`AUhzz;DZJwS9Br|fi#*ao2Ggd$# za``u2o|IHjLl~VSDBv0Tw{Ju$$n?UFz{UlyPI5-V@s!$lO7nqVoqTW;yx>$rs5+`TI(SF`!@WMI>cu1w8mbZdQ+IfWe3 zwC+H^751b!Ol{pFK3n&AO8ZK4J^H+erB@{tWw8_-&&Zw>OT!rZlgNt;(k}9C-3ccy5Su8aa|oh}vtP z?H4eP{-zNlxCVRh(1%_4Y_)`?KSU5xdtT|e_?fL!3v90pSpSD+ff9sbo?2o!1|zTz zAbmOsXiSg@(xTUhh8RJ=UbL5OhMV&jE*^IT%COmB0J+0$fCS>zq16x-fNfy9@_v}d zy_hGjcf)ZXj<7aWp{My2tvzrT%vc(><(p&=EF!@c;M7yn^M;tYuGo2l$v4H0vk5@A z65<=bI$tmWbdXNF0mBbn|2diQ_RD3buCv6KOBe1un55@3K$f2=>#_>;fQ+}K(KRZu zg*ZVzr3Enye04;UuJ~-_pD&iX5B%VAVO>`le6~4gRs;qPL>HD!$G0EWcRF>QPVH72 zyVBqj?XQ!k)a_wG+B%DyPXJjr3Z|NHJ14nc4m4?W3<&)P;JTjknTt7txn4@|5*6z8 z%1RG_JuhQM3G7fWnMlcX-hFXZ<83D;9Faz+B%2vkpRB1LqV=#aWqbJnr{nK>m+y$~ z`O(5V(~?mIKTV}Wy;={vdOtK1ei7kbHWb*fdnQem7{(Y@5Z$s}#o5h1SLGh+v{zxF zimqduhU|Iu+b?DBg>vvpkd!B1^7{#?KBIfw*42D(Ff~j8ei-Rv+F0OfO8&25cwTR3!qoG4nN9#%w}S8&hWoA)}A0a~M=}jIgiJj-wb9k27I!9~~zU)~&eqk2S6QB)5T7C}439oru%18?6 z)_9B*vzEldJEGrK2$0+RECN{%l8gPpi3#9ziioYyBE{3j?<6$Ym*TqD$GqMt#M4Uw z7WkSP9V?7Ne?jh?^6f{wbF zAqdkJY`7fJw>K+`0NQ6q6stIf(9N6Q5+pu)2Luf}vsz|kZ08lrtQP}Lp>?q5-U@Bz z4AH`X1Lkv|jie6_S}_Vb{(`ov2$od*9dmwTVFflVFsNa%aiwK6cDs(xKV1jAr_4B^ zBe_GV&?Uc6^nAHn?{u%F#o=h^L)aA0X77MNbKT`lPw|e6s@IN$IeCs`Umt=7_#~&50U`%in zZF`ptrZ)q_p+uI%eQi@tB#JDHqt)g}WQN;3hWoIm;e*i-l~J<}-ri}T`;Fx3{tk>4 z?CXo!$d?g1Wqj`)jVvm@BVNd9$q|#g#vCFnVBV7?6Hcc!JP@Y$5N_YPkG+P#^)s{r zth%8AI=`-mC18SxtZ^>iaX#aF*+{$j7gnP>T)-ibYQkcm@4Fagj}gPwrS6h3Ostct)7GGAmmibc1=l0rbSg zh6}LAqM9RYSamy5ZtE2N00|!W+|c|lY2_))warr@kMC%Z#&h9Bq-@ePbp1#4wG&xi zu5%-y!B#Fzj^>n#FJFT=-U=v4JmH3^Y#EzTtopC7Gv`z!BO$1Jz^H~SwY%_)15go& z67r(d_}2ov7hs2_6;j!K69l#u*zpRH!#c3)jF3@EanAIw&1)DXW}!*X$30+`YD78k zB6YqmWXFIZ^4gI&m`;I>i9JSc10%vn1i_0IcrEtWC_aDfuc(oaAwt`+w-b{$A!$lJ%yTLayH;qzNvxx~zWibu5qqw}4PPFGB9 
z7$L(W&vy0)4U`vI8y)mM=1&nIlxkhsACEJVww)Z&2p}-bNb4K#0E`dLR606gCqDeuP9X;mC!c#W0a5*?^_CS~$T-V(@#zE*kD=2dNV1 zDfp4KqN!$>Cisa}q~#GD<#!zEI&sT*DOzT9?htO41uSEtjizG`GogZy&NZ!5s^B>$ z0eg;b1gYpVtEt3k%)Q#j$8jrW+gD#TLnu)-MHY%Ad?9TsBmXB7GX=b@o~#rJnL^H% zBy&XqzK~V1q*mm~r_-wQypy^N zuPy@L)XH4Rjc6}_wpv~#ScN_XXNRTt1o*piDjSd>kH-pQSsa9{tqyR})r1Mirpu@r zMuIVFwA8E5hLtNp2}13#m{s{>@nLhBC0f1TgAxPzFtp$#FDNr!rGV{Zo&$}XI!HU< zVsx0P1GtoP0R3BipkcKooC~}y3T9NFfnr3pQ2ThQ zW6u$`ND~G^+g%obIkH2b9F(PkvTbS)@jx#}$dU7FTd64)CXW<|^h6*C|9dfIiA*Lx zmGSv+W;H$zpi>ffS?#xm=Xp2U0L{`YayE1Lw3Wtwu#~56>;ob02ta!P9n)PR?#JDXwFHVh&vYkkhrNp#1acpc)thL@X2|NYk`ZA2yx}8N%3A@ z)(=Cw^He(ULB2Kv&i~!KrxGM?B^-^4!9Zvj#(lgDJ(-5P&jqmbO|Hrk6wH4MNyPd- zMH%CyGejJvK^YySaIR4m150R`iEux%%e+rjvy=nK0A#Fzb;lm%hx%q=(Z8>XsT!YB zCZj@k@v;)*dt-zpu=R&BCPhQkBHJ+NQ1_pA2dCNPzS}{TJDCMP@c%>9RyQR!Q%_WD zh6qmWp9XKs+i-uJ!ssL3b_xP4LyGnrRS#h`CyN)&Nqz6rgRo045`nC4t#}Kb+;@J? zupHo7J+Taq$^=4uYnD;0m28=L0qHK-SkQwjwS(IM^LK>-bl_MeA(%*{M6HTCKpjAF zzxjN^&}9X=1Okav1y6VaS^9h~ViA%%fI{2%mk)U9y^FjPu6=iwa4%~l@k{DZSt;=9 zCyNrb?kivZ0GxfoT=b;rqk`xhurD6Gq(B(fIT=y~9tDF)cft%;+M&%^GhP+}oZesi zMRLkH>(FLQA4Jj%PJ5)NbCyp z67SU$&d3Uw0p`OK9J#+MMFx3lIiklv=CBB3R}!lm2EoF-Pn|);1|$o3E*rNrtZr_A z$N?hNc@SQN`%fw@ud-jqQn`RT1s=)-AaqJ7NW=!LI>{dCt-opc%N?*Y0UFs*P?*)Z4##c!dXikI(rThTcNm#>dC-_8iUFcazRpaT-xepb* zcj*Qe5Q_|Va4~Uf6|Hau^rB5Hm5&;BLqVdzf~KVC^dJ;`dO|i$5#NLcV2j1n&L|DW z=*C;!uLT5ez8G|<6-f0vSgNoAw?e?rfqsDTGflv~A|#W-!bw>8Y{&(?pW;YU-%Ef> z$E)-aMv6tJ4}mLZrEToY`2|J2Gr-n_?W_8LIjgB>stJ1zmd=zhn6w6!h>1DLlcond z<(S5{kXcqiCjaH))(xnG^7o~ek00K3FPqx)KgsO4akGC>`V-7 zzp{^43-F66DgU*T6A+O?&fLfR=s8GC3BGI077-OnFf`ab>#Pj+oz@fGE_ORM^ayXP zX;w4Jt(&ZV=(rU)VEM2N-|fBJ0rP2eM4qXfm$?9cRti!s07D`iTt_$qI<3)}5e~^? 
zKo3tqpicmydN8?rYeQ1|lz>rTb*K_#L~R4TMfOOw+Xr}Q05brlWmzB0nfcK#Yn%3G zPMO*VW}htQl_rS#Z(ehR*uWB9;yz%H{G`Z^f0qY&@aH;J3I=?FU8PIT%}7)Fu#sJP z^2EkJYzwEHJ*^dCo-PfLE5YcVS-?D0zU)cWW!6!XCmn6S{UFj_=gPuc^G#m&@N3>>k|)r*AJ zhdi3GK3x#{bWod2kAni^RMJ8=D2rP9fJrvW7=aG9EAd)sSM^X$99V$ZX zK9GIkX(6Z71GSZ?xK&umi;REf6@OthJYI{ho4>oq9AmslwSBV&rTzC-Y<4C(R~&80 zaWISZT0cn6KqFI>5PiwFsVTD!fciKl_x76?5s_HdAue&G z*~LUi8I{_)0!0ny?t>T}yk^58-UQF(c(9(%S{3Tt8Mbh#MCs)LW7uih0^W9O?w z;OFNqAA4UdA$R2KN}0zyeO(K$Md31&GqcG%eP8WwwNj3vO2{$adg9jy`*lP( z`j~NZf2YUfQ=JGhv-cc!Mq}nr88af%Vol^kAXr1sTWu6qtWMk9uj6yDzhO0dMyyWT zh{oeIIcw-mfu0vRtA`ag1fF$4y&>?d34GLvtfd8g(7+yWe(Godub`FV9B~^ETpXTFqL!X5W@wrv_-e^g|QT z^mUrt)mh^iSaZ+P^y>Ir9WSidH5ccl@zKMt`~FQ^%PHb_(J1HLZrk^ZCinHObknP& z+w1LmSbf0nhiAm=PtDg&)q}P=nst2mfj-1t8_C@))?7}+JL1*x`ryaI8WG*Jnte_a z!THjje_QKmLEM}*<_B?8d@Mc^+n#pyKehIr!1IOp+UDpTPj6YxzJ|d2i`WpK3FrSr z`G;2DQpAI93N$(M(BIko8>@-*WfpbLj+dTJzpb1zTovC7o{{=j;x`+qE0~Gr z4?+KHg8I*beVm;+Hv~2I&k0v!)@gB4To8*kXDw$nxzjHyKWmw{bu@J}d+38+x0*fl zuUO6ev>@Uqx*6<+rv?3}q4Cp?S=8{qDO|mz+|8rTyoQY_MP|Pt+%wT| zXzK1+0_Du2H~sLuE7*Tb92XCZ2gH`XxBOP{d$1#}iKeIh`VFn4sb3e)&wjM4qleWS zO}}+)5+xI zOhP2%si}mn#6ggXC#K`cMEt~bd~#}LhEsm|`(H1O>V%NXc5&GKo40?-Oj0=T_doC9 z?L+)4V(P$vi7wOWtJ%N3womcDF7X3XZgoDTb@^-`;eXzV10#Z1#}8t^h3zx^PwUKq z88 +""" + +from copy import deepcopy +from pathlib import Path + +import numpy as np +import pandas as pd +import torch + +from torchvision import transforms +from malpolon.data.datasets.geolifeclef2025_pre_extracted import ( + TestDataset, TrainDataset, construct_patch_path, load_bioclim, + load_landsat, load_sentinel) + +ROOT_PATH = Path("malpolon/tests/data/glc25_pre_extracted/") +TRANSFORMS = transforms.Compose([torch.tensor]) +DATA_PATHS = { + 'train': { + 'surveyId': 1027998, + 'landsat_data_dir': str(ROOT_PATH / "SateliteTimeSeries-Landsat/cubes/PA-train/"), + 'bioclim_data_dir': str(ROOT_PATH / "BioclimTimeSeries/cubes/PA-train/"), + 'sentinel_data_dir': str(ROOT_PATH / "SatelitePatches/PA-train") + }, + 'test': { + 
'surveyId': 5000108, + 'landsat_data_dir': str(ROOT_PATH / "SateliteTimeSeries-Landsat/cubes/PA-test/"), + 'bioclim_data_dir': str(ROOT_PATH / "BioclimTimeSeries/cubes/PA-test/"), + 'sentinel_data_dir': str(ROOT_PATH / "SatelitePatches/PA-test") + } +} + + +def test_construct_patch_path(): + surveyId = DATA_PATHS['train']['surveyId'] + path = Path(DATA_PATHS['train']['sentinel_data_dir']) + patch_path = construct_patch_path(path, surveyId) + + assert str(patch_path) == str(path / "98/79/" / f"{surveyId}.tiff") + + +def test_load_landsat(): + surveyId = DATA_PATHS['train']['surveyId'] + path = Path(DATA_PATHS['train']['landsat_data_dir']) / f'GLC25-PA-train-landsat-time-series_{surveyId}_cube.pt' + x = load_landsat(str(path)) + + assert list(x.shape) == [6, 4, 21] + assert x.dtype == np.float32 + + x = load_landsat(str(path), transform=TRANSFORMS) + assert x.dtype == torch.float32 + + +def test_load_bioclim(): + surveyId = DATA_PATHS['train']['surveyId'] + path = Path(DATA_PATHS['train']['bioclim_data_dir']) / f'GLC25-PA-train-bioclimatic_monthly_{surveyId}_cube.pt' + x = load_bioclim(str(path)) + + assert list(x.shape) == [4, 19, 12] + assert x.dtype == np.float32 + + x = load_bioclim(str(path), transform=TRANSFORMS) + assert x.dtype == torch.float32 + + +def test_load_sentinel(): + surveyId = DATA_PATHS['train']['surveyId'] + path = construct_patch_path(DATA_PATHS['train']['sentinel_data_dir'], surveyId) + x = load_sentinel(str(path)) + + assert list(x.shape) == [4, 64, 64] + assert x.dtype == np.float32 + + x = load_sentinel(str(path), transform=TRANSFORMS) + assert x.dtype == torch.float32 + + +def test_train_dataset(): + DATA_PATHS2 = deepcopy(DATA_PATHS) + DATA_PATHS2['train'].pop('surveyId') + DATA_PATHS2['test'].pop('surveyId') + + path = ROOT_PATH / 'metadata.csv' + df_train = pd.read_csv(path) + n_classes = 11255 + df_test = df_train[df_train['subset'] == 'test'] + df_train = df_train[df_train['subset'] == 'train'] + + # Train + ds_train_multiclass = 
TrainDataset(df_train, n_classes, **DATA_PATHS2['train'], transform=None, task='classification_multiclass') + ds_train_multilabel = TrainDataset(df_train, n_classes, **DATA_PATHS2['train'], transform=None, task='classification_multilabel') + + ## Multiclass classification + res_train_mc = ds_train_multiclass[0] + x_landsat_mc, x_bioclim_mc, x_sentinel_mc, y_mc, survey_id_mc = res_train_mc # pylint: disable=W0632 + + assert len(ds_train_multiclass) == 48 + assert len(res_train_mc) == 5 + assert y_mc == 10113 + assert survey_id_mc == 1027998 + assert isinstance(x_landsat_mc, torch.Tensor) + assert isinstance(x_bioclim_mc, torch.Tensor) + assert isinstance(x_sentinel_mc, torch.Tensor) + + ## Multilabel classification + res_train_ml = ds_train_multilabel[0] + x_landsat_ml, x_bioclim_ml, x_sentinel_ml, y_ml, survey_id_ml = res_train_ml # pylint: disable=W0632 + y_multilabel = torch.zeros(n_classes) + y_multilabel[[10111, 10112, 10113]] = 1 + + assert len(ds_train_multilabel) == 50 + assert len(res_train_ml) == 5 + assert torch.equal(y_ml, y_multilabel) + assert survey_id_ml == 1027998 + assert isinstance(x_landsat_ml, torch.Tensor) + assert isinstance(x_bioclim_ml, torch.Tensor) + assert isinstance(x_sentinel_ml, torch.Tensor) + + + # Test + ds_test_multiclass = TestDataset(df_test, n_classes, **DATA_PATHS2['test'], transform=None, task='classification_multiclass') + ds_test_multilabel = TestDataset(df_test, n_classes, **DATA_PATHS2['test'], transform=None, task='classification_multilabel') + + # Multiclass classification + res_test_mc = ds_test_multiclass[0] + x_landsat_mc, x_bioclim_mc, x_sentinel_mc, y_mc, survey_id_mc = res_test_mc # pylint: disable=W0632 + assert len(ds_test_multiclass) == 49 + assert len(res_test_mc) == 5 + assert survey_id_mc == 5000108 + assert y_mc == 1 + assert isinstance(x_landsat_mc, torch.Tensor) + assert isinstance(x_bioclim_mc, torch.Tensor) + assert isinstance(x_sentinel_mc, torch.Tensor) + assert hasattr(ds_test_multiclass, 'targets') 
and len(ds_test_multiclass.targets) == len(ds_test_multiclass) + assert hasattr(ds_test_multiclass, 'observation_ids') and len(ds_test_multiclass.observation_ids) == len(ds_test_multiclass) + + ## Multilabel classification + res_test_ml = ds_test_multilabel[0] + x_landsat_ml, x_bioclim_ml, x_sentinel_ml, y_ml, survey_id_ml = res_test_ml # pylint: disable=W0632 + y_multilabel = torch.zeros(n_classes) + y_multilabel[[1, 2]] = 1 + + assert len(ds_test_multilabel) == 50 + assert len(res_test_ml) == 5 + assert survey_id_ml == 5000108 + assert torch.equal(y_ml, y_multilabel) + assert isinstance(x_landsat_ml, torch.Tensor) + assert isinstance(x_bioclim_ml, torch.Tensor) + assert isinstance(x_sentinel_ml, torch.Tensor) + assert hasattr(ds_test_multilabel, 'targets') and len(ds_test_multilabel.targets) == len(ds_test_multilabel) + assert hasattr(ds_test_multilabel, 'observation_ids') and len(ds_test_multilabel.observation_ids) == len(ds_test_multilabel) From 12bc1f48350bfda29ea3a1132a7b56e4a6b078cc Mon Sep 17 00:00:00 2001 From: tlarcher Date: Wed, 9 Apr 2025 14:18:59 +0200 Subject: [PATCH 12/20] Linting --- .../glc25_cnn_multimodal_ensemble.py | 6 +- .../datasets/geolifeclef2025_pre_extracted.py | 72 +++++++++---------- 2 files changed, 41 insertions(+), 37 deletions(-) diff --git a/examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/glc25_cnn_multimodal_ensemble.py b/examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/glc25_cnn_multimodal_ensemble.py index fc0a0da8..2b42762b 100644 --- a/examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/glc25_cnn_multimodal_ensemble.py +++ b/examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/glc25_cnn_multimodal_ensemble.py @@ -23,7 +23,11 @@ def set_seed(seed): - import lightning.pytorch as pl + """Set the experiment's randomness. + + Args: + seed (int): seed id to set the randomness. 
+ """ from lightning.pytorch import seed_everything # Set seed for Python's built-in random number generator diff --git a/malpolon/data/datasets/geolifeclef2025_pre_extracted.py b/malpolon/data/datasets/geolifeclef2025_pre_extracted.py index 79263ee9..6133396e 100644 --- a/malpolon/data/datasets/geolifeclef2025_pre_extracted.py +++ b/malpolon/data/datasets/geolifeclef2025_pre_extracted.py @@ -14,12 +14,9 @@ import numpy as np import pandas as pd import torch -from sklearn.preprocessing import LabelEncoder -from torch.utils.data import DataLoader, Dataset +from torch.utils.data import Dataset from torchvision import transforms -from torchvision.datasets.utils import (download_and_extract_archive, - extract_archive) -from torchvision.io import read_image +from torchvision.datasets.utils import extract_archive from malpolon.data.data_module import BaseDataModule from malpolon.data.utils import split_obs_spatially @@ -43,35 +40,31 @@ def construct_patch_path(data_path, survey_id): patch path """ path = data_path - for d in (str(survey_id)[-2:], str(survey_id)[-4:-2]): - path = os.path.join(path, d) + for sub_path in (str(survey_id)[-2:], str(survey_id)[-4:-2]): + path = os.path.join(path, sub_path) path = os.path.join(path, f"{survey_id}.tiff") return path -def quantile_normalize(band, low=2, high=98): - sorted_band = np.sort(band.flatten()) - quantiles = np.percentile(sorted_band, np.linspace(low, high, len(sorted_band))) - normalized_band = np.interp(band.flatten(), sorted_band, quantiles).reshape(band.shape) - min_val, max_val = np.min(normalized_band), np.max(normalized_band) +# def quantile_normalize(band): +# """Perform normalization on an array. 
- # Prevent division by zero if min_val == max_val - if max_val == min_val: - return np.zeros_like(normalized_band, dtype=np.float32) # Return an array of zeros +# Args: +# band (_type_): _description_ - # Perform normalization (min-max scaling) - return ((normalized_band - min_val) / (max_val - min_val)).astype(np.float32) +# Returns: +# _type_: _description_ +# """ +# band = np.array(band, dtype=np.float32) +# min_val = np.nanmin(band) # Use nanmin to ignore NaNs +# max_val = np.nanmax(band) # Use nanmax to ignore NaNs -def quantile_normalize(band): - band = np.array(band, dtype=np.float32) - min_val = np.nanmin(band) # Use nanmin to ignore NaNs - max_val = np.nanmax(band) # Use nanmax to ignore NaNs +# if max_val == min_val: +# return np.zeros_like(band) # If max and min are the same, return an array of zeros - if max_val == min_val: - return np.zeros_like(band) # If max and min are the same, return an array of zeros +# return ((band - min_val) / (max_val - min_val)).astype(np.float32) - return ((band - min_val) / (max_val - min_val)).astype(np.float32) def load_landsat(path, transform=None): """Load Landsat pre-extracted time series data. @@ -149,7 +142,7 @@ def load_sentinel(path, transform=None): """ with rasterio.open(path) as dataset: image = dataset.read(out_dtype=np.float32) # Read all bands - image = np.array([quantile_normalize(band) for band in image]) # Apply quantile normalization + # image = np.array([quantile_normalize(band) for band in image]) # Apply quantile normalization # image = np.transpose(image, (1, 2, 0)) # Convert to HWC format if transform: image = transform(image) @@ -218,9 +211,18 @@ def __init__( self.metadata = self.metadata.reset_index(drop=True) def __len__(self): + """Return the number of samples in the dataset.""" return len(self.metadata) def __getitem__(self, idx): + """Get a dataset sample. 
+ + Args: + idx (int): n-th sample + + Returns: + (tuple): tuple of data samples (landsat, bioclim, sentinel), label tensor (speciesId) and surveyId + """ survey_id = self.metadata.surveyId.iloc[idx] data_samples = [] @@ -288,6 +290,14 @@ def __init__( self.subset = 'test' def __getitem__(self, idx): + """Get a dataset sample. + + Args: + idx (int): n-th sample + + Returns: + (tuple): tuple of data samples (landsat, bioclim, sentinel), label tensor (speciesId) and surveyId + """ survey_id = self.metadata.surveyId[idx] data_samples = [] @@ -414,18 +424,8 @@ def get_dataset( self.dataset_test = dataset return dataset - def val_dataloader(self) -> DataLoader: - dataloader = DataLoader( - self.dataset_val, - batch_size=self.inference_batch_size, - num_workers=self.num_workers, - pin_memory=self.pin_memory, - shuffle=False, - ) - return dataloader - def _check_integrity(self): - """Check if the dataset is already downloaded and split into train and val sets." + """Check if the dataset is already downloaded and split into train and val sets. 
Returns ------- From 6f98990a0c76e16bea8c282058f87c9f2f21d47a Mon Sep 17 00:00:00 2001 From: tlarcher Date: Wed, 9 Apr 2025 14:28:18 +0200 Subject: [PATCH 13/20] Added GLC25 in test_examples and ran it: OK --- malpolon/tests/test_examples.py | 92 ++++++++++++++++++++++++++++++++- 1 file changed, 90 insertions(+), 2 deletions(-) diff --git a/malpolon/tests/test_examples.py b/malpolon/tests/test_examples.py index bc1e75ae..eed79420 100644 --- a/malpolon/tests/test_examples.py +++ b/malpolon/tests/test_examples.py @@ -258,6 +258,31 @@ ], } +GLC25_PRE_EXTRACTED_EXAMPLE_PATHS = { + "geolifeclef2025_pre_extracted": [ + # Multilabel classif (species) + ## Training (raw, transfer learning, inference) + {"ref": "Benchmarks/geolifeclef/geolifeclef2025_pre_extracted, classification_multilabel (species), training_raw", + "path": Path('examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/glc25_cnn_multimodal_ensemble.py'), + "hydra_args": f"{GPU_ARGS} {TRAIN_ARGS} run.checkpoint_path=null trainer.val_check_interval=1 trainer.check_val_every_n_epoch=1 loggers.exp_name=glc25_pre_extracted_mme_test model.model_kwargs.pretrained=false"}, + {"ref": "Benchmarks/geolifeclef/geolifeclef2025_pre_extracted, classification_multilabel (species), training_transfer_learning", + "path": Path('examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/glc25_cnn_multimodal_ensemble.py'), + "hydra_args": f"{GPU_ARGS} {TRAIN_ARGS} run.checkpoint_path={OUT_DIR}_training_raw/last.ckpt trainer.val_check_interval=1 trainer.check_val_every_n_epoch=1 loggers.exp_name=glc25_pre_extracted_mme_test model.model_kwargs.pretrained=false"}, + {"ref": "Benchmarks/geolifeclef/geolifeclef2025_pre_extracted, classification_multilabel (species), training_inference", + "path": Path('examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/glc25_cnn_multimodal_ensemble.py'), + "hydra_args": f"{GPU_ARGS} {TRAIN_ARGS} run.predict=True run.checkpoint_path={OUT_DIR}_training_raw/last.ckpt 
trainer.val_check_interval=1 trainer.check_val_every_n_epoch=1 loggers.exp_name=glc25_pre_extracted_mme_test model.model_kwargs.pretrained=false"}, + ## Inference (test_dataset & test_point) + ## NO YET + # {"ref": "Inference, classification_multilabel, inference_dataset", + # "path": Path("examples/inference/geolifeclef2025_pre_extracted/glc25_cnn_multimodal_ensemble.py"), + # "hydra_args": f"{GPU_ARGS} {INFER_ARGS} model.model_kwargs.pretrained=false"}, + # {"ref": "Inference, classification_multilabel, inference_point", + # "path": Path("examples/inference/geolifeclef2025_pre_extracted/glc25_cnn_multimodal_ensemble.py"), + # "hydra_args": f"{GPU_ARGS} {INFER_ARGS} run.predict_type=test_point model.model_kwargs.pretrained=false"}, + ], +} + + # @pytest.mark.skip(reason="Slow or no guarantee of having the data available.") def test_train_inference_examples(): ckpt_path = '' @@ -380,8 +405,6 @@ def test_GLC23_examples(): assert os.path.isfile(out_dir / 'hparams.yaml') assert os.path.isdir(out_dir / 'tensorboard_logs') elif 'inference' in expe_type: - a = os.system(f'python {path.name} hydra.run.dir={out_dir} {args} run.checkpoint_path={ckpt_path}/last.ckpt') - assert not a if expe_type != 'inference_dataset': assert os.path.isfile(out_dir / 'prediction_point.csv') if expe_type != 'inference_point': @@ -465,3 +488,68 @@ def test_GLC24_pre_extracted_examples(): os.system(f'rm -rf {path}') print(f'{INFO} > {LINK}{path}{RESET}') print(f'\n{INFO}[INFO] Done. 
{RESET}') + + +@pytest.mark.skip(reason="Impossible for pytest to run because user input is needed to validate data download.") +def test_GLC25_pre_extracted_examples(): + ckpt_path = '' + for expe_name, v in GLC25_PRE_EXTRACTED_EXAMPLE_PATHS.items(): + print(f'\n{INFO}[INFO] --- Scenarios "benchmarks/geolifeclef2025_pre_extracted" --- {RESET}') + print(f'\n{INFO}[INFO] Testing example: {expe_name}{RESET}{INFO}...{RESET}') + for expes in v: + ref, path, args = expes['ref'], expes['path'], expes['hydra_args'] + expe_type = ref.rsplit(', ', maxsplit=1)[-1].lower() + print(f'{INFO}[INFO] > {LINK}{path.name}{RESET}{INFO}: {ref}...{RESET}\n') + assert path.exists() + os.chdir(path.parent) + + out_dir = Path(f"{OUT_DIR}_{ref.rsplit(' ', maxsplit=1)[-1]}") + if out_dir.exists(): + os.system(f'rm -rf {out_dir}') + + # Create a temporary lightweight observation file + if 'habitat' in str(path): + if 'inference' in expe_type: + df = pd.read_csv('dataset/geolifeclef-2025_habitats/GLC25_PA_metadata_habitats-lvl3_test.csv').sample(n=100) + else: + df = pd.read_csv('dataset/geolifeclef-2025_habitats/GLC25_PA_metadata_habitats-lvl3_train_split-10.0%_val.csv').sample(n=100) + else: + if 'inference' in expe_type: + df = pd.read_csv('dataset/geolifeclef-2025/GLC25_PA_metadata_test.csv').sample(n=100) + else: + df = pd.read_csv('dataset/geolifeclef-2025/GLC25_PA_metadata_train_val-0.6min.csv').sample(n=100) + df.to_csv('obs_sample.csv', index=False, sep=',') + TMP_PATHS_TO_DELETE.append(Path(os.getcwd()) / 'obs_sample.csv') + args += " 'data.metadata_paths.train=obs_sample.csv' 'data.metadata_paths.val=obs_sample.csv' 'data.metadata_paths.test=obs_sample.csv' " + + if any(v in expe_type for v in ['training_raw', 'training_transfer_learning']): + a = os.system(f"python {path.name} {args} hydra.run.dir={out_dir} ") # 5-6x faster than subprocess.run or popen + assert not a + if expe_type != 'training_transfer_learning': + assert os.path.isfile(out_dir / 'last.ckpt') # When using 
transfer learning, last.ckpt is not guaranteed to exist as lightning my overwrite it with the same link referencing itself and breaking if there are no "proper" checkpoints to reference (which is the case when begining the transfer learning task) + ckpt_path = Path(os.getcwd()) / out_dir + assert os.path.isfile(out_dir / 'glc25_pre_extracted_mme_test/metrics.csv') + assert os.path.isfile(out_dir / 'glc25_pre_extracted_mme_test/hparams.yaml') + assert os.path.isfile(out_dir / f'{path.stem}.log') + assert os.path.isdir(out_dir / 'tensorboard_logs') + elif 'inference' in expe_type: + a = os.system(f'python {path.name} hydra.run.dir={out_dir} {args} run.checkpoint_path={ckpt_path}/last.ckpt') + assert not a + if 'inference_point' in expe_type: + assert os.path.isfile(out_dir / 'prediction_point.csv') + if 'inference_dataset' in expe_type: + assert os.path.isfile(out_dir / 'predictions_test_dataset.csv') + elif 'data_loading' in expe_type: + a = os.system(f"python {path.name}") + assert not a + + TMP_PATHS_TO_DELETE.append(Path(os.getcwd()) / out_dir) + os.chdir(PROJECT_ROOT_PATH) + print(f'\n{INFO}[INFO] OK. {RESET}') + + # Clean up: remove the output files + print(f'\n{INFO}[INFO] Cleaning up temporary test output files... {RESET}') + for path in TMP_PATHS_TO_DELETE: + os.system(f'rm -rf {path}') + print(f'{INFO} > {LINK}{path}{RESET}') + print(f'\n{INFO}[INFO] Done. 
{RESET}') From c043312f09e81df864bdff3c0ecbdfc898610511 Mon Sep 17 00:00:00 2001 From: Theo Larcher <42494948+tlarcher@users.noreply.github.com> Date: Wed, 9 Apr 2025 14:38:21 +0200 Subject: [PATCH 14/20] Update test.yml updated libgeos-dev system package version --- .github/workflows/test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 39006521..5f21b62c 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -27,7 +27,7 @@ jobs: python-version: ${{ matrix.python-version }} cache: 'pip' # caching pip dependencies - name: install cartopy system packages - run: sudo apt-get install libgeos-dev=3.10.2-1 + run: sudo apt-get install libgeos-dev=3.11.1-1 - name: pip install -r requirements_python3.10.txt run: pip install -r ./requirements_python3.10.txt - name: pip install -e . From 065625dfab249926201e71e0d195d50f5b9c1785 Mon Sep 17 00:00:00 2001 From: Theo Larcher <42494948+tlarcher@users.noreply.github.com> Date: Wed, 9 Apr 2025 14:39:02 +0200 Subject: [PATCH 15/20] Update dispatch-test-examples.yml Updated libgeos-dev system package version --- .github/workflows/dispatch-test-examples.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/dispatch-test-examples.yml b/.github/workflows/dispatch-test-examples.yml index b89860a7..4b5e54e6 100644 --- a/.github/workflows/dispatch-test-examples.yml +++ b/.github/workflows/dispatch-test-examples.yml @@ -19,11 +19,11 @@ jobs: python-version: ${{ matrix.python-version }} cache: 'pip' # caching pip dependencies - name: install cartopy system packages - run: sudo apt-get install libgeos-dev=3.10.2-1 + run: sudo apt-get install libgeos-dev=3.11.1-1 - name: pip install -r requirements_python3.10.txt run: pip install -r ./requirements_python3.10.txt - name: pip install -e . run: pip install -e . 
- name: Pytest run: | - pytest malpolon/tests/test_examples.py \ No newline at end of file + pytest malpolon/tests/test_examples.py From 13318467711550015aa921db1d90ee122b71491f Mon Sep 17 00:00:00 2001 From: Theo Larcher <42494948+tlarcher@users.noreply.github.com> Date: Wed, 9 Apr 2025 14:39:15 +0200 Subject: [PATCH 16/20] Update dispatch-test.yml Updated libgeos-dev system package version --- .github/workflows/dispatch-test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/dispatch-test.yml b/.github/workflows/dispatch-test.yml index 82a367c9..f5ca43e1 100644 --- a/.github/workflows/dispatch-test.yml +++ b/.github/workflows/dispatch-test.yml @@ -19,7 +19,7 @@ jobs: python-version: ${{ matrix.python-version }} cache: 'pip' # caching pip dependencies - name: install cartopy system packages - run: sudo apt-get install libgeos-dev=3.10.2-1 + run: sudo apt-get install libgeos-dev=3.11.1-1 - name: pip install -r requirements_python3.10.txt run: pip install -r ./requirements_python3.10.txt - name: pip install -e . From f3f5f9e743db4bad750dea9b3dccc10a9d356137 Mon Sep 17 00:00:00 2001 From: Theo Larcher <42494948+tlarcher@users.noreply.github.com> Date: Wed, 9 Apr 2025 14:43:19 +0200 Subject: [PATCH 17/20] Update dispatch-test.yml Deleted version specification for libgeos-dev --- .github/workflows/dispatch-test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/dispatch-test.yml b/.github/workflows/dispatch-test.yml index f5ca43e1..e974e199 100644 --- a/.github/workflows/dispatch-test.yml +++ b/.github/workflows/dispatch-test.yml @@ -19,7 +19,7 @@ jobs: python-version: ${{ matrix.python-version }} cache: 'pip' # caching pip dependencies - name: install cartopy system packages - run: sudo apt-get install libgeos-dev=3.11.1-1 + run: sudo apt-get install libgeos-dev - name: pip install -r requirements_python3.10.txt run: pip install -r ./requirements_python3.10.txt - name: pip install -e . 
From 2aa13c8152d1ce956dbac46c6814ed5506e71c03 Mon Sep 17 00:00:00 2001 From: tlarcher Date: Wed, 9 Apr 2025 14:57:49 +0200 Subject: [PATCH 18/20] Removed libgeos-dev version constraint in GitHub workers as several versions couldn't be found by GitHub package manager --- .github/workflows/dispatch-test-examples.yml | 2 +- .github/workflows/test.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/dispatch-test-examples.yml b/.github/workflows/dispatch-test-examples.yml index 4b5e54e6..f84e305c 100644 --- a/.github/workflows/dispatch-test-examples.yml +++ b/.github/workflows/dispatch-test-examples.yml @@ -19,7 +19,7 @@ jobs: python-version: ${{ matrix.python-version }} cache: 'pip' # caching pip dependencies - name: install cartopy system packages - run: sudo apt-get install libgeos-dev=3.11.1-1 + run: sudo apt-get install libgeos-dev - name: pip install -r requirements_python3.10.txt run: pip install -r ./requirements_python3.10.txt - name: pip install -e . diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 5f21b62c..09a6bf8a 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -27,7 +27,7 @@ jobs: python-version: ${{ matrix.python-version }} cache: 'pip' # caching pip dependencies - name: install cartopy system packages - run: sudo apt-get install libgeos-dev=3.11.1-1 + run: sudo apt-get install libgeos-dev - name: pip install -r requirements_python3.10.txt run: pip install -r ./requirements_python3.10.txt - name: pip install -e . From 78b5cd8aa7bbd8378e3466d8887d00d99861148a Mon Sep 17 00:00:00 2001 From: tlarcher Date: Wed, 9 Apr 2025 15:09:59 +0200 Subject: [PATCH 19/20] Updated setup.py. 
Fixed typo --- malpolon/data/datasets/geolifeclef2025_pre_extracted.py | 2 +- setup.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/malpolon/data/datasets/geolifeclef2025_pre_extracted.py b/malpolon/data/datasets/geolifeclef2025_pre_extracted.py index 6133396e..94aa16b3 100644 --- a/malpolon/data/datasets/geolifeclef2025_pre_extracted.py +++ b/malpolon/data/datasets/geolifeclef2025_pre_extracted.py @@ -1,4 +1,4 @@ -"""This module provides Datasets and Datamodule for GeoLifeCLEF2024 data. +"""This module provides Datasets and Datamodule for GeoLifeCLEF2025 data. Author: Lukas Picek Theo Larcher diff --git a/setup.py b/setup.py index 5c2fe56b..9831e488 100644 --- a/setup.py +++ b/setup.py @@ -1,8 +1,8 @@ from setuptools import find_packages, setup setup(name="malpolon", - version="2.1.2", - description="Malpolon v2.1.2", + version="2.2.0", + description="Malpolon v2.2.0", author="Theo Larcher, Titouan Lorieul, Benjamin Deneu, Lukas Picek", author_email="theo.larcher@inria.fr, titouan.lorieul@gmail.com, benjamin.deneu@wsl.ch, lukas.picek@inria.fr", url="https://github.com/plantnet/malpolon", From 72d5911c278016bcf3fe6a905cb72ea39890b394 Mon Sep 17 00:00:00 2001 From: Theo Larcher <42494948+tlarcher@users.noreply.github.com> Date: Tue, 15 Apr 2025 17:10:31 +0200 Subject: [PATCH 20/20] Glc25 (#78) * Temporary removed call for quantile_normalize * WiP: Added transform methods ready to be called in the pipeline. Need to verify well behavior of pre_compute of linear quantiles method and lint file * Added custom transforms for GLC25. Changed GLC25 main file weights dir directory to avoid re-downloading the MME pre-trained weight over again. 
Renamed GLC25 Stats folder * Linting on GLC25 files --- .../config/glc25_cnn_multimodal_ensemble.yaml | 2 +- ...llite_min-max_values_linear_approx-100.npy | Bin 0 -> 160 bytes .../Satellite_quantiles_linear_approx-100.npy | Bin 0 -> 131200 bytes .../{stats => Stats}/Stats_bioclim_test.csv | 0 .../{stats => Stats}/Stats_bioclim_train.csv | 0 .../{stats => Stats}/Stats_bioclim_val.csv | 0 .../{stats => Stats}/Stats_landsat_test.csv | 0 .../{stats => Stats}/Stats_landsat_train.csv | 0 .../{stats => Stats}/Stats_landsat_val.csv | 0 .../{stats => Stats}/Stats_satellite_test.csv | 0 .../Stats_satellite_train.csv | 0 .../{stats => Stats}/Stats_satellite_val.csv | 0 .../glc25_cnn_multimodal_ensemble.py | 55 ++- .../transforms.py | 360 ++++++++++++++++++ .../datasets/geolifeclef2025_pre_extracted.py | 19 - 15 files changed, 414 insertions(+), 22 deletions(-) create mode 100644 examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/dataset/geolifeclef-2025/Stats/Satellite_min-max_values_linear_approx-100.npy create mode 100644 examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/dataset/geolifeclef-2025/Stats/Satellite_quantiles_linear_approx-100.npy rename examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/dataset/geolifeclef-2025/{stats => Stats}/Stats_bioclim_test.csv (100%) rename examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/dataset/geolifeclef-2025/{stats => Stats}/Stats_bioclim_train.csv (100%) rename examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/dataset/geolifeclef-2025/{stats => Stats}/Stats_bioclim_val.csv (100%) rename examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/dataset/geolifeclef-2025/{stats => Stats}/Stats_landsat_test.csv (100%) rename examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/dataset/geolifeclef-2025/{stats => Stats}/Stats_landsat_train.csv (100%) rename examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/dataset/geolifeclef-2025/{stats => 
Stats}/Stats_landsat_val.csv (100%) rename examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/dataset/geolifeclef-2025/{stats => Stats}/Stats_satellite_test.csv (100%) rename examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/dataset/geolifeclef-2025/{stats => Stats}/Stats_satellite_train.csv (100%) rename examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/dataset/geolifeclef-2025/{stats => Stats}/Stats_satellite_val.csv (100%) create mode 100644 examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/transforms.py diff --git a/examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/config/glc25_cnn_multimodal_ensemble.yaml b/examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/config/glc25_cnn_multimodal_ensemble.yaml index a4896d6c..6f8dcfde 100644 --- a/examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/config/glc25_cnn_multimodal_ensemble.yaml +++ b/examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/config/glc25_cnn_multimodal_ensemble.yaml @@ -32,7 +32,7 @@ task: trainer: # gpus: 1 # Deprecated since pytorchlightning 1.7, removed in 2.0. 
Replaced by the 2 next attributes - accelerator: "gpu" + accelerator: "cpu" devices: 'auto' max_epochs: 21 # if resuming training from our pre-trained MME model, needs to be > 19 val_check_interval: 100 diff --git a/examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/dataset/geolifeclef-2025/Stats/Satellite_min-max_values_linear_approx-100.npy b/examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/dataset/geolifeclef-2025/Stats/Satellite_min-max_values_linear_approx-100.npy new file mode 100644 index 0000000000000000000000000000000000000000..e759031ff80e3f3d86472ec2809d71cd97d7a426 GIT binary patch literal 160 zcmbR27wQ`j$;eQ~P_3SlTAW;@Zl$1ZlV+l>qoAIaUsO_*m=~X4l#&V(cT3DEP6dh= qXCxM+0{I#yItoUbItsN4WCJb+hKBtN3yj^sv;&Mbg76Pmx&Z)BQ6w?| literal 0 HcmV?d00001 diff --git a/examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/dataset/geolifeclef-2025/Stats/Satellite_quantiles_linear_approx-100.npy b/examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/dataset/geolifeclef-2025/Stats/Satellite_quantiles_linear_approx-100.npy new file mode 100644 index 0000000000000000000000000000000000000000..b9c269a743c91ff604adba4793760957ed34fad1 GIT binary patch literal 131200 zcmdVjfAm}RedqZ*jLXp>UaGJ@jF$1zTgD|C#>=S0B^uVtd`f+&mUTS}>rf4G$;B~5 zaU4f+9HKfdxh1ZnQkN*ip&H`QTg4?>>bfe&|)bCV%^n-M*mF+GzBdz;>8_;Io5 zvp1>O+YIQL4B6X^=-G_v*-Yr!^lJQgS9e<4pl4FCw^^oVGoWX)M$cwQ&t#Lm z&4`}OE&#z01S@t%4dNvF6TvhaKmg(6H=$WjscQvGMvq{fp zM9*fIp3Rt^%|1Pw2|b%5dN#d>_;qM8%ig5V-qiwqn~I*vGJ978`ZjCyY=-n~HtE@n z=-KSjvl-Jf*=O%+Lf__yo=xuy{Q7e=OV6fH&t`$1O-0YuGJTr?J(D%|u7>n&HtE@n z=-KSjb2X-Kvro@vLeJ)io~vG+U!NAU>}~q=Y!>L*RP=0?>DdhExmu%dGo)v;NzZ0P z&($t{lQDaneR{4Y^j#g%x9R;Nzb-9i*}Lk~ceOy@rlRL+nZC(@y{k3)HbZ)@HtE}p z=-KSjb2X;#YM;K%gr2J-`mTC=`1R{(mYz+Yo~s4=t}6Pjmg(CJ=($>>?`lZj)h2zL z5j|JC^j(eVyV|F3GohE(5qCDdFY@c#(JVbzefq8z=-X8ETrJa2Yrvh&8a-D-`mQ$V zyBg8A*`?=dOh2uC?rbLX(mLYKrni@0?Dz45OKZfPt6lo8#`In7(@$%{ovS1IHob@WeUQ>DeOG<@t`_LKs_3V+%$=(NeOGJr 
z(;9N;YLmXJ5&g7wxpOt9?`ogEs|kHqNA%O`eTm;Ej%MlE^y#@;pzo@p?`oO8s{#GA z*0^&ur0;5zep(~$TT)u-=jp1!LE z`e`k4=c=ObYKgw9W%{mG=-UkFxmu;4)*5%N*6G^}>ABjVpVlUKuD0mgjOeAc!=0;L z`Zjy?T#f0cb%{Gy`}AEM&`)c^ovTCou8!#29MhXs@0a;=!D5EJt6BQ4=IE!@=gwxH zo~s4=X)SW+s-kbRM9f8=(*aV zpVlsSHhc73jp@6(MBmjueVYS%t|s);I^@pgh@Pus`mTD9@bTW!3_Y7!damZ^r`6}q zW}cp_1^PCN^wO%hvst3&YMH*P75X*Z!S*Pb}NZ)3Io~upzuD0mgjOe-A zq3>#!zReyzn=w6Cm*~6Nr*CsW&t^i;)ggV8BlfP2>D%eUk}$n?rgwNAygN+1vE?@$t`KhPllwJy&z|r9N|4 z^Ym>N=$S0CH>udWTB0v4Gk3K@-_?M=w94Go8hvS<`QPRpcmI56FTK!WgT2Wndz&qK zHY0i_JM3+C>6z@YHyN|HxkOLeXKr&q&((y!bjaN1h@Qp9LweE?bDLv&Hog6PoDpWou4d_* z%&|A=v$vV2XR|<0T4Zih(UX>#+bq+wS)nHlnA@zLCk>g~Y|yjWq$h1L zw;9p1*`a5$%id&}J<|fPRZC2<>1Li--M}PPCp85p)s>K?6X`Q*vke;-`++>rz zw8dN+F*n&^Z?j8J+GB1rre|`Ay|mBV=763wVQzBB-sFhAbj;kQ_pAIqH<)2A%`!Kc zV{g-^C(ScASzs?MGB>H%+bq%3mdPe7>}>}0v{kan8he{{deV@&w8323WNxy>-ekmH z+F@?8%id&!mfVs3KGUh6%|?@Nmr_9nCJwK=j$pS{gI zJ!yftw8-3~VlOQ*mzJ4JE6hy>?4?!a(i(GHnwo5kI zV=s-FOP82S`^-%a*h>@UCWq{$Bj(aEbCce$@%s%k7PIV4=GaSp=F&WKlLhwDB6F!? 
zZnDH)TPB;Vus0d7*H+0UYwWdkvNU9FvcX>4B%5rpHyN?lcE~2X?4>>C(wMn)iMh1T zTsmNGGGVVBlBFZ&(lK+XcYxm~!VKADmc2Gdmio-Kd9uj@dufrmR*|J8=F&2AX@$8o zU@omP*Vf1;>+H25S=wMOZ8F!k$kK?pw8LE6B};qEr7?5u5?R`3E*&t}CS>W5xpqXB zj+slnU+32`W(;PTOLNS%K3SS)E-f(E7Rge@Tw5Yb%gm(}=GuTPtuoiv$l5wt8ZwtQ zm^Tf!m}?`lw8LE5W!^IwGnXzg*Y?TU0a=yx~>1wH2~9AZx2+ zZH-(PhUA99CUb3z90@yQZI|2=#$@dhS=%RT2joOJBx^_HvCwPs`yN+dM!1rk{Q<6f z@e4P-pLx!p&s@5Od0to`n=G=ID(2D+%u5E#%(a`z72y_gAgq$5JDAsmyUBIoUUG>0 zuwk&tTzin*5*{K)20P5PN61}akE}gGjtwp`*PbHxg#)rQVXhsLwHL@E;h3zwME1VM zuXoG{vt((Gxpp<#7v{;@0$IC`TwHeZuF7+_k(IDS)|ScA3Uh5h)>g^d8d+N>Yxj~v z+=mU}0df-$VhaypgdJg*tnHDtFAEB;}Z6T19E~x9N`$fgVP_!3}!KhKIXB2MO3%} z%UHnxt60N2hIjy**uu`T#sBS}ocmpJ4`b}(0291`WAwhk_Y+s4k85!qDlB0I1FT_) z4LpbucCm*`*vABiIL6Eq({FDUeJr5Djab1d)-l8;MtB5I;3*v75WU~v+krk7P~m2* z;%;nU3p?1uKAy)hW?Iv)H-`mOSiu^4qwjj}efc}0`l#~W_pKXV^M7Ib{S5EJk4;s< z)EI;Nr`g*&HNxQI)2y~l^&TKUh@Zf!it`^od-l3dlDALwKBd&Fzxbi||Bs(yzGJHX zeAL0Gr&)jAW`E~2N9cWKnnR2+Tbs@cSjHMg*u{QL`QLo`=-@or`|R{Siv?6z#u|p$ z#0a|>V;>V7p|^{#4}D>QtgtK$$TbXwO>%@?VNC90A{>#m-skxC3VpJ+Kvu#sSsReG zHF79yk|SZ4tc}UqK3SWPwIj0D+s*e+=##YtvR09`WwJIPYind}NY*yV+K8;}lC?2e z+b3%ivUWt4dY_+u|Fv1N)+b8~%%zICwoH}=%(XSLG-R%AlBE%IX_vV+CQJLwwFy}| zVlMR_;>S^#B};wg(gJg-VlFK+mj=wGHRjTgxwOe#8Zno4nM-5l(mr!(!rbJDz0~^x zKMw}8%%whaX@R*(#a>!w{(Xj-ttXFvn0a8Z##|aQmo}N3jMz)N%uUAZP4?MK6XqsI z>`i)ge*T16vPqx4$pU+mioMA)dy@fslQs4xL-r<{>}^K$q+RAFWA-Nd>}@9WOpe%_ z^!WTeCbR5q`t(c|*qc=BZIo&sc|(+-=6?Znv^jr<-+pN(`Ysj6=COw-Gy|i|@vl-KKwNKyG zgubgI`mTC>-WgZ3^lkd|TrJRdRnfOurkB=$J6CJ;T@C5G+N7V>h&!8IdNyNvY3+08 zYC_-D5q+E9!~FeuO0)EB`t)2a(05hQPivVwR|ER4*661-DwI8OKZZN%^^KkNAz7C(@(3%=h3j4q33FrzNgq3-$I+04^(wLm|uMebZx z^lg^txmu?0YK6Y50ex4i^#2yae|O(+@BS3MT1xBmZHDw*ZP0hMN#ACRo~sdkn;m*- z?Q&N=($>?pH{`4%@RFV%kBiAt#jvUNZ)3Io~upzv)ba`)rh{$ z4!yK?xpTEg-_@ADt4s9L+UL&ZfS#)f{j?6bb9F@D)iHfnJw884S~J|anx*e*j=rlt zeOL4JT`kaewMaj$iaS?J^lg^uxmuy0)_^-#tMt=air8lcJ?p>|ZpVg52 zv^KbNwMlTt3LfjndiNEE$~iSi`*?r#d~Qj zaW}7J-dU6t-g7meKd)8ZS(G*2OKY9GMH%wmqHOS9TASQ0$`pVu<)%xi^rTn*^YYL)wWt?`bl 
zb^5a!a_?$`{;W2+pVb!ku154{wZr|acDc9Nqc^KD_p`dhz0E$oSsie1God%DL+)pF z#J$Ziy;=46{L1s1;T@Y*`;T*N6*!mzR4x_uJ-A>I-qYd zVejgYzN;hp(lK*aJwCs^$qaj&S$eMK=u3U(uIA~xTA(j2GIv$cw^^cRvdrFQg`UlT zp2;eEn>BjUI&+&LJ(~@B(k648EqXR1dL}#UZFcFI?6Ef)v$wfKPugd0b3o5#LQgtm zZgWJ>8pwn5f5$t__-mUftTg*~!1CTo|- z+CF(8Ovu_Hd1P?RT^elRJ0>yLb$H z7zHZU(?87mlI6>C_> zy%^#FY~mq2j7OIJ|GxX}Bfn4X;YmD&159v;qh()TeRBIdRcmP}2!7lc23HzAf5J%`8oPPT=xC(tNU=d4L!EIQ_5Stib7h~*Wf+O_4 zG5z+e!aNqSgj=wN4LpQhJc(y;gqbI%U(f1gKl}X6|H}u-rDcETPmf>v7FX+4awSwtc}Uqgsk-* zXEI=KGo)uTqGvOvXEUMas<)jVH;X=d zR~3C%1Nt^YdTEWgb2X;#YC_*t?^FD^r_`tKs-mCPfIC-1`e}{0b2X;#YC=D)-VT0# zQtH!BtKu%L0e5K)xpOt5pVpYWv?ko8)%!F*Zz=WZr&Vz`s{!|}hV*AO;y$f0cWF(y zORKk&pU0H?^wX-iOKZU0tcKjDHR5hoWA4+Ma5t;oXZU%YQJ=fCD(+@A;6AM(ce5IC zKdUkKvzl<9RORLY_tQNSRRmJ_Rmbp)Bz}>vocxP5a?q{{h{j5gZ&uW+Z zS&g}$)js#Lns7g>Bkt$b`z*izXEe**toq!~Yk_xWRdGMBW!{bDHITR(Gs<>a2 zW!{_Bfctr^@y@)4yfd#&-kH~kcjmRrJF^;dzbN~>H?Ilr%<72ydG$WW$C)|Ja=$2j z-ka9~@64;>okdyZy?G6IXHnL8Z&pL@=e5Z@X{?@k_kG{uE?SUX-kH~!cV@NE{k$f; zvnWTrH>=)mKK{&Tmb*pk^X{w`xSvW*NAuKwaYt;Hs;-V?eosOCcLvKN4&Qv zy*eMK7G#!p=GEt&AC?8)pI5~@i?Ymni!$K7AC@)VpVyFg7G;z7epp7lzbL!BwTE)AIw#>VWHsIYwS>wG$8S>tu zZSwA-jd*v_c6oPE#=Q5#vd{aAGU2^NIpVz^mfjvdzAo4-@6D^vJBzl!yNg!w?xHO7 z-l7b6Z_(CxchQEtyC|Eyw`e2YU9?@^U6e8JE!sZsF3N=Weprrpe^Gj0z<-Y@a-a6x8yXI_2YS(F9dn^(m<^IGPeMH}$$qO9@WyoS6p zuT9=rlo9XEYnONCHRhdJ?Q_3q6W*QG5%=@zJ9 zZ&nrevs&hURs-%AWsUb{HROI)o7^wTi1+5T%R93gb3dzn?iXdkdubhUH?LmE$B~p~ z=+A1F`+3dr&aC>}&uX6gv=+FV*COwvRdF|~CGHnxnfKCK;civ~?$cW3ZeDA=lh!(S zX$`rX)du&o+T?y#TimBL;x4Tn?$X-jZdQBT&uYy5tS)iyYM*{u2i&DK;V!L1?pz(w zcXdpER=r>5&*PM4=%+QyovS(ev+8s2YMy>t3*4o($epW-zN;nru9oSiwZfgvfS#*W z`mWaKyIQC3YDhn=4enfR(s#8*-)2P5)ee1EyYyY{(YG1Xb9ITn%|1O>2lQhHc>X^Ps?-4%!JDQZ!%$Ta>(A~ zh`n^o+@$wq<-PaCV1~KLEPH8=xk;bB%{)D6fw{>ddy|U2w8Y$GnZ2~a++@IBT4ips z#$H-yZZc#qZ7?_4WG`(omqyG@cGzpXWRpGi-|zWXuKy=G$LzIBWRrdN(gAa6!dyCJ zE*&wKj+slnef)kCX2{YkbCWstTAwVaYl~!)ioLYNTw5kfE6k+^YL26JtbtZk8{5p!*atnHGu 
zJ+d|?YnRB{K3O{;YZJ0|NFE8tWUaTKUniImX35$dS?iOvd9t=Z))vWHMJ@@;tt<6);7pZVT&9IJ7jH_+!MxR?Gm{!9FVmMc_yz`s0=XzuBgwEDXrnD!GPr46z|>l3N&IN7yCzFcvP6`#8V^hd9Eq z(EC+>{4k3-^f8YGETR&Y$=V7zz$(_Tjv+R%DQuA=>|hsr7~>N5aexU9afD;^9_7aa zvzS94^H{(lDlB0cD;QuEYgoq+o7ln#JJ`h@E@2-BnBZ{P7nlC+(|<+wevR)ZW-*68 z=CObZOIXGVR|hsTT*3h+IKnYz4otrvS@bcFMO0YE3I)5~uySRi2jxp2Z=NElki$&akWejk~vfupRPyBYB z+`tw_*u@z8IKT_&eQo;f$>3^SiwetF#X2_e(6TD~Z1pJlB%Z?&W*%4Ghws||z3Z2q zd?{iX1FT~cBkW;+*)z{>|JQ#(&U}6Pp1T<#XlOqeek2($PY{nADpT_F;#zl z^{_h4{wJsE&$}ILXZ|Vr817Ks|8?CLA71n6>D=2nHA3|n=C!G1Y+@h%&$7n|_2*&o zcTM*JMyNl3O5t`}!VvX;e>nRPUmxoK?r4GkK3?++O4a_W z55Dm4$%_9jRCC!s?tT1wuabTK`zAH<-(e~6@8>ts2;~Eb;dv`tOrG{yMpe`me96{Pmy5U+?I@jwQvd7yai#jgQ|W)E}pJH&uCG{SV(b(WH<1 z<7Vw^(|PY}s(MGjR$~D-;AY&0H4L$dhw&Jm{8y@3cc)|LG{XhbOU*159v;7tuR3{q|=ti#c3_YjGVa zEa7I{g4?i$bqukAO+17V9>HUH0++ClXE4Dbj&O|LxA=Zy7IWz18eEI(a6N9ojkp=N z;5Mw{ZrqC@Hn52;jIe`U>|u;2@f7xPfC&z9gk!vf-c$UzU=~;5YFvYPEC`Eag&VMh zWvt*9+=f-G;cncEAvW*;HnD|=Fv1RY@fh|n#*?^&eH`F9OmK)7aD-#Lge!iNA75OF zt1ySYFi&2K1uWuvR9M1|SjGx&!2qkm8hJO?F~oh?z^1T8K7|rciBA>!O z4)7c%cpis%;Wt(AAOGM>|M4%$W1$yKe_pgJ$XDW3xDv0%EM9}FFo)OSYP=qOTqDer z*9r?{?K-lyNWKl%4LjI2FQ?%@gH^W+#` zz>~Nne2IJtPh%g?2nS^CIr6KR;A?muU&kT7ffs}$@>_UOI3{b~Azwo8xA^^wSKta^ zhI|#S6lTfS;40y@&dsH z65c`HfOldE@4}73P2@7(jhpcvtO)NVZ^8R8z^%9qAH*td#~s3*~(E5iQekAn&4gERueqGttMP{O(JQpo1n-u%n&7Q#s|lCAwXG)6E4I~y%dT&$ zN%XX;ttQda-qBW*=xu1L375UIttQbcwbcaguC|)sZEULvm)+D>ljxP(YQkmjZmUW3 zHn-IT@1C}r;8ogc!e#Gmt4Z{>wABRfzP6g+1#LCqvRm6~61{D0HQ}<=wwgpwyS=R@ z(c95h6E0h8t4Z{nsC`YZ8eGB-nN=>*@xR|61}jkCU}pu z)dX)}TTQs^{;Cj1Q_YpcoFYqr&d%N}g2N%XX>wwgpw`ea*8 z#@?Z}nsC{t+G-NLsI4Y=?Y5fW9d4@$mwmdeCeiD()r89)X{$-}v`5=&61{F)O}Ol_ zwwgpwd%UeC(d)I{lf;T_}+cqf*Gcab+5EHl^M zP2McLhg=cfOWq>9j~ocMlD7#TBv)}e?hx)I*Mz&syM+&t>%u+cy~2r5^bp~%`UNm?c^Yy~p$;#jz%r{`k;9bl&8r;OZEWCR;e{1ji|9ax& z%Vyy{4>7MB+{1jY!H1cL2KO zNAbW9@IO3t>wouE=1t+_WbHw+_6c&!;FHXs!pLBo`C)@kGw&FDhWQbLN11np&ytVf zaqJm^8{bR^9EmM zJ~a3S^9u&wWIi(Z7W0evHjWLx!~BxLcbR*?J^grP@(T7V3|`4RWAG~GEAeW~8oY-2 
zDudTD&l$Xq`D%;Tv-fe0#T(h@4c^3jt-+g_7lgNvwd=^zTbUQ}He7G;cIL|99n3cv zypws!;$7@F;wCH`yqo!EgZD767`&JH7K8UO4=iqFzYQP6s=@8dcUauXzGiS2^W6p? zVqV8RxYyvr%tMR&*qhwXzG3iD<_8Qu#=MD-<3Wp0ux}ZBlKCNvPqB{-wwWI`_%!nl zK7&UL9%ZiWlBLfwKW6bb`<}t)n4iGsF*f)D^OF`|WWQwaCFZ9ro@U=S_%ib|coqj1 zUtxdF;H%6NgRe0^Z}D~ZLwo}-SbUTH$lzPdFB*KCxpqvJzQbI5i7b7Wx%c$+&-E*C zg~cn`XAEA&T)UDiy_$K};x+77;kB4EcpY=?YO?ft=Dx)n*sn2oBlEn)o7k_#o3UW< z7Ut_L-pamc@izAB4c^XN;T^ca;+^bE7Vl!e(c&idWs7&S-)!(6<`ujbw^+Q7ePD4b z`)vjvWUj4}O>SquQ&=NQcQM~>@geqgi+kAbHTW>|(BdQP_u+nQSbUWI0gI2ZZ(4ku z{XvUQuy5g$c*x>Y>?4D1=7%jl&Awyt8TLmk9%bLfXYrWDInUv+M^JUtxa^U&X}WYs|Ih$tGWCKeYG;`wJG|WIw{U z@S?@H*^e#0!~T-RciDU2;h!@Yyn^`(i&wJG;7W^Ev(Gwu4ZT$suVtUJcpdxI7O($~ zQcLaM|B+?A$(I{&jl~<;=PlmEUb~iT@@Do0gSRkWhqo^Kk8XbLh7Xa825)1&-s0`- zmBl;QZ?JeLdu@qq@-FrpaTAse-pzco#e3LSEZ)n0i^YKbR@`RsLH1RP+u851xRZU& z;x6{PEk4A)j(c#g#fRC479U~1&*Fae+6GzrDDwmO7&a|F&irF8kic z?wUuBw}t-(Y{i;G4{~BeKc2*k3gG z_VS;ze}2VZ{>Wdk9~*p!`6YZ8z0UOK*W?xKS6IA~ea7Nd>{nX6ntc|p!BrNoWuLQn z9sAV=uV?OCyn+21i#M{*<4w5M;LXgn1+w%O=Gt{+>8;GQMY8lZ=IilxR2J`Gzro_2 z?6oDb^e*Pwjb!O2=Grn@dN=dUcn?-A-pgLQg>3RZ_JPH%?6+BbkbTwQcIMh0_s+QVd%PqXhBe1`cEJSyyxrOz@yX7M=tp2g?bpRo8m``F?O>`&s0xMcAq z_NOeKX5Y8?GW#~jXMW3F9Iz8-yp zH!#<(Axm#$uFaFBH!;_)B};E+Ucg&$oyA+(7cJh#e!a!p*=rTq}wWxvEPjkVcp^$ z_IoWp%s#aE2>X2&_p@){qjynP>4DTxIcE_Bo5!v0rWRdiK7>8`!VG8!>P2Cgy7` z-psyW@D}FlEZ)k#Xz(`X>+yC}2Jc|5-9VP!$-HFoF7_J@Zem`>yK%F{d)QYD-phQ8 z#rxO?2DdWbX7NGxRosp{4DMvEt&yd>nD4gu5PNN%EZxI=FFuT+#Yfohv$&sq!{Vdt z4_JJRebeCM%n#xd*s}N}`$GnwVjfv+vp;O{Y4#mFV(}>ZuEA%SAG3Izeb3@^>`z#H zo_&lbExyQp$>K}wPgy+8zHjhl=4bFM4h+7+{G7p8nI{HcV}9P?>&&%7vh)q+7w}CS zS$vEAMT2iM9~*p!`6Y|*viE*x`qvRx;0l9RGS3*iin(?rS$Z{dZI&#(hWRRs*Rs#y zYKzyi_YK~_e2v8$+2;-3#C$E@j0J^ia3}K`?!w&`A7WoOxQF>( zgAX$gEk442pTYgi9~B-TOCMw2H2664g9e{q-ZJG=}-m++Lq)6Dw@UuJ&B;92Ga zgRd|@XE0&@8lE@!I`g5yH<({A_$Kp_!MB)S#J6!Qd}lcy+Wwb+HF5IglEHVGdq<`{ z?G@w|2CrnEF?bd8m6)}74f|CFuVtRIxSGB6dgeaffNKoi$UJZGCgy7m-pssU@D}Fl z4BpE8He7G;cIL|99n3cvypws!;9bl&;wCH$?4Ijj+ 
za65U2!JW)&26r*vZLrRK5AHSiF!RvhKIYQ>%o_$DWqv^T7`cg$<3ZsQw7nJNe_qyC8g%JTmwe^NR-GWzJ<=UQhN7-oSi~ z!5f+94c^53W-JJAA+IxdEAyi8Hu8Fdw==&3HyFH=c}aK|d85Hi%*(>N$(x1ukSlmE zZV}!`4h(K%E`5-BRk)qJ19xIgxQo17_z<~na1Zmn!iUMB!AF?y$A-a2nI8~7Ms5lp zCm%HU1oJ2HknkyTWU$Tru<&Ve$KW%}j|h*FpT%Rsg>R5A2;U@+@GZP3e49Krc!|06UFP1= z>7NJME66JhW|*%ucs28^@EYjv+y2rMR+fHi|{^jfZGgKnM-#t*X|_OguBSQ zg%6SU;9lXwx;fKB1!yoYi}X16BfzZ+sL=05^f-C?Dg`3D_;fKkag+EWO2=66t5&i=CV;JE5xJ~!~`4dCfr5dE&M#WF8m^SukcIc5P!L=;uQkS@JRAadHoz!xO^i$+7SS z@=1IVm+;@>DdA~yAO8f;EdP9b`S8*HvhV$T41}+c&*Aql5uPV&|C~I;zrYLl?{S20 z;YHzJk;nMgcnSYs^nQQ(?bog#Yp*0{@O`)ve-yL0N_Z_fhyMkx#vel;Z@@KpBj$0f z@Sld<6I5SFnKx@T=IwgZKn~6Axj8hksxF|L_d1ZU6uP literal 0 HcmV?d00001 diff --git a/examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/dataset/geolifeclef-2025/stats/Stats_bioclim_test.csv b/examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/dataset/geolifeclef-2025/Stats/Stats_bioclim_test.csv similarity index 100% rename from examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/dataset/geolifeclef-2025/stats/Stats_bioclim_test.csv rename to examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/dataset/geolifeclef-2025/Stats/Stats_bioclim_test.csv diff --git a/examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/dataset/geolifeclef-2025/stats/Stats_bioclim_train.csv b/examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/dataset/geolifeclef-2025/Stats/Stats_bioclim_train.csv similarity index 100% rename from examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/dataset/geolifeclef-2025/stats/Stats_bioclim_train.csv rename to examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/dataset/geolifeclef-2025/Stats/Stats_bioclim_train.csv diff --git a/examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/dataset/geolifeclef-2025/stats/Stats_bioclim_val.csv b/examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/dataset/geolifeclef-2025/Stats/Stats_bioclim_val.csv similarity 
index 100% rename from examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/dataset/geolifeclef-2025/stats/Stats_bioclim_val.csv rename to examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/dataset/geolifeclef-2025/Stats/Stats_bioclim_val.csv diff --git a/examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/dataset/geolifeclef-2025/stats/Stats_landsat_test.csv b/examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/dataset/geolifeclef-2025/Stats/Stats_landsat_test.csv similarity index 100% rename from examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/dataset/geolifeclef-2025/stats/Stats_landsat_test.csv rename to examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/dataset/geolifeclef-2025/Stats/Stats_landsat_test.csv diff --git a/examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/dataset/geolifeclef-2025/stats/Stats_landsat_train.csv b/examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/dataset/geolifeclef-2025/Stats/Stats_landsat_train.csv similarity index 100% rename from examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/dataset/geolifeclef-2025/stats/Stats_landsat_train.csv rename to examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/dataset/geolifeclef-2025/Stats/Stats_landsat_train.csv diff --git a/examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/dataset/geolifeclef-2025/stats/Stats_landsat_val.csv b/examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/dataset/geolifeclef-2025/Stats/Stats_landsat_val.csv similarity index 100% rename from examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/dataset/geolifeclef-2025/stats/Stats_landsat_val.csv rename to examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/dataset/geolifeclef-2025/Stats/Stats_landsat_val.csv diff --git a/examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/dataset/geolifeclef-2025/stats/Stats_satellite_test.csv 
b/examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/dataset/geolifeclef-2025/Stats/Stats_satellite_test.csv similarity index 100% rename from examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/dataset/geolifeclef-2025/stats/Stats_satellite_test.csv rename to examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/dataset/geolifeclef-2025/Stats/Stats_satellite_test.csv diff --git a/examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/dataset/geolifeclef-2025/stats/Stats_satellite_train.csv b/examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/dataset/geolifeclef-2025/Stats/Stats_satellite_train.csv similarity index 100% rename from examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/dataset/geolifeclef-2025/stats/Stats_satellite_train.csv rename to examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/dataset/geolifeclef-2025/Stats/Stats_satellite_train.csv diff --git a/examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/dataset/geolifeclef-2025/stats/Stats_satellite_val.csv b/examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/dataset/geolifeclef-2025/Stats/Stats_satellite_val.csv similarity index 100% rename from examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/dataset/geolifeclef-2025/stats/Stats_satellite_val.csv rename to examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/dataset/geolifeclef-2025/Stats/Stats_satellite_val.csv diff --git a/examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/glc25_cnn_multimodal_ensemble.py b/examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/glc25_cnn_multimodal_ensemble.py index 2b42762b..5375b6e1 100644 --- a/examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/glc25_cnn_multimodal_ensemble.py +++ b/examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/glc25_cnn_multimodal_ensemble.py @@ -14,6 +14,9 @@ import torch from omegaconf import DictConfig from pytorch_lightning.callbacks 
import ModelCheckpoint +from torchvision import transforms + +from transforms import (MinMaxNormalize, GLC25CustomNormalize, QuantileNormalizeFromPreComputedDatasetPercentiles) from malpolon.data.datasets.geolifeclef2025_pre_extracted import \ GLC25Datamodule @@ -43,6 +46,54 @@ def set_seed(seed): torch.backends.cudnn.benchmark = False +class GLC25CustomTransformsDatamodule(GLC25Datamodule): + """Custom datamodule for GLC25 with the desired transforms.""" + + @property + def train_transform(self): + landsat_transforms = [lambda x: GLC25CustomNormalize()(x, subset='train', modality='landsat'), + MinMaxNormalize(), + transforms.Normalize(mean=(0.5,) * 6, std=(0.5,) * 6),] + bioclim_transforms = [lambda x: GLC25CustomNormalize()(x, subset='train', modality='bioclim'), + MinMaxNormalize(), + transforms.Normalize(mean=(0.5,) * 4, std=(0.5,) * 4),] + sentinel_transforms = [QuantileNormalizeFromPreComputedDatasetPercentiles(), + MinMaxNormalize(), + torch.Tensor, + transforms.Normalize(mean=(0.5,) * 4, std=(0.5,) * 4),] + all_transforms = [torch.Tensor,] + + return {'landsat': transforms.Compose(landsat_transforms + all_transforms), + 'bioclim': transforms.Compose(bioclim_transforms + all_transforms), + 'sentinel': transforms.Compose(sentinel_transforms + all_transforms)} + + @property + def val_transform(self): + landsat_transforms = [lambda x: GLC25CustomNormalize()(x, subset='val', modality='landsat'), + MinMaxNormalize(), + transforms.Normalize(mean=(0.5,) * 6, std=(0.5,) * 6),] + bioclim_transforms = [lambda x: GLC25CustomNormalize()(x, subset='val', modality='bioclim'), + MinMaxNormalize(), + transforms.Normalize(mean=(0.5,) * 4, std=(0.5,) * 4),] + sentinel_transforms = [QuantileNormalizeFromPreComputedDatasetPercentiles(), + MinMaxNormalize(), + torch.Tensor, + transforms.Normalize(mean=(0.5,) * 4, std=(0.5,) * 4),] + all_transforms = [torch.Tensor,] + + return {'landsat': transforms.Compose(landsat_transforms + all_transforms), + 'bioclim': 
transforms.Compose(bioclim_transforms + all_transforms), + 'sentinel': transforms.Compose(sentinel_transforms + all_transforms)} + + @property + def test_transform(self): + all_transforms = [torch.Tensor,] + + return {'landsat': transforms.Compose(all_transforms), + 'bioclim': transforms.Compose(all_transforms), + 'sentinel': transforms.Compose(all_transforms)} + + @hydra.main(version_base="1.3", config_path="config/", config_name="glc25_cnn_multimodal_ensemble") def main(cfg: DictConfig) -> None: """Run main script used for either training or inference. @@ -65,10 +116,10 @@ def main(cfg: DictConfig) -> None: logger.addHandler(logging.FileHandler(f"{log_dir}/core.log")) # Datamodule & Model - datamodule = GLC25Datamodule(**cfg.data, **cfg.task) + datamodule = GLC25CustomTransformsDatamodule(**cfg.data, **cfg.task) classif_system = ClassificationSystemGLC24(cfg.model, **cfg.optim, checkpoint_path=cfg.run.checkpoint_path, - weights_dir=log_dir) # multilabel + weights_dir=log_dir + '/../') # multilabel # Lightning Trainer callbacks = [ diff --git a/examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/transforms.py b/examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/transforms.py new file mode 100644 index 00000000..b544e158 --- /dev/null +++ b/examples/benchmarks/geolifeclef/geolifeclef2025_pre_extracted/transforms.py @@ -0,0 +1,360 @@ +"""Collection of custom PyTorch friendly transform classes. + +These transform classes can be called during training loops to perform +data augmentation. 
+ +Author: Theo Larcher + Lukas Picek +""" + +import os +from collections import defaultdict +from pathlib import Path +from typing import Union + +import numpy as np +import rasterio +import torch +from matplotlib import pyplot as plt # pylint: disable=W0611 # noqa: F401 +from PIL import Image # pylint: disable=W0611 # noqa: F401 +from torchvision import transforms +from tqdm import tqdm + + +class SafeRescaleTo255: + """Rescale an image band to [0, 255] and clip values.""" + def __call__( + self, + band: np.ndarray, + ): + """Call method. + + Args: + band (np.ndarray): 2D array to normalize + + Returns: + (np.ndarray): normalized array + """ + return np.clip(band * 255, 0, 255).astype(np.uint8) + + +class MinMaxNormalize: + """Normalize an image band to [0, 1].""" + def __call__( + self, + band: np.ndarray, + ): + """Call method. + + Args: + band (np.ndarray): 2D array to normalize + + Returns: + (np.ndarray): normalized array + """ + normalized = (band - band.min()) / (band.max() - band.min()) + return normalized + + +class Standardize: + """Standardize an image band.""" + def __call__( + self, + band: np.ndarray, + ): + """Call method. + + Args: + band (np.ndarray): 2D array to normalize + + Returns: + (np.ndarray): normalized array + """ + standardized = (band - band.mean()) / band.std() + return np.clip(standardized, 0, 1) + + +class LogNormalize: + """Log normalize an image band.""" + def __call__( + self, + band: np.ndarray, + ): + """Call method. + + Args: + band (np.ndarray): 2D array to normalize + + Returns: + (np.ndarray): normalized array + """ + normalized = np.log1p(band - band.min()) / np.log1p(band.max() - band.min()) + return normalized + + +class QuantileLinearNormalize: + """Normalize an image band based on quantiles.""" + def __call__( + self, + band: np.ndarray, + low: int = 2, + high: int = 98, + ): + """Call method. + + Args: + band (np.ndarray): 2D array to normalize. + low (int, optional): low quantile threshold to cut. Defaults to 2. 
+ high (int, optional): high quantile threshold to cut. Defaults to 98. + + Returns: + (np.ndarray): normalized array + """ + sorted_band = np.sort(band.flatten()) + quantiles = np.percentile(sorted_band, np.linspace(low, high, len(sorted_band))) + normalized_band = np.interp(band.flatten(), sorted_band, quantiles).reshape(band.shape) + min_val = np.min(normalized_band) + max_val = np.max(normalized_band) + return (normalized_band - min_val) / (max_val - min_val) + + +def pre_compute_quantile_linear_on_dataset( + root_path: str = "dataset/geolifeclef-2025/SatelitePatches/", + low: int = 2, + high: int = 98, + subset: str = "train", + data_type: str = "tiff", + output_path: str = "dataset/geolifeclef-2025/Stats/", + max_iter: int = 100, +) -> None: + """Pre-compute the quantiles over the satellite dataset to perform global normalization. + + This method uses numpy's percentile function with mode "linear" to compute + the quantiles. It is memory hungry as it requires all the arrays stacked up to compute + the percentiles. However it is more intuitive. + Saves quantiles and min/max values as numpy objects to be used for normalization. + Satellite patches shape is (64, 64) + Quantiles shape is (4, 4096) and min/max shape is (4, 2) where the 1st element of + axis 1 is the min and the 2nd element is the max. + + Args: + root_path (str, optional): path to the satellite root folder. Defaults to "dataset/geolifeclef-2025/SatelitePatches/". + low (int, optional): low percentile to compute quantiles from. Defaults to 2. + high (int, optional): high percentile to compute quantiles from. Defaults to 98. + subset (str, optional): dataset subset. Takes values in ['train', 'test']. Defaults to "train". + data_type (str, optional): data format. Defaults to "tiff". + output_path (str, optional): output path of saved quantiles and min/max. Defaults to "dataset/geolifeclef-2025/Stats/". 
+ max_iter (int, optional): max number of files to consider when computing the quantiles. Defaults to 100. + """ + fps = (Path(root_path) / Path(f'PA-{subset}')).rglob(f"*.{data_type}") + data = [] + quantiles = {'r': None, 'g': None, 'b': None, 'nir': None} + min_max_val = {'r': [0, 1], 'g': [0, 1], 'b': [0, 1], 'nir': [0, 1]} + k = 0 + pbar = tqdm(total=max_iter) + while k < max_iter: + try: + fp = next(fps) + if os.path.isfile(fp) and fp.suffix == '.tiff': + img = rasterio.open(fp).read(out_dtype=np.float32) + data.append(img) + k += 1 + pbar.update(1) + except StopIteration: + print(f'Max files ({k}) reached before max iterations ({max_iter}). Quantiles will be computed on {k} files.') + pbar.update(max_iter) + k = np.inf + data = np.stack(data, axis=0) + for i, k_band in enumerate(quantiles.keys()): + band = data[:, i] + sorted_band = np.sort(band.flatten()) + quantiles[k_band] = np.percentile(sorted_band, np.linspace(low, high, len(sorted_band) // max_iter)) + min_max_val[k_band] = [band.min(), band.max()] + np.save(Path(output_path) / Path(f'Satellite_quantiles_linear_approx-{max_iter}.npy'), np.array(list(quantiles.values()))) + np.save(Path(output_path) / Path(f'Satellite_min-max_values_linear_approx-{max_iter}.npy'), np.array(list(min_max_val.values()))) + + +# WARNING: Using inverted_cdf needs Numpy >= 2.0.0. Check that all other dependencies are compatible. +def pre_compute_quantile_inverted_cdf_on_dataset( + root_path: str = "dataset/geolifeclef-2025/SatelitePatches/", + low: int = 2, + high: int = 98, + subset: str = "train", + data_type: str = "tiff", + output_path: str = "dataset/geolifeclef-2025/Stats/", + max_iter: int = 1000, +) -> None: + """Pre-compute the quantiles over the satellite dataset to perform global normalization. + + This method uses numpy's percentile function with mode "inverted_cdf" to compute + the quantiles. 
It is memory efficient as it only requires the unique + values in the dataset's images, and an associated weight array representing their distribution. + Saves quantiles and min/max values as numpy objects to be used for normalization. + Satellite patches shape is (64, 64) + Quantiles shape is (4, 4096) and min/max shape is (4, 2) where the 1st element of + axis 1 is the min and the 2nd element is the max. + + Args: + root_path (str, optional): path to the satellite root folder. Defaults to "dataset/geolifeclef-2025/SatelitePatches/". + low (int, optional): low percentile to compute quantiles from. Defaults to 2. + high (int, optional): high percentile to compute quantiles from. Defaults to 98. + subset (str, optional): dataset subset. Takes values in ['train', 'test']. Defaults to "train". + data_type (str, optional): data format. Defaults to "tiff". + output_path (str, optional): output path of saved quantiles and min/max. Defaults to "dataset/geolifeclef-2025/Stats/". + max_iter (int, optional): max number of files to consider when computing the quantiles. Defaults to 1000. 
+ """ + fps = (Path(root_path) / Path(f'PA-{subset}')).rglob(f"*.{data_type}") + value_counts = {'r': defaultdict(int), 'g': defaultdict(int), 'b': defaultdict(int), 'nir': defaultdict(int)} + quantiles = {'r': None, 'g': None, 'b': None, 'nir': None} + min_max_val = {'r': [0, 1], 'g': [0, 1], 'b': [0, 1], 'nir': [0, 1]} + k = 0 + pbar = tqdm(total=max_iter) + while k < max_iter: + try: + fp = next(fps) + img = rasterio.open(fp).read(out_dtype=np.float32) + for band, k_band in zip(img, quantiles.keys()): + unique_vals, counts = np.unique(band.flatten(), return_counts=True) + for val, count in zip(unique_vals, counts): + value_counts[k_band][val] += count + k += 1 + pbar.update(1) + except StopIteration: + pbar.update(max_iter) + k = np.inf + for k_band in quantiles: + value_counts_sorted = dict(sorted(value_counts[k_band].items())) # apparently useless + quantiles[k_band] = np.percentile(list(value_counts_sorted.keys()), + np.linspace(low, high, img.shape[1] * img.shape[2]), + method='inverted_cdf', + weights=np.array(list(value_counts_sorted.values())) / np.array(list(value_counts_sorted.values())).sum()) + min_max_val[k_band] = [np.min(quantiles[k_band]), np.max(quantiles[k_band])] + np.save(Path(output_path) / Path('Satellite_quantiles_inverted-cdf.npy'), np.array(list(quantiles.values()))) + np.save(Path(output_path) / Path('Satellite_min-max_values_inverted-cdf.npy'), np.array(list(min_max_val.values()))) + + +class QuantileNormalizeFromPreComputedDatasetPercentiles: + """Apply quantile normalization from pre-computed quantiles and min/max.""" + def __call__( + self, + img: np.ndarray, + fp_quantiles: Union[str, Path] = "dataset/geolifeclef-2025/Stats/Satellite_quantiles_linear_approx-100.npy", + fp_min_max: Union[str, Path] = "dataset/geolifeclef-2025/Stats/Satellite_min-max_values_linear_approx-100.npy", + ): + """Call method. 
+ + Args: + img (np.ndarray): image to normalize + fp_quantiles (Union[str, Path]): file path to pre-computed quantiles + fp_min_max (Union[str, Path]): file path to pre-computed min/max + + Returns: + (np.ndarray): quantile-normalized image + """ + quantiles = np.load(fp_quantiles) # Quantiles shape: (4, 4096), 4 bands, 4096 quantile values. Bands order: [r, g, b, nir] + min_max_val = np.load(fp_min_max) # Min/max shape: (4, 2) 4 bands, 2 values (min, max) + min_val, max_val = min_max_val[:, 0][:, np.newaxis, np.newaxis], min_max_val[:, 1][:, np.newaxis, np.newaxis] + normalized_img = img.copy() + for (k, band), quantile in zip(enumerate(img), quantiles): + flat_band = band.flatten() + sorted_band = np.sort(flat_band) + normalized_img[k] = np.interp(flat_band, sorted_band, quantile).reshape(band.shape) + return (normalized_img - min_val) / (max_val - min_val) + + +class GLC25CustomNormalize: + """Return custom GLC25 normalization based on data modality.""" + def __call__( + self, + img: Union[np.ndarray, torch.Tensor], + subset: str = "train", + modality: str = "landsat" + ) -> dict: + """Call method. + + The normalization values are pre-computed from the training dataset + (pre-extracted values) for each modality. + + Args: + img (np.ndarray): image to normalize. + modality (str): modality. Takes values in ['landsat', 'bioclim', 'sentinel']. + subset (str, optional): dataset subset. Takes values in ['train', , 'val', 'test']. Defaults to "train". + + Returns + ------- + (dict) + dictionary of transform functions for each data modality. 
+ """ + if not isinstance(img, torch.Tensor): + img = torch.from_numpy(img) + transfo_dict = {'train': {'landsat': transforms.Normalize(mean=[30.654] * 6, std=[25.702] * 6), + 'bioclim': transforms.Normalize(mean=[3914.847] * 4, std=[3080.644] * 4), + 'sentinel': transforms.Normalize(mean=[629.624, 691.815, 460.605] + [2959.370], + std=[435.995, 371.396, 342.897] + [925.369])}, + 'val': {'landsat': transforms.Normalize(mean=[30.269] * 6, std=[25.212] * 6), + 'bioclim': transforms.Normalize(mean=[3955.529] * 4, std=[3234.002] * 4), + 'sentinel': transforms.Normalize(mean=[633.110, 692.764, 462.189] + [2950.603], + std=[465.046, 398.975, 370.759] + [927.021])}, + 'test': {'landsat': transforms.Normalize(mean=[26.188] * 6, std=[29.624] * 6), + 'bioclim': transforms.Normalize(mean=[3932.149] * 4, std=[3490.368] * 4), + 'sentinel': transforms.Normalize(mean=[517.786, 565.655, 376.777] + [2289.862], + std=[530.537, 497.530, 427.435] + [1510.104])}} + normalized = transfo_dict[subset][modality](img) + return normalized + + +# # Example usage +# if __name__ == "__main__": +# # pre_compute_quantile_inverted_cdf_on_dataset( +# # "dataset/geolifeclef-2025/SatelitePatches/", +# # subset="train", +# # data_type="tiff" +# # ) +# pre_compute_quantile_linear_on_dataset( +# "dataset/geolifeclef-2025/SatelitePatches/", +# subset="train", +# data_type="tiff" +# ) +# patch = rasterio.open("dataset/geolifeclef-2025/SatelitePatches/PA-train/00/00/3440000.tiff").read(out_dtype=np.float32) +# minmax_norm = np.array([MinMaxNormalize()(band) for band in patch]) +# standard_norm = np.array([Standardize()(band) for band in patch]) +# log_norm = np.array([LogNormalize()(band) for band in patch]) +# quantile_norm = np.array([QuantileLinearNormalize()(band) for band in patch]) +# quantile_norm_precomp = QuantileNormalizeFromPreComputedDatasetPercentiles()( +# patch, +# "dataset/geolifeclef-2025/Stats/Satellite_quantiles_linear_approx-100.npy", +# 
"dataset/geolifeclef-2025/Stats/Satellite_min-max_values_linear_approx-100.npy" +# ) +# custom_norm = GLC25CustomNormalize()(patch, modality='sentinel').numpy() + +# Image.fromarray(SafeRescaleTo255()(np.transpose(patch[:3], (1,2,0)))).save('original patch.jpeg') +# Image.fromarray(SafeRescaleTo255()(np.transpose(minmax_norm[:3], (1,2,0)))).save('minmax_norm.jpeg') +# Image.fromarray(SafeRescaleTo255()(np.transpose(standard_norm[:3], (1,2,0)))).save('standard_norm.jpeg') +# Image.fromarray(SafeRescaleTo255()(np.transpose(log_norm[:3], (1,2,0)))).save('log_norm.jpeg') +# Image.fromarray(SafeRescaleTo255()(np.transpose(quantile_norm[:3], (1,2,0)))).save('quantile_norm_linear.jpeg') +# Image.fromarray(SafeRescaleTo255()(np.transpose(quantile_norm_precomp[:3], (1,2,0)))).save('quantile_norm_inverted-cdf.jpeg') +# Image.fromarray(SafeRescaleTo255()(np.transpose(custom_norm[:3], (1,2,0)))).save('custom_norm.jpeg') + +# fig, ax = plt.subplots(4, 7, figsize=(20, 12)) +# for i in range(4): +# ax[i, 0].imshow(patch[i], cmap='gray') +# ax[i, 0].set_title(f'Band {i} original') + +# ax[i, 1].imshow(minmax_norm[i], cmap='gray') +# ax[i, 1].set_title(f'Band {i} minmax') + +# ax[i, 2].imshow(standard_norm[i], cmap='gray') +# ax[i, 2].set_title(f'Band {i} standard_norm') + +# ax[i, 3].imshow(log_norm[i], cmap='gray') +# ax[i, 3].set_title(f'Band {i} log_norm') + +# ax[i, 4].imshow(quantile_norm[i], cmap='gray') +# ax[i, 4].set_title(f'Band {i} quantile linear') + +# ax[i, 5].imshow(quantile_norm_precomp[i], cmap='gray') +# ax[i, 5].set_title(f'Band {i} quantile inverted_cdf') + +# ax[i, 6].imshow(custom_norm[i], cmap='gray') +# ax[i, 6].set_title(f'Band {i} custom_norm diff') +# fig.savefig('all.png', dpi=300) diff --git a/malpolon/data/datasets/geolifeclef2025_pre_extracted.py b/malpolon/data/datasets/geolifeclef2025_pre_extracted.py index 94aa16b3..94dbc9e7 100644 --- a/malpolon/data/datasets/geolifeclef2025_pre_extracted.py +++ 
b/malpolon/data/datasets/geolifeclef2025_pre_extracted.py @@ -47,25 +47,6 @@ def construct_patch_path(data_path, survey_id): return path -# def quantile_normalize(band): -# """Perform normalization on an array. - -# Args: -# band (_type_): _description_ - -# Returns: -# _type_: _description_ -# """ -# band = np.array(band, dtype=np.float32) -# min_val = np.nanmin(band) # Use nanmin to ignore NaNs -# max_val = np.nanmax(band) # Use nanmax to ignore NaNs - -# if max_val == min_val: -# return np.zeros_like(band) # If max and min are the same, return an array of zeros - -# return ((band - min_val) / (max_val - min_val)).astype(np.float32) - - def load_landsat(path, transform=None): """Load Landsat pre-extracted time series data.