Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
06594a0
Add base files for waterbodies extraction pipeline
LucaRom Nov 3, 2025
28990a8
Fix all pre-commit linting errors in water extraction pipeline
LucaRom Dec 2, 2025
0b71302
Add comprehensive README for water extraction tools
LucaRom Dec 2, 2025
cf954be
Add valid LiDAR mask support to tiling workflow
LucaRom Dec 12, 2025
713040d
Merge remote-tracking branch 'origin/main'
LucaRom Dec 12, 2025
4dc1003
Merge pull request #611 from NRCan/main
LucaRom Dec 12, 2025
dfb335d
Enhanced water extraction pipeline with production-ready preprocessin…
LucaRom Dec 26, 2025
aa42622
Add seam correction tool for lidar mosaic boundary artifacts
LucaRom Feb 24, 2026
31f047b
Housekeeping: Apply code formatting and cleanup for water extraction
LucaRom Feb 19, 2026
57de668
feat: Add optional intensity channel support for water extraction
LucaRom Feb 19, 2026
5da18fa
fix: Handle 1, 2, and 3 channel images in visualization
LucaRom Feb 19, 2026
efcdf43
Add lidar mosaic seam correction tool with Gaussian inpainting for in…
LucaRom Feb 24, 2026
9af83d6
Add seam_correction logic into the pipeline to correct DTM and DSM
LucaRom Mar 5, 2026
e898b72
Add slurm folder for jobs and logs. Add an example script for downloa…
LucaRom Mar 5, 2026
acd6560
Restore Gaussian seam correction (revert bilateral filter)
LucaRom Mar 5, 2026
6ff0885
Revert seam correction to Gaussian, update callers
LucaRom Mar 5, 2026
3d532e0
Add seam correction parameter config
LucaRom Mar 6, 2026
30f6c0e
Add command line overriding abilities to prepare data
LucaRom Mar 6, 2026
15200d7
Add sh file for prepare data on hpc and housekeeping
LucaRom Mar 6, 2026
3700954
Fix overriding extent path when config is none and add logs to debug
LucaRom Mar 6, 2026
7dcdc62
Fix change project_extents_path to null instead of None
LucaRom Mar 6, 2026
1583d68
Merge stdout and stderr for prepare data job script
LucaRom Mar 6, 2026
840e08a
Revert to non override approach for prepare data script
LucaRom Mar 6, 2026
67ce5a2
Add more ram to prepare data job
LucaRom Mar 6, 2026
75b6531
Add script to create a buffered version of the AOI as a preprocess to…
LucaRom Mar 9, 2026
3f83e55
Fix copying features format for dictionary, not object
LucaRom Mar 9, 2026
d242b33
Fix AOI type not matching after buff
LucaRom Mar 9, 2026
1f0f955
Add script to create the valid lidar mask
LucaRom Mar 13, 2026
19b0797
fix: split datamodule prepare_data flow for training vs inference
LucaRom Mar 13, 2026
cb27cce
Fix valid lidar mask handling during inference
LucaRom Mar 13, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
*__pycache__**
*.idea**
*.vscode**
.ruff_cache**

# Distribution / packaging
*.egg-info/
Expand All @@ -9,6 +10,13 @@
waterloo_subset_512/
mlruns/
.ipynb_checkpoints/
temp_local/
data/
logs/
jobs_sh/
results/


# Specific files
environment_full_conda_bckp.yml
core*
104 changes: 104 additions & 0 deletions configs/01BG001_gaspe.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
# Training configuration for the water extraction task on the 01BG001 (Gaspe)
# dataset. Components are declared in the class_path / init_args style
# (presumably consumed by LightningCLI / jsonargparse -- TODO confirm against
# the launch script).
seed_everything: 42

trainer:
  accelerator: "gpu"
  devices: 1
  strategy: "auto"
  precision: "16-mixed"
  sync_batchnorm: false
  logger:
    class_path: lightning.pytorch.loggers.mlflow.MLFlowLogger
    init_args:
      save_dir: "/gpfs/fs5/nrcan/nrcan_geobase/work/transfer/work/deep_learning/gdl_projects/geo-deep-learning/logs"
      log_model: true
      experiment_name: "01BG001_gaspe"
      run_name: "unetplus_full_dataset"
  callbacks:
    # Keep the three best checkpoints (lowest val_loss) plus the last one.
    - class_path: lightning.pytorch.callbacks.ModelCheckpoint
      init_args:
        monitor: "val_loss"
        mode: "min"
        save_top_k: 3
        save_last: true
        filename: "model-{epoch:02d}-{val_loss:.3f}"
    - class_path: lightning.pytorch.callbacks.EarlyStopping
      init_args:
        monitor: "val_loss"
        mode: "min"
        verbose: false
        # Must exceed the scheduler's patience (8) + cooldown (2) configured
        # under model.init_args.scheduler; with a smaller value training stops
        # before ReduceLROnPlateau can ever lower the learning rate.
        patience: 20
    - class_path: tools.callbacks.segmentation_visualization.VisualizationCallback
      init_args:
        max_samples: 3
        # TODO: verify if the arguments below are used in callback with older versions
        # mean: ${data.init_args.mean}
        # std: ${data.init_args.std}
        # data_type_max: ${data.init_args.data_type_max}
        # num_classes: ${model.init_args.num_classes}
        # class_colors: ${model.init_args.class_colors}
  max_epochs: 100
  log_every_n_steps: 50
  val_check_interval: 1.0
  check_val_every_n_epoch: 1

model:
  class_path: tools.water_extraction.segmentation_task.WaterExtractionSegmentation
  init_args:
    encoder: "resnext101_32x8d"
    image_size: [512, 512]
    # weights: imagenet
    # TWI, nDSM, intensity. NOTE(review): presumably must stay in sync with
    # data.init_args.include_intensity -- confirm in the datamodule.
    in_channels: 3
    max_samples: 8
    num_classes: 2  # Background (0) and Water (1)
    ignore_index: -1  # Ignore pixels outside AOI boundaries (MIoU)
    loss:
      class_path: torch.nn.CrossEntropyLoss
      init_args:
        ignore_index: -1  # Ignore pixels outside AOI boundaries
        weight: null  # Can be set to handle class imbalance if needed
    optimizer:
      class_path: torch.optim.AdamW
      init_args:
        # Exponent floats are written with a decimal point so YAML 1.1
        # parsers (e.g. PyYAML) load them as floats rather than strings.
        lr: 1.0e-4
        weight_decay: 1.0e-5
        betas: [0.9, 0.999]
    scheduler:
      class_path: torch.optim.lr_scheduler.ReduceLROnPlateau
      init_args:
        mode: "min"
        factor: 0.5
        patience: 8
        cooldown: 2
        min_lr: 1.0e-7
    scheduler_config:
      interval: "epoch"
      frequency: 1
      monitor: "val_loss"
    class_labels: ["background", "water"]
    class_colors: ["#2E2E2E", "#0077BE"]  # Dark gray for background, blue for water
    weights_from_checkpoint_path: null

data:
  class_path: tools.water_extraction.elevation_stack_datamodule.ElevationStackDataModule
  init_args:
    # Required by parent CSVDataModule
    csv_root_folder: "/gpfs/fs5/nrcan/nrcan_geobase/work/transfer/work/deep_learning/gdl_projects/geo-deep-learning/data/preprocess_01BG001_gaspe"
    patches_root_folder: "/gpfs/fs5/nrcan/nrcan_geobase/work/transfer/work/deep_learning/gdl_projects/geo-deep-learning/data/preprocess_01BG001_gaspe"
    # ElevationStackDataModule specific parameters
    input_folders:
      - "/gpfs/fs5/nrcan/nrcan_geobase/work/transfer/work/deep_learning/gdl_projects/geo-deep-learning/data/gaspesie_01BG001"
    output_root: "/gpfs/fs5/nrcan/nrcan_geobase/work/transfer/work/deep_learning/gdl_projects/geo-deep-learning/data/preprocess_01BG001_gaspe"
    csv_path: "/gpfs/fs5/nrcan/nrcan_geobase/work/transfer/work/deep_learning/gdl_projects/geo-deep-learning/data/preprocess_01BG001_gaspe/01BG001_gaspe_water_extraction.csv"
    csv_infer_path: "/gpfs/fs5/nrcan/nrcan_geobase/work/transfer/work/deep_learning/gdl_projects/geo-deep-learning/data/preprocess_01BG001_gaspe/01BG001_gaspe_inference.csv"
    include_intensity: true
    stride: 512
    patch_size: [512, 512]
    batch_size: 8  # Increased for full training
    num_workers: 16  # Utilize more CPU cores
    regenerate_csv: false
    min_water_pixels: 1

# Training configuration
ckpt_path: null

# Optional: Resume from checkpoint
# ckpt_path: "/path/to/checkpoint.ckpt"
104 changes: 104 additions & 0 deletions configs/01BG002_gaspe.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
# Training configuration for the water extraction task on the 01BG002 (Gaspe)
# dataset. Components are declared in the class_path / init_args style
# (presumably consumed by LightningCLI / jsonargparse -- TODO confirm against
# the launch script).
seed_everything: 42

trainer:
  accelerator: "gpu"
  devices: 1
  strategy: "auto"
  precision: "16-mixed"
  sync_batchnorm: false
  logger:
    class_path: lightning.pytorch.loggers.mlflow.MLFlowLogger
    init_args:
      save_dir: "/gpfs/fs5/nrcan/nrcan_geobase/work/transfer/work/deep_learning/gdl_projects/geo-deep-learning/logs"
      log_model: true
      experiment_name: "01BG002_gaspe"
      run_name: "unetplus_full_dataset"
  callbacks:
    # Keep the three best checkpoints (lowest val_loss) plus the last one.
    - class_path: lightning.pytorch.callbacks.ModelCheckpoint
      init_args:
        monitor: "val_loss"
        mode: "min"
        save_top_k: 3
        save_last: true
        filename: "model-{epoch:02d}-{val_loss:.3f}"
    - class_path: lightning.pytorch.callbacks.EarlyStopping
      init_args:
        monitor: "val_loss"
        mode: "min"
        verbose: false
        # Must exceed the scheduler's patience (8) + cooldown (2) configured
        # under model.init_args.scheduler; with a smaller value training stops
        # before ReduceLROnPlateau can ever lower the learning rate.
        patience: 20
    - class_path: tools.callbacks.segmentation_visualization.VisualizationCallback
      init_args:
        max_samples: 3
        # TODO: verify if the arguments below are used in callback with older versions
        # mean: ${data.init_args.mean}
        # std: ${data.init_args.std}
        # data_type_max: ${data.init_args.data_type_max}
        # num_classes: ${model.init_args.num_classes}
        # class_colors: ${model.init_args.class_colors}
  max_epochs: 100
  log_every_n_steps: 50
  val_check_interval: 1.0
  check_val_every_n_epoch: 1

model:
  class_path: tools.water_extraction.segmentation_task.WaterExtractionSegmentation
  init_args:
    encoder: "resnext101_32x8d"
    image_size: [512, 512]
    # weights: imagenet
    # TWI, nDSM, intensity. NOTE(review): presumably must stay in sync with
    # data.init_args.include_intensity -- confirm in the datamodule.
    in_channels: 3
    max_samples: 8
    num_classes: 2  # Background (0) and Water (1)
    ignore_index: -1  # Ignore pixels outside AOI boundaries (MIoU)
    loss:
      class_path: torch.nn.CrossEntropyLoss
      init_args:
        ignore_index: -1  # Ignore pixels outside AOI boundaries
        weight: null  # Can be set to handle class imbalance if needed
    optimizer:
      class_path: torch.optim.AdamW
      init_args:
        # Exponent floats are written with a decimal point so YAML 1.1
        # parsers (e.g. PyYAML) load them as floats rather than strings.
        lr: 1.0e-4
        weight_decay: 1.0e-5
        betas: [0.9, 0.999]
    scheduler:
      class_path: torch.optim.lr_scheduler.ReduceLROnPlateau
      init_args:
        mode: "min"
        factor: 0.5
        patience: 8
        cooldown: 2
        min_lr: 1.0e-7
    scheduler_config:
      interval: "epoch"
      frequency: 1
      monitor: "val_loss"
    class_labels: ["background", "water"]
    class_colors: ["#2E2E2E", "#0077BE"]  # Dark gray for background, blue for water
    weights_from_checkpoint_path: null

data:
  class_path: tools.water_extraction.elevation_stack_datamodule.ElevationStackDataModule
  init_args:
    # Required by parent CSVDataModule
    csv_root_folder: "/gpfs/fs5/nrcan/nrcan_geobase/work/transfer/work/deep_learning/gdl_projects/geo-deep-learning/data/preprocess_01BG002_gaspe"
    patches_root_folder: "/gpfs/fs5/nrcan/nrcan_geobase/work/transfer/work/deep_learning/gdl_projects/geo-deep-learning/data/preprocess_01BG002_gaspe"
    # ElevationStackDataModule specific parameters
    input_folders:
      - "/gpfs/fs5/nrcan/nrcan_geobase/work/transfer/work/deep_learning/gdl_projects/geo-deep-learning/data/gaspesie_01BG002"
    output_root: "/gpfs/fs5/nrcan/nrcan_geobase/work/transfer/work/deep_learning/gdl_projects/geo-deep-learning/data/preprocess_01BG002_gaspe"
    csv_path: "/gpfs/fs5/nrcan/nrcan_geobase/work/transfer/work/deep_learning/gdl_projects/geo-deep-learning/data/preprocess_01BG002_gaspe/01BG002_gaspe_water_extraction.csv"
    csv_infer_path: "/gpfs/fs5/nrcan/nrcan_geobase/work/transfer/work/deep_learning/gdl_projects/geo-deep-learning/data/preprocess_01BG002_gaspe/01BG002_gaspe_inference.csv"
    include_intensity: true
    stride: 512
    patch_size: [512, 512]
    batch_size: 8  # Increased for full training
    num_workers: 16  # Utilize more CPU cores
    regenerate_csv: false
    min_water_pixels: 1

# Training configuration
ckpt_path: null

# Optional: Resume from checkpoint
# ckpt_path: "/path/to/checkpoint.ckpt"
104 changes: 104 additions & 0 deletions configs/01BG003_gaspe.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
# Training configuration for the water extraction task on the 01BG003 (Gaspe)
# dataset. Components are declared in the class_path / init_args style
# (presumably consumed by LightningCLI / jsonargparse -- TODO confirm against
# the launch script).
seed_everything: 42

trainer:
  accelerator: "gpu"
  devices: 1
  strategy: "auto"
  precision: "16-mixed"
  sync_batchnorm: false
  logger:
    class_path: lightning.pytorch.loggers.mlflow.MLFlowLogger
    init_args:
      save_dir: "/gpfs/fs5/nrcan/nrcan_geobase/work/transfer/work/deep_learning/gdl_projects/geo-deep-learning/logs"
      log_model: true
      experiment_name: "01BG003_gaspe"
      run_name: "unetplus_full_dataset"
  callbacks:
    # Keep the three best checkpoints (lowest val_loss) plus the last one.
    - class_path: lightning.pytorch.callbacks.ModelCheckpoint
      init_args:
        monitor: "val_loss"
        mode: "min"
        save_top_k: 3
        save_last: true
        filename: "model-{epoch:02d}-{val_loss:.3f}"
    - class_path: lightning.pytorch.callbacks.EarlyStopping
      init_args:
        monitor: "val_loss"
        mode: "min"
        verbose: false
        # Must exceed the scheduler's patience (8) + cooldown (2) configured
        # under model.init_args.scheduler; with a smaller value training stops
        # before ReduceLROnPlateau can ever lower the learning rate.
        patience: 20
    - class_path: tools.callbacks.segmentation_visualization.VisualizationCallback
      init_args:
        max_samples: 3
        # TODO: verify if the arguments below are used in callback with older versions
        # mean: ${data.init_args.mean}
        # std: ${data.init_args.std}
        # data_type_max: ${data.init_args.data_type_max}
        # num_classes: ${model.init_args.num_classes}
        # class_colors: ${model.init_args.class_colors}
  max_epochs: 100
  log_every_n_steps: 50
  val_check_interval: 1.0
  check_val_every_n_epoch: 1

model:
  class_path: tools.water_extraction.segmentation_task.WaterExtractionSegmentation
  init_args:
    encoder: "resnext101_32x8d"
    image_size: [512, 512]
    # weights: imagenet
    # TWI, nDSM, intensity. NOTE(review): presumably must stay in sync with
    # data.init_args.include_intensity -- confirm in the datamodule.
    in_channels: 3
    max_samples: 8
    num_classes: 2  # Background (0) and Water (1)
    ignore_index: -1  # Ignore pixels outside AOI boundaries (MIoU)
    loss:
      class_path: torch.nn.CrossEntropyLoss
      init_args:
        ignore_index: -1  # Ignore pixels outside AOI boundaries
        weight: null  # Can be set to handle class imbalance if needed
    optimizer:
      class_path: torch.optim.AdamW
      init_args:
        # Exponent floats are written with a decimal point so YAML 1.1
        # parsers (e.g. PyYAML) load them as floats rather than strings.
        lr: 1.0e-4
        weight_decay: 1.0e-5
        betas: [0.9, 0.999]
    scheduler:
      class_path: torch.optim.lr_scheduler.ReduceLROnPlateau
      init_args:
        mode: "min"
        factor: 0.5
        patience: 8
        cooldown: 2
        min_lr: 1.0e-7
    scheduler_config:
      interval: "epoch"
      frequency: 1
      monitor: "val_loss"
    class_labels: ["background", "water"]
    class_colors: ["#2E2E2E", "#0077BE"]  # Dark gray for background, blue for water
    weights_from_checkpoint_path: null

data:
  class_path: tools.water_extraction.elevation_stack_datamodule.ElevationStackDataModule
  init_args:
    # Required by parent CSVDataModule
    csv_root_folder: "/gpfs/fs5/nrcan/nrcan_geobase/work/transfer/work/deep_learning/gdl_projects/geo-deep-learning/data/preprocess_01BG003_gaspe"
    patches_root_folder: "/gpfs/fs5/nrcan/nrcan_geobase/work/transfer/work/deep_learning/gdl_projects/geo-deep-learning/data/preprocess_01BG003_gaspe"
    # ElevationStackDataModule specific parameters
    input_folders:
      - "/gpfs/fs5/nrcan/nrcan_geobase/work/transfer/work/deep_learning/gdl_projects/geo-deep-learning/data/gaspesie_01BG003"
    output_root: "/gpfs/fs5/nrcan/nrcan_geobase/work/transfer/work/deep_learning/gdl_projects/geo-deep-learning/data/preprocess_01BG003_gaspe"
    csv_path: "/gpfs/fs5/nrcan/nrcan_geobase/work/transfer/work/deep_learning/gdl_projects/geo-deep-learning/data/preprocess_01BG003_gaspe/01BG003_gaspe_water_extraction.csv"
    csv_infer_path: "/gpfs/fs5/nrcan/nrcan_geobase/work/transfer/work/deep_learning/gdl_projects/geo-deep-learning/data/preprocess_01BG003_gaspe/01BG003_gaspe_inference.csv"
    include_intensity: true
    stride: 512
    patch_size: [512, 512]
    batch_size: 8  # Increased for full training
    num_workers: 16  # Utilize more CPU cores
    regenerate_csv: false
    min_water_pixels: 1

# Training configuration
ckpt_path: null

# Optional: Resume from checkpoint
# ckpt_path: "/path/to/checkpoint.ckpt"
Loading
Loading