diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..a539852 --- /dev/null +++ b/.gitignore @@ -0,0 +1,166 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/#use-with-ide +.pdm.toml + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. +#.idea/ + +# Custom +.vscode +data +project_docs +qgis diff --git a/.python-version b/.python-version new file mode 100644 index 0000000..1281604 --- /dev/null +++ b/.python-version @@ -0,0 +1 @@ +3.10.7 diff --git a/README.md b/README.md index d0ec1d0..384f517 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,3 @@ -# plantilla -Plantilla de proyecto +# Dencity + + diff --git a/notebooks/heuristic_processing.py b/notebooks/heuristic_processing.py new file mode 100644 index 0000000..f0d47b7 --- /dev/null +++ b/notebooks/heuristic_processing.py @@ -0,0 +1,49 @@ +# %% +import rasterio +from rasterio.plot import show + +from preprocessing.utils import calculate_ndvi, mask_raster_band_by_value + +MDS_INPUT_FILE = "data/ideuy/K29D6P6_MDS_Remesa_01_MVD.tif" +MDT_INPUT_FILE = "data/ideuy/K29D6P6_MDT_Remesa_01_MVD.tif" +RGBI_INPUT_FILE = "data/ideuy/K29D6P6_RGBI_16_Remesa_01_MVD.tif" +NDVI_OUTPUT_FILE = "data/ideuy/K29D6P6_NDVI_16_Remesa_01_MVD.tif" +DIFF_OUTPUT_FILE = "data/ideuy/K29D6P6_MVD_diff.tif" + +# %% +# Read data +mds_raster = rasterio.open(MDS_INPUT_FILE) +mdt_raster = rasterio.open(MDT_INPUT_FILE) +rgbi_raster = rasterio.open(RGBI_INPUT_FILE) + +# %% +# Plot rasters +show(mds_raster) +show(mdt_raster) +show(rgbi_raster) + +# %% +# Raster operations: MDS - MDT + +# Mask no-data values (value == -32767.0) +mds_no_data_value = mds_raster.read(1).min() +mdt_no_data_value = mdt_raster.read(1).min() + +mds_masked_raster = mask_raster_band_by_value( + raster_band=mds_raster.read(1), no_data_value=mds_no_data_value +) +mdt_masked_raster = mask_raster_band_by_value( + raster_band=mdt_raster.read(1), no_data_value=mdt_no_data_value +) + +# diff_raster = mds_masked_raster - mdt_masked_raster + +# %% + +# Calculate NDVI +ndvi_raster = calculate_ndvi(input_rgbi_raster_file=RGBI_INPUT_FILE, output_file=NDVI_OUTPUT_FILE) + +# Plot raster +show(ndvi_raster) + +# %% diff --git a/preprocessing/constants.py b/preprocessing/constants.py new file mode 100644 index 0000000..28b622f --- /dev/null +++ b/preprocessing/constants.py @@ -0,0 +1,22 @@ +# System +NUM_PARALLEL_JOBS = -2 + +# Geographic +CRS_EPSG = 5382 + +# Raster +RASTER_SCALE_FACTOR = 1 / 4 + +# Data output +OUTPUT_VECTORS = "data/outputs/vector" +OUTPUT_RASTERS = "data/outputs/raster" +MDS_OUTPUT_FILE = f"{OUTPUT_RASTERS}/K29D6P6_MDS_MVD_025x.tif" +NDVI_OUTPUT_FILE = f"{OUTPUT_RASTERS}/K29D6P6_NDVI_16_Remesa_01_MVD.tif" + +# Data input +INPUT_VECTORS = "" +INPUT_RASTERS_IDEUY = "data/ideuy" + +MDS_INPUT_FILE = f"{INPUT_RASTERS_IDEUY}/K29D6P6_MDS_Remesa_01_MVD.tif" +MDT_INPUT_FILE = f"{INPUT_RASTERS_IDEUY}/K29D6P6_MDT_Remesa_01_MVD.tif" +RGBI_INPUT_FILE = f"{INPUT_RASTERS_IDEUY}/K29D6P6_RGBI_16_Remesa_01_MVD.tif" diff --git a/preprocessing/raster_to_polygons.py b/preprocessing/raster_to_polygons.py new file mode 100644 index 0000000..ede5896 --- /dev/null +++ b/preprocessing/raster_to_polygons.py @@ -0,0 +1,29 @@ +# %% +from pathlib import Path + +import rasterio +from constants import MDT_INPUT_FILE, OUTPUT_VECTORS +from rasterio import features +from utils import mask_raster_band_by_value, raster_to_polygons + +# %% +if __name__ == "__main__": + raster = rasterio.open(MDT_INPUT_FILE) + # Set mask based on no-data values + raster_no_data_value = raster.read(1).min() + mdt_masked_raster = mask_raster_band_by_value( + raster_band=raster.read(1), no_data_value=raster_no_data_value + ) + # Get polygon and value from each raster cell: + masked_raster_polygons = features.shapes(mdt_masked_raster) + # Convert to polygons + masked_polygons_gdf = raster_to_polygons( + shapes_from_raster=masked_raster_polygons, parallel=True + ) + # Write geodataframe + filename = Path(MDT_INPUT_FILE).name + output_file = Path(OUTPUT_VECTORS) / f"{filename}.geojson" + print(f"Saving output to {output_file}") + Path(OUTPUT_VECTORS).mkdir(parents=True, exist_ok=True) + masked_polygons_gdf.to_file(output_file, driver="GeoJSON") + print("Done!") diff --git a/preprocessing/rgb_downsampling.py b/preprocessing/rgb_downsampling.py new file mode 100644 index 0000000..30cdb4b --- /dev/null +++ b/preprocessing/rgb_downsampling.py @@ -0,0 +1,15 @@ +from pathlib import Path + +from constants import ( + MDS_INPUT_FILE, + MDS_OUTPUT_FILE, + OUTPUT_RASTERS, + RASTER_SCALE_FACTOR, +) +from utils import save_raster, transform_raster + +if __name__ == "__main__": + Path(OUTPUT_RASTERS).mkdir(parents=True, exist_ok=True) + original_rgb, trans_rgb, transform = transform_raster(MDS_INPUT_FILE, RASTER_SCALE_FACTOR) + save_raster(original_rgb, trans_rgb, transform, MDS_OUTPUT_FILE) + print("Done!") diff --git a/preprocessing/utils.py b/preprocessing/utils.py new file mode 100644 index 0000000..a688e9d --- /dev/null +++ b/preprocessing/utils.py @@ -0,0 +1,125 @@ +from typing import Dict, Generator, List, Tuple + +import affine +import geopandas as gpd +import numpy as np +import rasterio +from constants import CRS_EPSG, NUM_PARALLEL_JOBS +from joblib import Parallel, delayed +from rasterio.enums import Resampling +from shapely.geometry import Polygon + + +def normalize(x: np.array, lower: int, upper: int): + """Normalize an array to a given bound interval""" + print(f"Normalizing raster values to {lower}-{upper} range") + x_max = np.max(x) + x_min = np.min(x) + m = (upper - lower) / (x_max - x_min) + x_norm = (m * (x - x_min)) + lower + return x_norm + + +def transform_raster(input_file: str, scale_factor: float): + print(f"Scaling raster {input_file}") + with rasterio.open(input_file) as original_rgb: + # Resample data to target shape + trans_rgb = original_rgb.read( + out_shape=( + original_rgb.count, + int(original_rgb.height * scale_factor), + int(original_rgb.width * scale_factor), + ), + resampling=Resampling.bilinear, + ) + + # Scale image transform + transform = original_rgb.transform * original_rgb.transform.scale( + (original_rgb.width / trans_rgb.shape[-1]), (original_rgb.height / trans_rgb.shape[-2]) + ) + return original_rgb, trans_rgb, transform + + +def save_raster( + original_rgb: rasterio.DatasetReader, trans_rgb: np.array, transform: affine.Affine, output: str +): + print(f"Saving raster to {output}") + dst_kwargs = original_rgb.meta.copy() + dst_kwargs.update( + { + "transform": transform, + "width": trans_rgb.shape[-1], + "height": trans_rgb.shape[-2], + "count": trans_rgb.shape[0] - 1, + } + ) + + with rasterio.open(output, "w", **dst_kwargs) as dst: + for i in range(trans_rgb.shape[0] - 1): + dst.write(normalize(trans_rgb[i], 0, 255).astype(rasterio.uint8), i + 1) + + +def mask_raster_band_by_value(raster_band: np.ndarray, no_data_value: float): + masked_raster_band = np.ma.masked_array(raster_band, mask=(raster_band == no_data_value)) + return masked_raster_band + + +def calculate_ndvi(input_rgbi_raster_file: str, output_file: str = None): + rgbi_raster = rasterio.open(input_rgbi_raster_file) + + # Get red band + band_red = rgbi_raster.read(1) + + # Get NIR band + band_nir = rgbi_raster.read(4) + + # Allow division by zero + np.seterr(divide="ignore", invalid="ignore") + + # Calculate NDVI + ndvi_band = (band_nir - band_red) / (band_nir + band_red) + + # Set pixels whose values are outside the NDVI range (-1, 1) to NaN + ndvi_band[ndvi_band > 1] = np.nan + ndvi_band[ndvi_band < -1] = np.nan + + if output_file: + dst_kwargs = rgbi_raster.meta.copy() + dst_kwargs.update({"count": 1, "dtype": np.float64, "nodata": np.nan}) + with rasterio.open(output_file, "w", **dst_kwargs) as dst: + dst.write(ndvi_band, 1) + + return ndvi_band + + +def build_geodataframe_inputs( + coordinates: List[Tuple[int, int]], cell_value: int +) -> Dict[int, Polygon]: + dict = {"cell_value": [cell_value], "geometry": [Polygon(coordinates)]} + return dict + + +def raster_to_polygons(shapes_from_raster: Generator, parallel: bool = True) -> gpd.GeoDataFrame: + + if parallel: + print("Running raster to polygons in parallel to speed up the process") + polygons_dicts = Parallel(n_jobs=NUM_PARALLEL_JOBS, backend="loky")( + [ + delayed(build_geodataframe_inputs)(polygon.get("coordinates")[0], value) + for polygon, value in shapes_from_raster + ] + ) + else: + print("Running raster to polygons sequentially, it could take some time...") + polygons_dicts = [] + for polygon, value in shapes_from_raster: + polygons_dicts.append(build_geodataframe_inputs(polygon.get("coordinates")[0], value)) + + polygons_dict_gdf = {"cell_value": [], "geometry": []} + + for poly in polygons_dicts: + cell_value, geometry = poly.get("cell_value")[0], poly.get("geometry")[0] + polygons_dict_gdf["cell_value"].append(cell_value) + polygons_dict_gdf["geometry"].append(geometry) + + return gpd.GeoDataFrame(data=polygons_dict_gdf, crs=CRS_EPSG) diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..ab55bfa --- /dev/null +++ b/requirements.txt @@ -0,0 +1,62 @@ +affine==2.3.1 +attrs==22.1.0 +backcall==0.2.0 +black==22.10.0 +certifi==2022.9.24 +click==8.1.3 +click-plugins==1.1.1 +cligj==0.7.2 +cycler==0.11.0 +debugpy==1.6.3 +decorator==5.1.1 +entrypoints==0.4 +Fiona==1.8.22 +flake8==5.0.4 +fonttools==4.37.4 +geopandas==0.10.2 +importlib-metadata==4.2.0 +ipykernel==6.16.0 +ipython==7.34.0 +isort==5.10.1 +jedi==0.18.1 +joblib==1.2.0 +jupyter-core==4.11.1 +jupyter_client==7.4.2 +kiwisolver==1.4.4 +matplotlib==3.5.3 +matplotlib-inline==0.1.6 +mccabe==0.7.0 +munch==2.5.0 +mypy-extensions==0.4.3 +nest-asyncio==1.5.6 +numpy==1.21.6 +packaging==21.3 +pandas==1.1.5 +parso==0.8.3 +pathspec==0.10.1 +pexpect==4.8.0 +pickleshare==0.7.5 +Pillow==9.2.0 +platformdirs==2.5.2 +prompt-toolkit==3.0.31 +psutil==5.9.2 +ptyprocess==0.7.0 +pycodestyle==2.9.1 +pyflakes==2.5.0 +Pygments==2.13.0 +pyparsing==3.0.9 +pyproj==3.2.1 +python-dateutil==2.8.2 +pytz==2022.5 +pyzmq==24.0.1 +rasterio==1.2.10 +Shapely==1.8.5.post1 +six==1.16.0 +snuggs==1.4.7 +tomli==2.0.1 +tornado==6.2 +traitlets==5.4.0 +typed-ast==1.5.4 +typing_extensions==4.4.0 +wcwidth==0.2.5 +zipp==3.9.0 diff --git a/set-pyenv.sh b/set-pyenv.sh new file mode 100644 index 0000000..57ca935 --- /dev/null +++ b/set-pyenv.sh @@ -0,0 +1,27 @@ +#!/bin/bash + +# Install pre-requisites for pyenv in Debian based OSes +sudo apt-get update; sudo apt-get install make build-essential libssl-dev zlib1g-dev \ +libbz2-dev libreadline-dev libsqlite3-dev wget curl llvm \ +libncursesw5-dev xz-utils tk-dev libxml2-dev libxmlsec1-dev libffi-dev liblzma-dev git + +# Install pyenv +curl https://pyenv.run | bash + +# Set bash +echo 'export PYENV_ROOT="$HOME/.pyenv"' >> ~/.bashrc +echo 'command -v pyenv >/dev/null || export PATH="$PYENV_ROOT/bin:$PATH"' >> ~/.bashrc +echo 'eval "$(pyenv init -)"' >> ~/.bashrc +echo 'eval "$(pyenv virtualenv-init -)"' >> ~/.bashrc + +# Restart shell +exec $SHELL + +# Install pyenv-virtualenv +git clone https://github.com/pyenv/pyenv-virtualenv.git $(pyenv root)/plugins/pyenv-virtualenv + +# Add pyenv virtualenv-init to your shell +echo 'eval "$(pyenv virtualenv-init -)"' >> ~/.bashrc + +# Restart shell +exec $SHELL \ No newline at end of file diff --git a/set-virtualenv.sh b/set-virtualenv.sh new file mode 100644 index 0000000..6ce84e9 --- /dev/null +++ b/set-virtualenv.sh @@ -0,0 +1,26 @@ +#!/bin/bash + +# Set python version and venv name +export PYTHON_VERSION=3.10.7 +export PYTHON_VENV=dencity_3.10.7 + +echo "Installing Python $PYTHON_VERSION..." +pyenv install $PYTHON_VERSION +pyenv local $PYTHON_VERSION + +echo "Setting python shell..." +pyenv shell $PYTHON_VERSION + +echo "Creating virtual environment..." +pyenv virtualenv $PYTHON_VENV + +echo "Activating virtual environment..." +pyenv activate $PYTHON_VENV + +echo "Upgrade pip" +pip install --upgrade pip + +echo "Installing development Python libraries" +pip install black isort flake8 + +echo "Done!" \ No newline at end of file