diff --git a/ohsome_quality_api/attributes/definitions.py b/ohsome_quality_api/attributes/definitions.py index c75772c29..090267643 100644 --- a/ohsome_quality_api/attributes/definitions.py +++ b/ohsome_quality_api/attributes/definitions.py @@ -52,7 +52,31 @@ def get_attribute_preset(topic_key: str) -> List[Attribute]: ) from error -def build_attribute_filter(attribute_key: List[str] | str, topic_key: str) -> str: +def build_attribute_filter_ohsomedb( + attribute_key: List[str] | str, topic_key: str +) -> str: + """Build attribute filter for ohsome API query.""" + attributes = get_attributes() + try: + if isinstance(attribute_key, str): + return attribute_key + else: + attribute_filter = "" + for i, key in enumerate(attribute_key): + if i == 0: + attribute_filter = "(" + attributes[topic_key][key].filter + ")" + else: + attribute_filter += ( + " and (" + attributes[topic_key][key].filter + ")" + ) + return attribute_filter + except KeyError as error: + raise KeyError("Invalid topic or attribute key(s).") from error + + +def build_attribute_filter_ohsomeapi( + attribute_key: List[str] | str, topic_key: str +) -> str: """Build attribute filter for ohsome API query.""" attributes = get_attributes() try: diff --git a/ohsome_quality_api/indicators/attribute_completeness/indicator.py b/ohsome_quality_api/indicators/attribute_completeness/indicator.py index a81c97fc1..870f147c9 100644 --- a/ohsome_quality_api/indicators/attribute_completeness/indicator.py +++ b/ohsome_quality_api/indicators/attribute_completeness/indicator.py @@ -1,4 +1,5 @@ import logging +from datetime import datetime, timezone from string import Template import dateutil.parser @@ -7,10 +8,13 @@ from fastapi_i18n import _, get_locale from geojson import Feature +from ohsome_quality_api import ohsomedb from ohsome_quality_api.attributes.definitions import ( - build_attribute_filter, + build_attribute_filter_ohsomeapi, + build_attribute_filter_ohsomedb, get_attribute, ) +from ohsome_quality_api.config import get_config_value from ohsome_quality_api.indicators.base import BaseIndicator from ohsome_quality_api.ohsome import client as ohsome_client from ohsome_quality_api.topics.models import Topic @@ -60,24 +64,60 @@ def __init__( self.absolute_value_1 = None self.absolute_value_2 = None self.description = None - if self.attribute_keys: - self.attribute_filter = build_attribute_filter( - self.attribute_keys, - self.topic.key, - ) - self.attribute_title = ", ".join( - [ - get_attribute(self.topic.key, k).name.lower() - for k in self.attribute_keys - ] - ) + + async def preprocess(self): + ohsomedb_enabled = get_config_value("ohsomedb_enabled") + if ohsomedb_enabled is True or ohsomedb_enabled in ("True", "true"): + if self.attribute_keys: + self.attribute_filter = build_attribute_filter_ohsomedb( + self.attribute_keys, + self.topic.key, + ) + self.attribute_title = ", ".join( + [ + get_attribute(self.topic.key, k).name.lower() + for k in self.attribute_keys + ] + ) + else: + self.attribute_filter = build_attribute_filter_ohsomedb( + self.attribute_filter, + self.topic.key, + ) + await self.preprocess_ohsomedb() else: - self.attribute_filter = build_attribute_filter( - self.attribute_filter, - self.topic.key, - ) + if self.attribute_keys: + self.attribute_filter = build_attribute_filter_ohsomeapi( + self.attribute_keys, + self.topic.key, + ) + self.attribute_title = ", ".join( + [ + get_attribute(self.topic.key, k).name.lower() + for k in self.attribute_keys + ] + ) + else: + self.attribute_filter = build_attribute_filter_ohsomeapi( + self.attribute_filter, + self.topic.key, + ) + await self.preprocess_ohsomeapi() + + async def preprocess_ohsomedb(self) -> None: + # Get attribute filter + result = await ohsomedb.attribute_completeness( + aggregation=self.topic.aggregation_type, + bpolys=self.feature.geometry, + filter_=self.topic.filter, + attribute_filter_=self.attribute_filter, + ) + self.result.timestamp_osm = datetime.now(timezone.utc) + self.result.value = result[0]["attribute_completeness"] + self.absolute_value_1 = result[0]["total_aggregation"] + self.absolute_value_2 = result[0]["aggregation_with_attribute"] - async def preprocess(self) -> None: + async def preprocess_ohsomeapi(self) -> None: # Get attribute filter response = await ohsome_client.query( self.topic, diff --git a/ohsome_quality_api/indicators/building_comparison/indicator.py b/ohsome_quality_api/indicators/building_comparison/indicator.py index 316e1c4a6..60eec8572 100644 --- a/ohsome_quality_api/indicators/building_comparison/indicator.py +++ b/ohsome_quality_api/indicators/building_comparison/indicator.py @@ -13,6 +13,8 @@ from geojson import Feature from numpy import mean +from ohsome_quality_api import ohsomedb +from ohsome_quality_api.config import get_config_value from ohsome_quality_api.definitions import Color, get_attribution from ohsome_quality_api.geodatabase import client as db_client from ohsome_quality_api.indicators.base import BaseIndicator @@ -71,7 +73,14 @@ async def coverage(cls, inverse=False) -> list[Feature]: def attribution(cls) -> str: return get_attribution(["OSM", "EUBUCCO", "Microsoft Buildings"]) - async def preprocess(self) -> None: + async def preprocess(self): + ohsomedb_enabled = get_config_value("ohsomedb_enabled") + if ohsomedb_enabled is True or ohsomedb_enabled in ("True", "true"): + await self.preprocess_ohsomedb() + else: + await self.preprocess_ohsomeapi() + + async def preprocess_ohsomeapi(self) -> None: for key, val in self.data_ref.items(): # get coverage [%] self.area_cov[key] = await db_client.get_intersection_area( @@ -100,6 +109,38 @@ async def preprocess(self) -> None: timestamp = result["result"][0]["timestamp"] self.result.timestamp_osm = parser.isoparse(timestamp) + async def preprocess_ohsomedb(self) -> None: + for key, val in self.data_ref.items(): + # get coverage [%] + self.area_cov[key] = await db_client.get_intersection_area( + self.feature, + val["coverage"]["simple"], + ) + if self.check_major_edge_cases(key) != "": + continue + + # clip input geom with coverage of reference dataset + feature = await db_client.get_intersection_geom( + self.feature, + val["coverage"]["simple"], + ) + + # get reference building area + result = await get_reference_building_area( + geojson.dumps(feature), val["table_name"] + ) + self.area_ref[key] = result / (1000 * 1000) + + # get osm building area + result = await ohsomedb.single_snapshot_aggregation( + aggregation=self.topic.aggregation_type, + bpolys=self.feature.geometry, + filter_=self.topic.filter, + ) + value = float(result[0]["value"]) or 0.0 # if None + self.area_osm[key] = value / (1000 * 1000) + self.result.timestamp_osm = result[0]["snapshot_ts"] + def calculate(self) -> None: major_edge_case: bool = False result_description: str = "" diff --git a/ohsome_quality_api/indicators/land_cover_completeness/indicator.py b/ohsome_quality_api/indicators/land_cover_completeness/indicator.py index 10e6f91af..32f87b649 100644 --- a/ohsome_quality_api/indicators/land_cover_completeness/indicator.py +++ b/ohsome_quality_api/indicators/land_cover_completeness/indicator.py @@ -7,6 +7,8 @@ from fastapi_i18n import _, get_locale from geojson import Feature +from ohsome_quality_api import ohsomedb +from ohsome_quality_api.config import get_config_value from ohsome_quality_api.indicators.base import BaseIndicator from ohsome_quality_api.ohsome import client as ohsome_client from ohsome_quality_api.topics.models import Topic @@ -26,15 +28,31 @@ def __init__( self.th_low = 0.50 # Above or equal to this value label should be yellow async def preprocess(self): - # get osm building area + ohsomedb_enabled = get_config_value("ohsomedb_enabled") + if ohsomedb_enabled is True or ohsomedb_enabled in ("True", "true"): + await self.preprocess_ohsomedb() + else: + await self.preprocess_ohsomeapi() + async def preprocess_ohsomeapi(self): result = await ohsome_client.query(self.topic, self.feature, density=True) self.osm_area_ratio = result["result"][0]["value"] or 0.0 # if None timestamp = result["result"][0]["timestamp"] self.result.timestamp_osm = parser.isoparse(timestamp) + async def preprocess_ohsomedb(self): + # get osm building area + + result = await ohsomedb.density( + aggregation=self.topic.aggregation_type, + bpolys=self.feature.geometry, + filter_=self.topic.filter, + ) + self.osm_area_ratio = result[0]["value"] or 0.0 # if None + self.result.timestamp_osm = result[0]["snapshot_ts"] + def calculate(self): - self.osm_area_ratio /= 1000000 + # self.osm_area_ratio /= 1000000 self.result.value = round(self.osm_area_ratio, 2) if self.result.value >= self.th_high: self.result.class_ = 5 diff --git a/ohsome_quality_api/ohsomedb/__init__.py b/ohsome_quality_api/ohsomedb/__init__.py index 316b2f32e..dfe6ffbae 100644 --- a/ohsome_quality_api/ohsomedb/__init__.py +++ b/ohsome_quality_api/ohsomedb/__init__.py @@ -1,3 +1,15 @@ -from .contributions import contributions, users +from .requests import ( + attribute_completeness, + contributions, + density, + single_snapshot_aggregation, + users, +) -__all__ = ("contributions", "users") +__all__ = ( + "attribute_completeness", + "contributions", + "density", + "single_snapshot_aggregation", + "users", +) diff --git a/ohsome_quality_api/ohsomedb/contributions.py b/ohsome_quality_api/ohsomedb/contributions.py deleted file mode 100644 index c9989341f..000000000 --- a/ohsome_quality_api/ohsomedb/contributions.py +++ /dev/null @@ -1,65 +0,0 @@ -from pathlib import Path -from typing import Literal - -from geojson_pydantic.geometries import MultiPolygon, Polygon -from jinja2 import Environment, FileSystemLoader, select_autoescape -from ohsome_filter_to_sql.main import OhsomeFilter, ohsome_filter_to_sql -from pydantic import validate_call - -from ohsome_quality_api.config import get_config_value -from ohsome_quality_api.geodatabase import client - -ENV = Environment( - loader=FileSystemLoader(Path(__file__).parent / "templates"), - autoescape=select_autoescape(), -) - - -@validate_call -async def contributions( - *, - aggregation: Literal["count", "length", "area"], - bpolys: Polygon | MultiPolygon, - filter_: OhsomeFilter, -): - sql_filter, sql_filter_args = ohsome_filter_to_sql(filter_) - template = ENV.get_template("contributions.sql") - query = template.render( - **{ - "aggregation": aggregation, - "contributions": get_config_value("ohsomedb_contributions_table"), - "geom": len(sql_filter_args) + 1, - "filter": sql_filter, - } - ) - return await client.fetch( - query, - *sql_filter_args, - bpolys.model_dump_json(), - database="ohsomedb", - ) - - -@validate_call -async def users( - *, - aggregation: Literal["count"] = "count", - bpolys: Polygon | MultiPolygon, - filter_: OhsomeFilter, -): - sql_filter, sql_filter_args = ohsome_filter_to_sql(filter_) - template = ENV.get_template("users.sql") - query = template.render( - **{ - # "aggregation": aggregation, - "contributions": get_config_value("ohsomedb_contributions_table"), - "geom": len(sql_filter_args) + 1, - "filter": sql_filter, - } - ) - return await client.fetch( - query, - *sql_filter_args, - bpolys.model_dump_json(), - database="ohsomedb", - ) diff --git a/ohsome_quality_api/ohsomedb/requests.py b/ohsome_quality_api/ohsomedb/requests.py new file mode 100644 index 000000000..653678a4a --- /dev/null +++ b/ohsome_quality_api/ohsomedb/requests.py @@ -0,0 +1,171 @@ +from pathlib import Path +from typing import Literal + +from geojson_pydantic.geometries import MultiPolygon, Polygon +from jinja2 import Environment, FileSystemLoader, select_autoescape +from ohsome_filter_to_sql.main import OhsomeFilter, ohsome_filter_to_sql +from pydantic import validate_call + +from ohsome_quality_api.config import get_config_value +from ohsome_quality_api.geodatabase import client + +ENV = Environment( + loader=FileSystemLoader(Path(__file__).parent / "templates"), + autoescape=select_autoescape(), +) + + +@validate_call +async def contributions( + *, + aggregation: Literal["count", "length", "area"], + bpolys: Polygon | MultiPolygon, + filter_: OhsomeFilter, +): + sql_filter, sql_filter_args = ohsome_filter_to_sql(filter_) + template = ENV.get_template("contributions.sql") + query = template.render( + **{ + "aggregation": aggregation, + "contributions": get_config_value("ohsomedb_contributions_table"), + "geom": len(sql_filter_args) + 1, + "filter": sql_filter, + } + ) + return await client.fetch( + query, + *sql_filter_args, + bpolys.model_dump_json(), + database="ohsomedb", + ) + + +@validate_call +async def users( + *, + aggregation: Literal["count"] = "count", + bpolys: Polygon | MultiPolygon, + filter_: OhsomeFilter, +): + sql_filter, sql_filter_args = ohsome_filter_to_sql(filter_) + template = ENV.get_template("users.sql") + query = template.render( + **{ + # "aggregation": aggregation, + "contributions": get_config_value("ohsomedb_contributions_table"), + "geom": len(sql_filter_args) + 1, + "filter": sql_filter, + } + ) + return await client.fetch( + query, + *sql_filter_args, + bpolys.model_dump_json(), + database="ohsomedb", + ) + + +@validate_call +async def saturation( + *, + aggregation: Literal["count", "length", "area"], + bpolys: Polygon | MultiPolygon, + filter_: OhsomeFilter, +): + sql_filter, sql_filter_args = ohsome_filter_to_sql(filter_) + template = ENV.get_template("saturation.sql") + query = template.render( + **{ + "aggregation": aggregation, + "contributions": get_config_value("ohsomedb_contributions_table"), + "geom": len(sql_filter_args) + 1, + "filter": sql_filter, + } + ) + return await client.fetch( + query, + *sql_filter_args, + bpolys.model_dump_json(), + database="ohsomedb", + ) + + +@validate_call +async def attribute_completeness( + *, + aggregation: Literal["count", "length", "area"], + bpolys: Polygon | MultiPolygon, + filter_: OhsomeFilter, + attribute_filter_: OhsomeFilter, +): + sql_filter, sql_filter_args = ohsome_filter_to_sql(filter_) + attribute_sql_filter, attribute_sql_filter_args = ohsome_filter_to_sql( + attribute_filter_, args_shift=len(sql_filter_args) + ) + template = ENV.get_template("attribute_completeness.sql") + query = template.render( + **{ + "aggregation": aggregation, + "contributions": get_config_value("ohsomedb_contributions_table"), + "geom": len(attribute_sql_filter_args) + len(sql_filter_args) + 1, + "filter": sql_filter, + "attribute_filter": attribute_sql_filter, + } + ) + total_sql_filter_args = sql_filter_args + attribute_sql_filter_args + return await client.fetch( + query, + *total_sql_filter_args, + bpolys.model_dump_json(), + database="ohsomedb", + ) + + +@validate_call +async def single_snapshot_aggregation( + *, + aggregation: Literal["count", "length", "area"], + bpolys: Polygon | MultiPolygon, + filter_: OhsomeFilter, +): + sql_filter, sql_filter_args = ohsome_filter_to_sql(filter_) + template = ENV.get_template("single_snapshot_aggregation.sql") + query = template.render( + **{ + "aggregation": aggregation, + "contributions": get_config_value("ohsomedb_contributions_table"), + "geom": len(sql_filter_args) + 1, + "filter": sql_filter, + } + ) + return await client.fetch( + query, + *sql_filter_args, + bpolys.model_dump_json(), + database="ohsomedb", + ) + + +@validate_call +async def density( + *, + aggregation: Literal["area"], + bpolys: Polygon | MultiPolygon, + filter_: OhsomeFilter, +): + sql_filter, sql_filter_args = ohsome_filter_to_sql(filter_) + template = ENV.get_template("density.sql") + query = template.render( + **{ + "aggregation": aggregation, + "contributions": get_config_value("ohsomedb_contributions_table"), + "geom": len(sql_filter_args) + 1, + "filter": sql_filter, + } + ) + return await client.fetch( + query, + *sql_filter_args, + bpolys.model_dump_json(), + database="ohsomedb", + ) diff --git a/ohsome_quality_api/ohsomedb/templates/attribute_completeness.sql b/ohsome_quality_api/ohsomedb/templates/attribute_completeness.sql new file mode 100644 index 000000000..2b54a63bd --- /dev/null +++ b/ohsome_quality_api/ohsomedb/templates/attribute_completeness.sql @@ -0,0 +1,62 @@ +-- Parsing the GeoJSON directly in the WHERE clause instead of +-- in a WITH clause makes the query faster +with stats AS ( + SELECT + {% if aggregation == 'length' %} + 0.001 * SUM( + CASE + WHEN ST_Within( + c.geom, + ST_GeomFromGeoJSON(${{ geom }}) + ) + THEN c.length -- Use precomputed area from ohsome-planet + ELSE ST_Length( + ST_Intersection( + c.geom, + ST_GeomFromGeoJSON(${{ geom }}) + )::geography + ) + END + )::BIGINT + {% elif aggregation == 'area' or aggregation == 'area\density' %} + 0.001 * 0.001 * SUM( + CASE + WHEN ST_Within( + c.geom, + ST_GeomFromGeoJSON(${{ geom }}) + ) + THEN c.area -- Use precomputed area from ohsome-planet + ELSE ST_Area( + ST_Intersection( + c.geom, + ST_GeomFromGeoJSON(${{ geom }}) + )::geography + ) + END + )::BIGINT + {% else %} + COUNT(*) + {% endif %} + AS aggregation, + {{ attribute_filter }} has_attribute + FROM + {{ contributions }} c + WHERE 1=1 + -- TODO: this would be more performant but ohsome-filter-to-sql can not generate this + -- clause because is does not know about "latest" + -- AND status_geom_type = ANY(ARRAY[('latest', 'Polygon'), ('latest', 'MultiPolygon')]::_status_geom_type_type) + AND (status_geom_type).status = 'latest' -- excludes deleted + -- ohsome-filter-to-sql generated clause + AND ({{ filter }}) + AND ST_Intersects(c.geom, ST_GeomFromGeoJSON(${{ geom }})) + GROUP BY has_attribute +) +SELECT + stats_has_attribute.aggregation + stats_no_attribute.aggregation AS total_aggregation, + stats_has_attribute.aggregation AS aggregation_with_attribute, + stats_has_attribute.aggregation / (stats_has_attribute.aggregation + stats_no_attribute.aggregation)::float AS attribute_completeness +FROM stats stats_has_attribute, stats stats_no_attribute +WHERE 1=1 + AND stats_has_attribute.has_attribute + AND NOT stats_no_attribute.has_attribute +; diff --git a/ohsome_quality_api/ohsomedb/templates/density.sql b/ohsome_quality_api/ohsomedb/templates/density.sql new file mode 100644 index 000000000..43f0038c6 --- /dev/null +++ b/ohsome_quality_api/ohsomedb/templates/density.sql @@ -0,0 +1,36 @@ +-- WITH poly AS ( +-- SELECT ST_GeomFromGeoJSON(${{ geom }}) AS geom +-- ), +-- intersecting AS ( +-- SELECT c.geom +-- FROM {{ contributions }} c, poly p +-- WHERE +-- (status_geom_type).status = 'latest' +-- AND valid_to >= NOW()::timestamp +-- AND valid_from < NOW()::timestamp +-- AND ({{ filter }}) +-- AND ST_Intersects(c.geom, p.geom) +-- ) +-- SELECT +-- NOW()::timestamp AS snapshot_ts, +-- ST_Area(ST_UnaryUnion(ST_Union(i.geom))::geography) AS non_overlapping_geom +-- FROM intersecting i, poly p; + +WITH poly AS ( + SELECT ST_GeomFromGeoJSON(${{ geom }}) AS geom +) +SELECT + NOW()::timestamp AS snapshot_ts, + ST_Area(ST_UNION( + ST_Intersection(c.geom, p.geom) + )::geography) / ST_Area(p.geom::geography) as value +FROM {{ contributions }} c, poly p +WHERE + (status_geom_type).status = 'latest' + AND valid_to >= NOW()::timestamp + AND valid_from < NOW()::timestamp + AND ({{ filter }}) + AND ST_Intersects(c.geom, p.geom) +GROUP BY p.geom +; + diff --git a/ohsome_quality_api/ohsomedb/templates/single_snapshot_aggregation.sql b/ohsome_quality_api/ohsomedb/templates/single_snapshot_aggregation.sql new file mode 100644 index 000000000..6c7900b70 --- /dev/null +++ b/ohsome_quality_api/ohsomedb/templates/single_snapshot_aggregation.sql @@ -0,0 +1,31 @@ +WITH poly AS ( + SELECT ST_GeomFromGeoJSON(${{ geom }}) AS geom +) +SELECT + NOW()::timestamp AS snapshot_ts, + {% if aggregation == 'length' %} + SUM( + CASE + WHEN ST_Within(c.geom, p.geom) + THEN c.length -- Use precomputed length from ohsome-planet + ELSE ST_Length(ST_Intersection(c.geom, p.geom)) + END + )::BIGINT AS value + {% elif aggregation == 'area' or aggregation == 'area\density' %} + SUM( + CASE + WHEN ST_Within(c.geom, p.geom) + THEN c.area -- Use precomputed area from ohsome-planet + ELSE ST_Area(ST_Intersection(c.geom, p.geom)) + END + )::BIGINT AS value + {% else %} + COUNT(*) AS value + {% endif %} +FROM {{ contributions }} c, poly AS p +WHERE 1=1 + AND (status_geom_type).status IN ('latest') + AND valid_to >= NOW()::timestamp + AND valid_from < NOW()::timestamp -- before last snapshot time + AND ({{ filter }}) + AND ST_Intersects(c.geom, p.geom);