From 0d6713a65222ccfc7853abc0407e4aa0155c2771 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Wed, 3 Jun 2026 17:22:49 -0500 Subject: [PATCH 1/5] add tests --- parquet/tests/geospatial.rs | 130 +++++++++++++++++++++++++++++++++++- 1 file changed, 127 insertions(+), 3 deletions(-) diff --git a/parquet/tests/geospatial.rs b/parquet/tests/geospatial.rs index bf34528d03e8..eaed85843bc4 100644 --- a/parquet/tests/geospatial.rs +++ b/parquet/tests/geospatial.rs @@ -30,7 +30,7 @@ mod test { ArrowSchemaConverter, ArrowWriter, arrow_reader::ParquetRecordBatchReaderBuilder, arrow_writer::ArrowWriterOptions, }, - basic::LogicalType, + basic::{EdgeInterpolationAlgorithm, LogicalType}, column::reader::ColumnReader, data_type::{ByteArray, ByteArrayType}, file::{ @@ -63,7 +63,7 @@ mod test { ( "crs-default.parquet", LogicalType::geometry(None), - WkbMetadata::new(None, None), + WkbMetadata::new(Some("OGC:CRS84"), None), // omitted CRS defaults to OGC:CRS84 ), ( "crs-srid.parquet", @@ -78,7 +78,7 @@ mod test { ( "crs-geography.parquet", LogicalType::geography(None, None), - WkbMetadata::new(None, Some(WkbEdges::Spherical)), + WkbMetadata::new(Some("OGC:CRS84"), Some(WkbEdges::Spherical)), // omitted CRS defaults to OGC:CRS84 ), ]; @@ -425,4 +425,128 @@ mod test { ); Arc::new(array) } + + #[test] + fn test_logical_type_to_field_conversion() { + use parquet::arrow::parquet_to_arrow_schema; + use parquet::basic::Type as PhysicalType; + use parquet::schema::types::{SchemaDescriptor, Type}; + + // Test cases: (LogicalType, expected extension name, expected metadata JSON) + let test_cases = [ + // Geometry with default CRS (defaults to OGC:CRS84 per Parquet spec) + ( + LogicalType::geometry(None), + "geoarrow.wkb", + r#"{"crs":"OGC:CRS84"}"#, + ), + // Geometry with srid:0 should result in an unset (omitted) CRS + ( + LogicalType::geometry(Some("srid:0".to_string())), + "geoarrow.wkb", + r#"{}"#, + ), + // Geometry with custom CRSes (authority:code and partial projjson) + ( + LogicalType::geometry(Some("EPSG:4267".to_string())), + "geoarrow.wkb", + r#"{"crs":"EPSG:4267"}"#, + ), + ( + LogicalType::geometry(Some( + r#"{"id":{"authority":"EPSG","code":4326}}"#.to_string(), + )), + "geoarrow.wkb", + r#"{"crs":{"id":{"authority":"EPSG","code":4326}}}"#, + ), + // Geography with default CRS (default OGC:CRS84, spherical edges) + ( + LogicalType::geography(None, None), + "geoarrow.wkb", + r#"{"crs":"OGC:CRS84","edges":"spherical"}"#, + ), + // Geography with explicit edges + ( + LogicalType::geography(None, Some(EdgeInterpolationAlgorithm::SPHERICAL)), + "geoarrow.wkb", + r#"{"crs":"OGC:CRS84","edges":"spherical"}"#, + ), + ( + LogicalType::geography(None, Some(EdgeInterpolationAlgorithm::KARNEY)), + "geoarrow.wkb", + r#"{"crs":"OGC:CRS84","edges":"karney"}"#, + ), + ( + LogicalType::geography(None, Some(EdgeInterpolationAlgorithm::VINCENTY)), + "geoarrow.wkb", + r#"{"crs":"OGC:CRS84","edges":"vincenty"}"#, + ), + ( + LogicalType::geography(None, Some(EdgeInterpolationAlgorithm::ANDOYER)), + "geoarrow.wkb", + r#"{"crs":"OGC:CRS84","edges":"andoyer"}"#, + ), + ( + LogicalType::geography(None, Some(EdgeInterpolationAlgorithm::THOMAS)), + "geoarrow.wkb", + r#"{"crs":"OGC:CRS84","edges":"thomas"}"#, + ), + // Geometry with srid:0 should result in an unset (omitted) CRS + // and spherical edges + ( + LogicalType::geography(Some("srid:0".to_string()), None), + "geoarrow.wkb", + r#"{"edges":"spherical"}"#, + ), + // Geography with custom CRSes (authority:code and partial projjson) + ( + LogicalType::geography(Some("EPSG:4267".to_string()), None), + "geoarrow.wkb", + r#"{"crs":"EPSG:4267","edges":"spherical"}"#, + ), + ( + LogicalType::geography( + Some(r#"{"id":{"authority":"EPSG","code":4326}}"#.to_string()), + None, + ), + "geoarrow.wkb", + r#"{"crs":{"id":{"authority":"EPSG","code":4326}},"edges":"spherical"}"#, + ), + ]; + + for (logical_type, expected_extension_name, expected_metadata) in test_cases { + // Build a Parquet schema with the given LogicalType + let parquet_schema = SchemaDescriptor::new(Arc::new( + Type::group_type_builder("schema") + .with_fields(vec![Arc::new( + Type::primitive_type_builder("geom", PhysicalType::BYTE_ARRAY) + .with_logical_type(Some(logical_type.clone())) + .build() + .unwrap(), + )]) + .build() + .unwrap(), + )); + + // Convert to Arrow schema + let arrow_schema = parquet_to_arrow_schema(&parquet_schema, None).unwrap(); + let field = arrow_schema.field(0); + + // Check extension type name + let ext_name = field.metadata().get("ARROW:extension:name"); + assert_eq!( + ext_name, + Some(&expected_extension_name.to_string()), + "Extension name mismatch for {logical_type:?}" + ); + + // Check extension metadata + let ext_metadata = field.metadata().get("ARROW:extension:metadata"); + assert_eq!( + ext_metadata, + Some(&expected_metadata.to_string()), + "Extension metadata mismatch for {logical_type:?}" + ); + } + } } From a9db38289b79f879c9014efb1847f4a7b72fa242 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Wed, 3 Jun 2026 17:37:29 -0500 Subject: [PATCH 2/5] more tests --- parquet/tests/geospatial.rs | 128 +++++++++++++++++++++++++++++++----- 1 file changed, 112 insertions(+), 16 deletions(-) diff --git a/parquet/tests/geospatial.rs b/parquet/tests/geospatial.rs index eaed85843bc4..97a4c3224f8a 100644 --- a/parquet/tests/geospatial.rs +++ b/parquet/tests/geospatial.rs @@ -432,76 +432,64 @@ mod test { use parquet::basic::Type as PhysicalType; use parquet::schema::types::{SchemaDescriptor, Type}; - // Test cases: (LogicalType, expected extension name, expected metadata JSON) + // Test cases: (LogicalType, expected metadata JSON) let test_cases = [ // Geometry with default CRS (defaults to OGC:CRS84 per Parquet spec) ( LogicalType::geometry(None), - "geoarrow.wkb", r#"{"crs":"OGC:CRS84"}"#, ), // Geometry with srid:0 should result in an unset (omitted) CRS ( LogicalType::geometry(Some("srid:0".to_string())), - "geoarrow.wkb", r#"{}"#, ), // Geometry with custom CRSes (authority:code and partial projjson) ( LogicalType::geometry(Some("EPSG:4267".to_string())), - "geoarrow.wkb", r#"{"crs":"EPSG:4267"}"#, ), ( LogicalType::geometry(Some( r#"{"id":{"authority":"EPSG","code":4326}}"#.to_string(), )), - "geoarrow.wkb", r#"{"crs":{"id":{"authority":"EPSG","code":4326}}}"#, ), // Geography with default CRS (default OGC:CRS84, spherical edges) ( LogicalType::geography(None, None), - "geoarrow.wkb", r#"{"crs":"OGC:CRS84","edges":"spherical"}"#, ), // Geography with explicit edges ( LogicalType::geography(None, Some(EdgeInterpolationAlgorithm::SPHERICAL)), - "geoarrow.wkb", r#"{"crs":"OGC:CRS84","edges":"spherical"}"#, ), ( LogicalType::geography(None, Some(EdgeInterpolationAlgorithm::KARNEY)), - "geoarrow.wkb", r#"{"crs":"OGC:CRS84","edges":"karney"}"#, ), ( LogicalType::geography(None, Some(EdgeInterpolationAlgorithm::VINCENTY)), - "geoarrow.wkb", r#"{"crs":"OGC:CRS84","edges":"vincenty"}"#, ), ( LogicalType::geography(None, Some(EdgeInterpolationAlgorithm::ANDOYER)), - "geoarrow.wkb", r#"{"crs":"OGC:CRS84","edges":"andoyer"}"#, ), ( LogicalType::geography(None, Some(EdgeInterpolationAlgorithm::THOMAS)), - "geoarrow.wkb", r#"{"crs":"OGC:CRS84","edges":"thomas"}"#, ), // Geometry with srid:0 should result in an unset (omitted) CRS // and spherical edges ( LogicalType::geography(Some("srid:0".to_string()), None), - "geoarrow.wkb", r#"{"edges":"spherical"}"#, ), // Geography with custom CRSes (authority:code and partial projjson) ( LogicalType::geography(Some("EPSG:4267".to_string()), None), - "geoarrow.wkb", r#"{"crs":"EPSG:4267","edges":"spherical"}"#, ), ( @@ -509,12 +497,11 @@ mod test { Some(r#"{"id":{"authority":"EPSG","code":4326}}"#.to_string()), None, ), - "geoarrow.wkb", r#"{"crs":{"id":{"authority":"EPSG","code":4326}},"edges":"spherical"}"#, ), ]; - for (logical_type, expected_extension_name, expected_metadata) in test_cases { + for (logical_type, expected_metadata) in test_cases { // Build a Parquet schema with the given LogicalType let parquet_schema = SchemaDescriptor::new(Arc::new( Type::group_type_builder("schema") @@ -536,7 +523,7 @@ mod test { let ext_name = field.metadata().get("ARROW:extension:name"); assert_eq!( ext_name, - Some(&expected_extension_name.to_string()), + Some(&"geoarrow.wkb".to_string()), "Extension name mismatch for {logical_type:?}" ); @@ -549,4 +536,113 @@ mod test { ); } } + + #[test] + fn test_field_to_logical_type_conversion() { + use std::collections::HashMap; + + // Test cases: (extension metadata JSON, expected LogicalType) + let test_cases = [ + // Geometry with no CRS should be GEOMETRY(srid:0) + ( + r#"{}"#, + LogicalType::geometry(Some("srid:0".to_string())), + ), + // Geometry with string CRS + ( + r#"{"crs":"EPSG:4267"}"#, + LogicalType::geometry(Some("\"EPSG:4267\"".to_string())), + ), + // Geometry with PROJJSON CRS + ( + r#"{"crs":{"id":{"authority":"EPSG","code":3857}}}"#, + LogicalType::geometry(Some(r#"{"id":{"authority":"EPSG","code":3857}}"#.to_string())), + ), + // Geometry with lon/lat CRSes (canonically removed because lon/lat is the + // default Parquet CRS) + ( + r#"{"crs":"OGC:CRS84"}"#, + LogicalType::geometry(None), + ), + ( + r#"{"crs":"EPSG:4326}"#, + LogicalType::geometry(None), + ), + ( + r#"{"crs":{"id":{"authority":"EPSG","code":4326}}}"#, + LogicalType::geometry(None), + ), + ( + r#"{"crs":{"id":{"authority":"EPSG","code":"4326"}}}"#, + LogicalType::geometry(None), + ), + ( + r#"{"crs":{"id":{"authority":"OGC","code":"CRS84"}}}"#, + LogicalType::geometry(None), + ), + // Geography with no CRS, spherical edges + ( + r#"{"edges":"spherical"}"#, + LogicalType::geography(Some("srid:0".to_string()), None), + ), + // Geography with OGC:CRS84 and spherical edges + ( + r#"{"crs":"OGC:CRS84","edges":"spherical"}"#, + LogicalType::geography(None, None), + ), + // Geography with different edge algorithms + ( + r#"{"crs":"OGC:CRS84","edges":"karney"}"#, + LogicalType::geography(None, Some(EdgeInterpolationAlgorithm::KARNEY)), + ), + ( + r#"{"crs":"OGC:CRS84","edges":"vincenty"}"#, + LogicalType::geography(None, Some(EdgeInterpolationAlgorithm::VINCENTY)), + ), + ( + r#"{"crs":"OGC:CRS84","edges":"andoyer"}"#, + LogicalType::geography(None, Some(EdgeInterpolationAlgorithm::ANDOYER)), + ), + ( + r#"{"crs":"OGC:CRS84","edges":"thomas"}"#, + LogicalType::geography(None, Some(EdgeInterpolationAlgorithm::THOMAS)), + ), + // Geography with custom CRS and edges + ( + r#"{"crs":"EPSG:4267","edges":"karney"}"#, + LogicalType::geography(Some("\"EPSG:4267\"".to_string()), Some(EdgeInterpolationAlgorithm::KARNEY)), + ), + // Geography with PROJJSON CRS + ( + r#"{"crs":{"id":{"authority":"EPSG","code":4267}},"edges":"spherical"}"#, + LogicalType::geography(Some(r#"{"id":{"authority":"EPSG","code":4267}}"#.to_string()), None), + ), + ]; + + for (ext_metadata, expected_logical_type) in test_cases { + // Create an Arrow Field with raw extension metadata + let metadata = HashMap::from([ + ("ARROW:extension:name".to_string(), "geoarrow.wkb".to_string()), + ("ARROW:extension:metadata".to_string(), ext_metadata.to_string()), + ]); + let field = Arc::new( + Field::new("geom", DataType::Binary, true).with_metadata(metadata) + ); + let schema = Schema::new(vec![field]); + + // Convert to Parquet schema + let parquet_schema = ArrowSchemaConverter::new().convert(&schema).unwrap(); + + // Check the logical type + let column_descr = parquet_schema.column(0); + let logical_type = column_descr.logical_type_ref(); + + assert_eq!( + logical_type, + Some(&expected_logical_type), + "LogicalType mismatch for extension metadata: {ext_metadata}" + ); + } + } + } From cebc852b8e53405cf560406300fb1d55c6a99c4b Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Wed, 3 Jun 2026 17:38:09 -0500 Subject: [PATCH 3/5] this is ok --- parquet/src/arrow/schema/extension.rs | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/parquet/src/arrow/schema/extension.rs b/parquet/src/arrow/schema/extension.rs index 353770ddbdbb..8a1ee649756d 100644 --- a/parquet/src/arrow/schema/extension.rs +++ b/parquet/src/arrow/schema/extension.rs @@ -67,7 +67,13 @@ pub(crate) fn try_add_extension_type( } #[cfg(feature = "geospatial")] LogicalType::Geometry(geometry) => { - let md = parquet_geospatial::WkbMetadata::new(geometry.crs.as_deref(), None); + // Per Parquet spec: omitted CRS defaults to OGC:CRS84, srid:0 means unset CRS + let crs = match geometry.crs.as_deref() { + None => Some("OGC:CRS84"), + Some("srid:0") => None, + Some(crs) => Some(crs), + }; + let md = parquet_geospatial::WkbMetadata::new(crs, None); let mut arrow_field = arrow_field; arrow_field.try_with_extension_type(parquet_geospatial::WkbType::new(Some(md)))?; arrow_field @@ -78,7 +84,13 @@ pub(crate) fn try_add_extension_type( .algorithm() .map(|a| a.try_as_edges()) .transpose()?; - let md = parquet_geospatial::WkbMetadata::new(geography.crs.as_deref(), algorithm); + // Per Parquet spec: omitted CRS defaults to OGC:CRS84, srid:0 means unset CRS + let crs = match geography.crs.as_deref() { + None => Some("OGC:CRS84"), + Some("srid:0") => None, + Some(crs) => Some(crs), + }; + let md = parquet_geospatial::WkbMetadata::new(crs, algorithm); let mut arrow_field = arrow_field; arrow_field.try_with_extension_type(parquet_geospatial::WkbType::new(Some(md)))?; arrow_field From 5c6f6fc944996ce365bebba863f2dbbea026d51a Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Wed, 3 Jun 2026 17:49:12 -0500 Subject: [PATCH 4/5] generated fix --- parquet-geospatial/src/types.rs | 74 ++++++++++----------------- parquet/src/arrow/schema/extension.rs | 34 ++++++++---- parquet/tests/geospatial.rs | 2 +- 3 files changed, 53 insertions(+), 57 deletions(-) diff --git a/parquet-geospatial/src/types.rs b/parquet-geospatial/src/types.rs index f19911ad055a..b19e9103fcc3 100644 --- a/parquet-geospatial/src/types.rs +++ b/parquet-geospatial/src/types.rs @@ -57,12 +57,10 @@ pub struct Metadata { /// The Coordinate Reference System (CRS) of the [`WkbType`], if present. /// /// This may be a raw string value (e.g., "EPSG:3857") or a JSON object (e.g., PROJJSON). - /// Note: Common lon/lat CRS representations (EPSG:4326, OGC:CRS84) are canonicalized - /// to `None` during serialization to match Parquet conventions. #[serde(skip_serializing_if = "Option::is_none")] pub crs: Option, /// The edge interpolation algorithm of the [`WkbType`], if present. - #[serde(skip_serializing_if = "Option::is_none")] + #[serde(rename = "edges", skip_serializing_if = "Option::is_none")] pub algorithm: Option, } @@ -88,8 +86,10 @@ impl Metadata { } } - /// Detect if the CRS is a common representation of lon/lat on the standard WGS84 ellipsoid - fn crs_is_lon_lat(&self) -> bool { + /// Detect if the CRS is a common representation of lon/lat on the standard WGS84 ellipsoid. + /// + /// Returns `true` for OGC:CRS84, EPSG:4326, and PROJJSON representations thereof. + pub fn crs_is_lon_lat(&self) -> bool { use serde_json::Value; let Some(crs) = &self.crs else { @@ -144,16 +144,7 @@ impl ExtensionType for WkbType { } fn serialize_metadata(&self) -> Option { - let md = if self.0.crs_is_lon_lat() { - &Metadata { - crs: None, // lon/lat CRS is canonicalized as omitted (None) for Parquet - algorithm: self.0.algorithm, - } - } else { - &self.0 - }; - - serde_json::to_string(md).ok() + serde_json::to_string(&self.0).ok() } fn deserialize_metadata(metadata: Option<&str>) -> ArrowResult { @@ -251,7 +242,7 @@ mod tests { let wkb = WkbType::new(Some(metadata)); let serialized = wkb.serialize_metadata().unwrap(); - assert_eq!(serialized, r#"{"algorithm":"spherical"}"#); + assert_eq!(serialized, r#"{"edges":"spherical"}"#); let deserialized = WkbType::deserialize_metadata(Some(&serialized))?; assert!(deserialized.crs.is_none()); @@ -267,7 +258,7 @@ mod tests { let wkb = WkbType::new(Some(metadata)); let serialized = wkb.serialize_metadata().unwrap(); - assert_eq!(serialized, r#"{"crs":"srid:1234","algorithm":"spherical"}"#); + assert_eq!(serialized, r#"{"crs":"srid:1234","edges":"spherical"}"#); let deserialized = WkbType::deserialize_metadata(Some(&serialized))?; assert_eq!( @@ -353,54 +344,43 @@ mod tests { Ok(()) } - /// Test CRS canonicalization logic for common lon/lat representations + /// Test crs_is_lon_lat() detection for common lon/lat representations #[test] - fn test_crs_canonicalization() -> ArrowResult<()> { - // EPSG:4326 as string should be omitted + fn test_crs_is_lon_lat() -> ArrowResult<()> { + // EPSG:4326 as string should be detected let metadata = Metadata::new(Some("EPSG:4326"), None); - let wkb = WkbType::new(Some(metadata)); - let serialized = wkb.serialize_metadata().unwrap(); - assert_eq!(serialized, "{}"); + assert!(metadata.crs_is_lon_lat()); - // OGC:CRS84 as string should be omitted + // OGC:CRS84 as string should be detected let metadata = Metadata::new(Some("OGC:CRS84"), None); - let wkb = WkbType::new(Some(metadata)); - let serialized = wkb.serialize_metadata().unwrap(); - assert_eq!(serialized, "{}"); + assert!(metadata.crs_is_lon_lat()); - // A JSON object that reasonably looks like PROJJSON for EPSG:4326 should be omitted + // A JSON object that reasonably looks like PROJJSON for EPSG:4326 should be detected // detect "4326" as a string let crs_json = r#"{"id":{"authority":"EPSG","code":"4326"}}"#; let metadata = Metadata::new(Some(crs_json), None); - let wkb = WkbType::new(Some(metadata)); - let serialized = wkb.serialize_metadata().unwrap(); - assert_eq!(serialized, "{}"); + assert!(metadata.crs_is_lon_lat()); // detect 4326 as a number let crs_json = r#"{"id":{"authority":"EPSG","code":4326}}"#; let metadata = Metadata::new(Some(crs_json), None); - let wkb = WkbType::new(Some(metadata)); - let serialized = wkb.serialize_metadata().unwrap(); - assert_eq!(serialized, "{}"); + assert!(metadata.crs_is_lon_lat()); - // A JSON object that reasonably looks like PROJJSON for OGC:CRS84 should be omitted + // A JSON object that reasonably looks like PROJJSON for OGC:CRS84 should be detected let crs_json = r#"{"id":{"authority":"OGC","code":"CRS84"}}"#; let metadata = Metadata::new(Some(crs_json), None); - let wkb = WkbType::new(Some(metadata)); - let serialized = wkb.serialize_metadata().unwrap(); - assert_eq!(serialized, "{}"); + assert!(metadata.crs_is_lon_lat()); - // Other input types should be preserved + // Other CRS values should NOT be detected as lon/lat let metadata = Metadata::new(Some("srid:1234"), None); - let wkb = WkbType::new(Some(metadata)); - let serialized = wkb.serialize_metadata().unwrap(); - assert_eq!(serialized, r#"{"crs":"srid:1234"}"#); + assert!(!metadata.crs_is_lon_lat()); - // Canonicalization should work with algorithm field - let metadata = Metadata::new(Some("EPSG:4326"), Some(Edges::Spherical)); - let wkb = WkbType::new(Some(metadata)); - let serialized = wkb.serialize_metadata().unwrap(); - assert_eq!(serialized, r#"{"algorithm":"spherical"}"#); + let metadata = Metadata::new(Some("EPSG:3857"), None); + assert!(!metadata.crs_is_lon_lat()); + + // None CRS should NOT be detected as lon/lat + let metadata = Metadata::new(None, None); + assert!(!metadata.crs_is_lon_lat()); Ok(()) } diff --git a/parquet/src/arrow/schema/extension.rs b/parquet/src/arrow/schema/extension.rs index 8a1ee649756d..51e1f1dbd5aa 100644 --- a/parquet/src/arrow/schema/extension.rs +++ b/parquet/src/arrow/schema/extension.rs @@ -179,15 +179,31 @@ pub(crate) fn logical_type_for_binary(field: &Field) -> Option { match field.extension_type_name() { Some(n) if n == WkbType::NAME => match field.try_extension_type::() { - Ok(wkb_type) => match wkb_type.metadata().type_hint() { - WkbTypeHint::Geometry => Some(LogicalType::geometry( - wkb_type.metadata().crs.as_ref().map(|c| c.to_string()), - )), - WkbTypeHint::Geography => Some(LogicalType::geography( - wkb_type.metadata().crs.as_ref().map(|c| c.to_string()), - wkb_type.metadata().algorithm.map(|a| a.into()), - )), - }, + Ok(wkb_type) => { + // Convert Arrow CRS to Parquet CRS: + // - None → "srid:0" (unset CRS in Parquet) + // - lon/lat CRS (OGC:CRS84, EPSG:4326) → None (default in Parquet) + // - Other CRS → JSON string + let crs = match &wkb_type.metadata().crs { + None => Some("srid:0".to_string()), + Some(_) if wkb_type.metadata().crs_is_lon_lat() => None, + Some(c) => Some(c.to_string()), + }; + // Convert Arrow edges to Parquet algorithm: + // - Spherical → None (default for Geography) + // - Other algorithms → Some(algorithm) + let algorithm = wkb_type.metadata().algorithm.and_then(|a| { + use parquet_geospatial::WkbEdges; + match a { + WkbEdges::Spherical => None, // spherical is the default + _ => Some(a.into()), + } + }); + match wkb_type.metadata().type_hint() { + WkbTypeHint::Geometry => Some(LogicalType::geometry(crs)), + WkbTypeHint::Geography => Some(LogicalType::geography(crs, algorithm)), + } + } Err(_e) => None, }, _ => None, diff --git a/parquet/tests/geospatial.rs b/parquet/tests/geospatial.rs index 97a4c3224f8a..feb0f53f7536 100644 --- a/parquet/tests/geospatial.rs +++ b/parquet/tests/geospatial.rs @@ -565,7 +565,7 @@ mod test { LogicalType::geometry(None), ), ( - r#"{"crs":"EPSG:4326}"#, + r#"{"crs":"EPSG:4326"}"#, LogicalType::geometry(None), ), ( From f8af1c7114c18fd94885d1f14d3e0de62b5b7508 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Wed, 3 Jun 2026 17:49:27 -0500 Subject: [PATCH 5/5] fmt --- parquet/tests/geospatial.rs | 55 ++++++++++++++++++------------------- 1 file changed, 26 insertions(+), 29 deletions(-) diff --git a/parquet/tests/geospatial.rs b/parquet/tests/geospatial.rs index feb0f53f7536..d7a6f5b067d4 100644 --- a/parquet/tests/geospatial.rs +++ b/parquet/tests/geospatial.rs @@ -435,15 +435,9 @@ mod test { // Test cases: (LogicalType, expected metadata JSON) let test_cases = [ // Geometry with default CRS (defaults to OGC:CRS84 per Parquet spec) - ( - LogicalType::geometry(None), - r#"{"crs":"OGC:CRS84"}"#, - ), + (LogicalType::geometry(None), r#"{"crs":"OGC:CRS84"}"#), // Geometry with srid:0 should result in an unset (omitted) CRS - ( - LogicalType::geometry(Some("srid:0".to_string())), - r#"{}"#, - ), + (LogicalType::geometry(Some("srid:0".to_string())), r#"{}"#), // Geometry with custom CRSes (authority:code and partial projjson) ( LogicalType::geometry(Some("EPSG:4267".to_string())), @@ -544,10 +538,7 @@ mod test { // Test cases: (extension metadata JSON, expected LogicalType) let test_cases = [ // Geometry with no CRS should be GEOMETRY(srid:0) - ( - r#"{}"#, - LogicalType::geometry(Some("srid:0".to_string())), - ), + (r#"{}"#, LogicalType::geometry(Some("srid:0".to_string()))), // Geometry with string CRS ( r#"{"crs":"EPSG:4267"}"#, @@ -556,18 +547,14 @@ mod test { // Geometry with PROJJSON CRS ( r#"{"crs":{"id":{"authority":"EPSG","code":3857}}}"#, - LogicalType::geometry(Some(r#"{"id":{"authority":"EPSG","code":3857}}"#.to_string())), + LogicalType::geometry(Some( + r#"{"id":{"authority":"EPSG","code":3857}}"#.to_string(), + )), ), // Geometry with lon/lat CRSes (canonically removed because lon/lat is the // default Parquet CRS) - ( - r#"{"crs":"OGC:CRS84"}"#, - LogicalType::geometry(None), - ), - ( - r#"{"crs":"EPSG:4326"}"#, - LogicalType::geometry(None), - ), + (r#"{"crs":"OGC:CRS84"}"#, LogicalType::geometry(None)), + (r#"{"crs":"EPSG:4326"}"#, LogicalType::geometry(None)), ( r#"{"crs":{"id":{"authority":"EPSG","code":4326}}}"#, LogicalType::geometry(None), @@ -610,24 +597,35 @@ mod test { // Geography with custom CRS and edges ( r#"{"crs":"EPSG:4267","edges":"karney"}"#, - LogicalType::geography(Some("\"EPSG:4267\"".to_string()), Some(EdgeInterpolationAlgorithm::KARNEY)), + LogicalType::geography( + Some("\"EPSG:4267\"".to_string()), + Some(EdgeInterpolationAlgorithm::KARNEY), + ), ), // Geography with PROJJSON CRS ( r#"{"crs":{"id":{"authority":"EPSG","code":4267}},"edges":"spherical"}"#, - LogicalType::geography(Some(r#"{"id":{"authority":"EPSG","code":4267}}"#.to_string()), None), + LogicalType::geography( + Some(r#"{"id":{"authority":"EPSG","code":4267}}"#.to_string()), + None, + ), ), ]; for (ext_metadata, expected_logical_type) in test_cases { // Create an Arrow Field with raw extension metadata let metadata = HashMap::from([ - ("ARROW:extension:name".to_string(), "geoarrow.wkb".to_string()), - ("ARROW:extension:metadata".to_string(), ext_metadata.to_string()), + ( + "ARROW:extension:name".to_string(), + "geoarrow.wkb".to_string(), + ), + ( + "ARROW:extension:metadata".to_string(), + ext_metadata.to_string(), + ), ]); - let field = Arc::new( - Field::new("geom", DataType::Binary, true).with_metadata(metadata) - ); + let field = + Arc::new(Field::new("geom", DataType::Binary, true).with_metadata(metadata)); let schema = Schema::new(vec![field]); // Convert to Parquet schema @@ -644,5 +642,4 @@ mod test { ); } } - }