Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
74 changes: 27 additions & 47 deletions parquet-geospatial/src/types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -57,12 +57,10 @@ pub struct Metadata {
/// The Coordinate Reference System (CRS) of the [`WkbType`], if present.
///
/// This may be a raw string value (e.g., "EPSG:3857") or a JSON object (e.g., PROJJSON).
/// Note: Common lon/lat CRS representations (EPSG:4326, OGC:CRS84) are canonicalized
/// to `None` during serialization to match Parquet conventions.
#[serde(skip_serializing_if = "Option::is_none")]
pub crs: Option<serde_json::Value>,
/// The edge interpolation algorithm of the [`WkbType`], if present.
#[serde(skip_serializing_if = "Option::is_none")]
#[serde(rename = "edges", skip_serializing_if = "Option::is_none")]
pub algorithm: Option<Edges>,
}

Expand All @@ -88,8 +86,10 @@ impl Metadata {
}
}

/// Detect if the CRS is a common representation of lon/lat on the standard WGS84 ellipsoid
fn crs_is_lon_lat(&self) -> bool {
/// Detect if the CRS is a common representation of lon/lat on the standard WGS84 ellipsoid.
///
/// Returns `true` for OGC:CRS84, EPSG:4326, and PROJJSON representations thereof.
pub fn crs_is_lon_lat(&self) -> bool {
use serde_json::Value;

let Some(crs) = &self.crs else {
Expand Down Expand Up @@ -144,16 +144,7 @@ impl ExtensionType for WkbType {
}

fn serialize_metadata(&self) -> Option<String> {
let md = if self.0.crs_is_lon_lat() {
&Metadata {
crs: None, // lon/lat CRS is canonicalized as omitted (None) for Parquet
algorithm: self.0.algorithm,
}
} else {
&self.0
};

serde_json::to_string(md).ok()
serde_json::to_string(&self.0).ok()
}

fn deserialize_metadata(metadata: Option<&str>) -> ArrowResult<Self::Metadata> {
Expand Down Expand Up @@ -251,7 +242,7 @@ mod tests {
let wkb = WkbType::new(Some(metadata));

let serialized = wkb.serialize_metadata().unwrap();
assert_eq!(serialized, r#"{"algorithm":"spherical"}"#);
assert_eq!(serialized, r#"{"edges":"spherical"}"#);

let deserialized = WkbType::deserialize_metadata(Some(&serialized))?;
assert!(deserialized.crs.is_none());
Expand All @@ -267,7 +258,7 @@ mod tests {
let wkb = WkbType::new(Some(metadata));

let serialized = wkb.serialize_metadata().unwrap();
assert_eq!(serialized, r#"{"crs":"srid:1234","algorithm":"spherical"}"#);
assert_eq!(serialized, r#"{"crs":"srid:1234","edges":"spherical"}"#);

let deserialized = WkbType::deserialize_metadata(Some(&serialized))?;
assert_eq!(
Expand Down Expand Up @@ -353,54 +344,43 @@ mod tests {
Ok(())
}

/// Test CRS canonicalization logic for common lon/lat representations
/// Test crs_is_lon_lat() detection for common lon/lat representations
#[test]
fn test_crs_canonicalization() -> ArrowResult<()> {
// EPSG:4326 as string should be omitted
fn test_crs_is_lon_lat() -> ArrowResult<()> {
// EPSG:4326 as string should be detected
let metadata = Metadata::new(Some("EPSG:4326"), None);
let wkb = WkbType::new(Some(metadata));
let serialized = wkb.serialize_metadata().unwrap();
assert_eq!(serialized, "{}");
assert!(metadata.crs_is_lon_lat());

// OGC:CRS84 as string should be omitted
// OGC:CRS84 as string should be detected
let metadata = Metadata::new(Some("OGC:CRS84"), None);
let wkb = WkbType::new(Some(metadata));
let serialized = wkb.serialize_metadata().unwrap();
assert_eq!(serialized, "{}");
assert!(metadata.crs_is_lon_lat());

// A JSON object that reasonably looks like PROJJSON for EPSG:4326 should be omitted
// A JSON object that reasonably looks like PROJJSON for EPSG:4326 should be detected
// detect "4326" as a string
let crs_json = r#"{"id":{"authority":"EPSG","code":"4326"}}"#;
let metadata = Metadata::new(Some(crs_json), None);
let wkb = WkbType::new(Some(metadata));
let serialized = wkb.serialize_metadata().unwrap();
assert_eq!(serialized, "{}");
assert!(metadata.crs_is_lon_lat());

// detect 4326 as a number
let crs_json = r#"{"id":{"authority":"EPSG","code":4326}}"#;
let metadata = Metadata::new(Some(crs_json), None);
let wkb = WkbType::new(Some(metadata));
let serialized = wkb.serialize_metadata().unwrap();
assert_eq!(serialized, "{}");
assert!(metadata.crs_is_lon_lat());

// A JSON object that reasonably looks like PROJJSON for OGC:CRS84 should be omitted
// A JSON object that reasonably looks like PROJJSON for OGC:CRS84 should be detected
let crs_json = r#"{"id":{"authority":"OGC","code":"CRS84"}}"#;
let metadata = Metadata::new(Some(crs_json), None);
let wkb = WkbType::new(Some(metadata));
let serialized = wkb.serialize_metadata().unwrap();
assert_eq!(serialized, "{}");
assert!(metadata.crs_is_lon_lat());

// Other input types should be preserved
// Other CRS values should NOT be detected as lon/lat
let metadata = Metadata::new(Some("srid:1234"), None);
let wkb = WkbType::new(Some(metadata));
let serialized = wkb.serialize_metadata().unwrap();
assert_eq!(serialized, r#"{"crs":"srid:1234"}"#);
assert!(!metadata.crs_is_lon_lat());

// Canonicalization should work with algorithm field
let metadata = Metadata::new(Some("EPSG:4326"), Some(Edges::Spherical));
let wkb = WkbType::new(Some(metadata));
let serialized = wkb.serialize_metadata().unwrap();
assert_eq!(serialized, r#"{"algorithm":"spherical"}"#);
let metadata = Metadata::new(Some("EPSG:3857"), None);
assert!(!metadata.crs_is_lon_lat());

// None CRS should NOT be detected as lon/lat
let metadata = Metadata::new(None, None);
assert!(!metadata.crs_is_lon_lat());

Ok(())
}
Expand Down
50 changes: 39 additions & 11 deletions parquet/src/arrow/schema/extension.rs
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,13 @@ pub(crate) fn try_add_extension_type(
}
#[cfg(feature = "geospatial")]
LogicalType::Geometry(geometry) => {
let md = parquet_geospatial::WkbMetadata::new(geometry.crs.as_deref(), None);
// Per Parquet spec: omitted CRS defaults to OGC:CRS84, srid:0 means unset CRS
let crs = match geometry.crs.as_deref() {
None => Some("OGC:CRS84"),
Some("srid:0") => None,
Some(crs) => Some(crs),
};
let md = parquet_geospatial::WkbMetadata::new(crs, None);
let mut arrow_field = arrow_field;
arrow_field.try_with_extension_type(parquet_geospatial::WkbType::new(Some(md)))?;
arrow_field
Expand All @@ -78,7 +84,13 @@ pub(crate) fn try_add_extension_type(
.algorithm()
.map(|a| a.try_as_edges())
.transpose()?;
let md = parquet_geospatial::WkbMetadata::new(geography.crs.as_deref(), algorithm);
// Per Parquet spec: omitted CRS defaults to OGC:CRS84, srid:0 means unset CRS
let crs = match geography.crs.as_deref() {
None => Some("OGC:CRS84"),
Some("srid:0") => None,
Some(crs) => Some(crs),
};
let md = parquet_geospatial::WkbMetadata::new(crs, algorithm);
let mut arrow_field = arrow_field;
arrow_field.try_with_extension_type(parquet_geospatial::WkbType::new(Some(md)))?;
arrow_field
Expand Down Expand Up @@ -167,15 +179,31 @@ pub(crate) fn logical_type_for_binary(field: &Field) -> Option<LogicalType> {

match field.extension_type_name() {
Some(n) if n == WkbType::NAME => match field.try_extension_type::<WkbType>() {
Ok(wkb_type) => match wkb_type.metadata().type_hint() {
WkbTypeHint::Geometry => Some(LogicalType::geometry(
wkb_type.metadata().crs.as_ref().map(|c| c.to_string()),
)),
WkbTypeHint::Geography => Some(LogicalType::geography(
wkb_type.metadata().crs.as_ref().map(|c| c.to_string()),
wkb_type.metadata().algorithm.map(|a| a.into()),
)),
},
Ok(wkb_type) => {
// Convert Arrow CRS to Parquet CRS:
// - None → "srid:0" (unset CRS in Parquet)
// - lon/lat CRS (OGC:CRS84, EPSG:4326) → None (default in Parquet)
// - Other CRS → JSON string
let crs = match &wkb_type.metadata().crs {
None => Some("srid:0".to_string()),
Some(_) if wkb_type.metadata().crs_is_lon_lat() => None,
Some(c) => Some(c.to_string()),
};
// Convert Arrow edges to Parquet algorithm:
// - Spherical → None (default for Geography)
// - Other algorithms → Some(algorithm)
let algorithm = wkb_type.metadata().algorithm.and_then(|a| {
use parquet_geospatial::WkbEdges;
match a {
WkbEdges::Spherical => None, // spherical is the default
_ => Some(a.into()),
}
});
match wkb_type.metadata().type_hint() {
WkbTypeHint::Geometry => Some(LogicalType::geometry(crs)),
WkbTypeHint::Geography => Some(LogicalType::geography(crs, algorithm)),
}
}
Err(_e) => None,
},
_ => None,
Expand Down
Loading
Loading