Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 12 additions & 7 deletions crates/assemble/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,12 @@ pub fn inference(shape: &Shape, exists: Exists) -> flow::Inference {
shape::Redact::Unset => flow::inference::Redact::Unset,
};

let content_media_type = shape
.content_media_type
.as_deref()
.unwrap_or_default()
.to_string();

flow::Inference {
types: shape.type_.to_vec(),
exists: exists as i32,
Expand All @@ -66,14 +72,12 @@ pub fn inference(shape: &Shape, exists: Exists) -> flow::Inference {
.unwrap_or_default(),
default_json,
secret: shape.secret.unwrap_or_default(),
// `content_media_type` lives at the top level. For back-compat,
// `Inference.String.content_type` (below) carries the same value
// whenever the projection's type includes "string".
string: if shape.type_.overlaps(types::STRING) {
Some(flow::inference::String {
content_type: shape
.string
.content_type
.clone()
.map(Into::into)
.unwrap_or_default(),
content_type: content_media_type.clone(),
format: shape
.string
.format
Expand Down Expand Up @@ -135,6 +139,7 @@ pub fn inference(shape: &Shape, exists: Exists) -> flow::Inference {
enum_json_vec,
reduce: reduce as i32,
redact: redact as i32,
content_media_type,
}
}

Expand Down Expand Up @@ -601,10 +606,10 @@ mod test {
description: Some("the description".into()),
title: Some("the title".into()),
secret: Some(true),
content_media_type: Some("a/type".into()),
string: StringShape {
content_encoding: Some("BaSE64".into()),
format: Some(json::schema::formats::Format::DateTime),
content_type: Some("a/type".into()),
min_length: 10,
max_length: Some(123),
},
Expand Down
4 changes: 4 additions & 0 deletions crates/assemble/src/snapshots/assemble__test__inference.snap
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ expression: "&[out1, out2, out3, out4]"
],
reduce: Sum,
redact: Sha256,
content_media_type: "a/type",
},
Inference {
types: [
Expand All @@ -50,6 +51,7 @@ expression: "&[out1, out2, out3, out4]"
],
reduce: Sum,
redact: Sha256,
content_media_type: "a/type",
},
Inference {
types: [
Expand Down Expand Up @@ -85,6 +87,7 @@ expression: "&[out1, out2, out3, out4]"
],
reduce: Sum,
redact: Sha256,
content_media_type: "a/type",
},
Inference {
types: [
Expand Down Expand Up @@ -116,5 +119,6 @@ expression: "&[out1, out2, out3, out4]"
],
reduce: Sum,
redact: Sha256,
content_media_type: "a/type",
},
]
33 changes: 30 additions & 3 deletions crates/doc/src/shape/inference.rs
Original file line number Diff line number Diff line change
Expand Up @@ -219,7 +219,7 @@ impl Shape {
shape.string.content_encoding = Some((&**enc).into());
}
Annotation::Core(CoreAnnotation::ContentMediaType(mt)) => {
shape.string.content_type = Some((&**mt).into());
shape.content_media_type = Some((&**mt).into());
}
Annotation::Core(CoreAnnotation::Format(format)) => {
shape.string.format = Some(*format);
Expand Down Expand Up @@ -476,6 +476,9 @@ mod test {
- reduce: {strategy: firstWriteWins}
- redact: {strategy: sha256}
- default: john.doe@gmail.com
- oneOf:
- contentMediaType: some/thing
- contentMediaType: some/thing
anyOf:
- contentEncoding: base64
- type: object # Elided (impossible).
Expand Down Expand Up @@ -547,9 +550,9 @@ mod test {
None,
))),
secret: Some(true),
content_media_type: Some("some/thing".into()),
string: StringShape {
content_encoding: Some("base64".into()),
content_type: Some("some/thing".into()),
format: Some(Format::Email),
max_length: None,
min_length: 0,
Expand All @@ -559,6 +562,25 @@ mod test {
);
}

#[test]
fn test_one_of_branch_elided_by_outer_type_constraint() {
// Regression check for how `oneOf` branches interact with a sibling `$ref`.
//
// A `oneOf` branch whose type is incompatible with the outer
// `$ref`-imposed type set must not leak that type into the final
// shape. Here the `null` branch is impossible given the $ref
// restricts to [string, array].
//
// NOTE(review): `shape_from` is defined outside this view; presumably it
// parses the YAML schema below and returns its inferred `Shape` -- confirm.
let shape = shape_from(
r#"
$defs:
aDef: {type: [string, array]}
oneOf:
- type: array
- type: 'null'
$ref: '#/$defs/aDef'
"#,
);
// Expected result is exactly `array`: `null` is ruled out by the `$ref`,
// and `string` is not offered by either `oneOf` branch.
assert_eq!(shape.type_, types::ARRAY);
}

#[test]
fn test_multiple_reduce_and_redact() {
infer_test(
Expand Down Expand Up @@ -742,6 +764,7 @@ mod test {
provenance: Inline,
default: None,
secret: None,
content_media_type: None,
annotations: {},
array: ArrayShape {
additional_items: None,
Expand All @@ -760,7 +783,6 @@ mod test {
},
string: StringShape {
content_encoding: None,
content_type: None,
format: None,
max_length: None,
min_length: 0,
Expand Down Expand Up @@ -888,6 +910,7 @@ mod test {
shape: enum_fixture(json!(["b"])),
}],
additional_properties: None,
..ObjShape::new()
},
..Shape::anything()
},
Expand Down Expand Up @@ -933,6 +956,7 @@ mod test {
}],
pattern_properties: Vec::new(),
additional_properties: Some(Box::new(enum_fixture(json!(["a", "b"])))),
..ObjShape::new()
},
..Shape::anything()
},
Expand Down Expand Up @@ -1133,6 +1157,7 @@ mod test {
numeric: NumericShape {
minimum: Some(5u64.into()),
maximum: Some(10u64.into()),
..NumericShape::new()
},
..Shape::anything()
},
Expand Down Expand Up @@ -1244,6 +1269,7 @@ mod test {
shape: enum_fixture(json!(["c", "d"])),
}],
additional_properties: Some(Box::new(enum_fixture(json!([1, 2])))),
..ObjShape::new()
},
..Shape::anything()
},
Expand Down Expand Up @@ -1276,6 +1302,7 @@ mod test {
shape: enum_fixture(json!(["c", "d"])),
}],
additional_properties: Some(Box::new(Shape::nothing())),
..ObjShape::new()
},
..Shape::anything()
},
Expand Down
4 changes: 3 additions & 1 deletion crates/doc/src/shape/intersect.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,6 @@ impl StringShape {
};
StringShape {
content_encoding: lhs.content_encoding.or(rhs.content_encoding),
content_type: lhs.content_type.or(rhs.content_type),
format: lhs.format.or(rhs.format),
min_length: lhs.min_length.max(rhs.min_length),
max_length,
Expand Down Expand Up @@ -256,6 +255,8 @@ impl Shape {
let default = lhs.default.or(rhs.default);
let secret = lhs.secret.or(rhs.secret);

let content_media_type = lhs.content_media_type.or(rhs.content_media_type);

let mut annotations = rhs.annotations;
annotations.extend(lhs.annotations.into_iter());

Expand Down Expand Up @@ -298,6 +299,7 @@ impl Shape {
provenance,
default,
secret,
content_media_type,
annotations,
string,
array,
Expand Down
11 changes: 8 additions & 3 deletions crates/doc/src/shape/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,11 @@ pub struct Shape {
pub default: Option<Box<(Value, Option<super::FailedValidation>)>>,
/// Is this location sensitive? For example, a password or credential.
pub secret: Option<bool>,
/// Annotated `contentMediaType`. The JSON-Schema specification defines
/// this annotation only for strings; Flow extends it to apply to any
/// type, so it lives at the top level rather than nested in a typed
/// sub-shape.
pub content_media_type: Option<Box<str>>,
/// Annotations are any keywords starting with `X-` or `x-`.
/// Their keys and values are collected here, without performing any
/// normalization of prefix case. Technically both `x-foo` and `X-foo` may be
Expand All @@ -52,7 +57,6 @@ pub struct Shape {
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct StringShape {
pub content_encoding: Option<Box<str>>,
pub content_type: Option<Box<str>>,
pub format: Option<Format>,
pub max_length: Option<u32>,
pub min_length: u32,
Expand Down Expand Up @@ -135,7 +139,6 @@ impl StringShape {
pub const fn new() -> Self {
Self {
content_encoding: None,
content_type: None,
format: None,
max_length: None,
min_length: 0,
Expand Down Expand Up @@ -187,6 +190,7 @@ impl Shape {
provenance: Provenance::Unset,
default: None,
secret: None,
content_media_type: None,
annotations: BTreeMap::new(),
array: ArrayShape::new(),
numeric: NumericShape::new(),
Expand All @@ -208,6 +212,7 @@ impl Shape {
provenance: Provenance::Inline,
default: None,
secret: None,
content_media_type: None,
annotations: BTreeMap::new(),
array: ArrayShape::new(),
numeric: NumericShape::new(),
Expand Down Expand Up @@ -286,7 +291,7 @@ mod test {
fn shape_size_regression() {
use super::{ArrayShape, ObjShape, Shape, StringShape};
assert_eq!(std::mem::size_of::<ObjShape>(), 56);
assert_eq!(std::mem::size_of::<StringShape>(), 48);
assert_eq!(std::mem::size_of::<StringShape>(), 32);
assert_eq!(std::mem::size_of::<ArrayShape>(), 48);
assert_eq!(std::mem::size_of::<Shape>(), 328);
}
Expand Down
18 changes: 11 additions & 7 deletions crates/doc/src/shape/schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ fn to_sub_schema(shape: Shape) -> Schema {
provenance: _, // Not mapped to a schema.
default,
secret,
content_media_type,
annotations,
array,
numeric,
Expand Down Expand Up @@ -53,6 +54,16 @@ fn to_sub_schema(shape: Shape) -> Schema {
out.insert("default".to_string(), d.0);
}

// The JSON-Schema spec defines `contentMediaType` only for strings;
// Flow extends it to apply to any type, so it lives at the top level
// of Shape.
if let Some(ct) = content_media_type {
out.insert(
keywords::CONTENT_MEDIA_TYPE.to_string(),
serde_json::json!(ct),
);
}

// Object keywords.
if type_.overlaps(types::OBJECT) {
let ObjShape {
Expand Down Expand Up @@ -136,7 +147,6 @@ fn to_sub_schema(shape: Shape) -> Schema {
if type_.overlaps(types::STRING) {
let StringShape {
content_encoding,
content_type,
format,
max_length,
min_length,
Expand All @@ -148,12 +158,6 @@ fn to_sub_schema(shape: Shape) -> Schema {
serde_json::json!(encoding),
);
}
if let Some(content_type) = content_type {
out.insert(
keywords::CONTENT_MEDIA_TYPE.to_string(),
serde_json::json!(content_type),
);
}
if let Some(f) = format {
out.insert("format".to_string(), serde_json::json!(f.to_string()));
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ Shape {
provenance: Inline,
default: None,
secret: None,
content_media_type: None,
annotations: {
"x-test-top-level": Bool(true),
},
Expand Down Expand Up @@ -43,6 +44,7 @@ Shape {
provenance: Inline,
default: None,
secret: None,
content_media_type: None,
annotations: {
"X-bar-top-level": Bool(true),
"x-bar-two": String("twoVal"),
Expand All @@ -64,7 +66,6 @@ Shape {
},
string: StringShape {
content_encoding: None,
content_type: None,
format: None,
max_length: None,
min_length: 0,
Expand All @@ -87,6 +88,7 @@ Shape {
provenance: Inline,
default: None,
secret: None,
content_media_type: None,
annotations: {
"x-conflicting-ann": String("yes please"),
},
Expand All @@ -107,7 +109,6 @@ Shape {
},
string: StringShape {
content_encoding: None,
content_type: None,
format: None,
max_length: None,
min_length: 0,
Expand All @@ -128,6 +129,7 @@ Shape {
provenance: Inline,
default: None,
secret: None,
content_media_type: None,
annotations: {
"X-foo-top-level": Bool(false),
"x-foo-one": String("oneVal"),
Expand All @@ -150,7 +152,6 @@ Shape {
},
string: StringShape {
content_encoding: None,
content_type: None,
format: None,
max_length: None,
min_length: 0,
Expand All @@ -161,7 +162,6 @@ Shape {
},
string: StringShape {
content_encoding: None,
content_type: None,
format: None,
max_length: None,
min_length: 0,
Expand Down
Loading
Loading