-
Notifications
You must be signed in to change notification settings - Fork 1
fix: refine narrative schema pack #19501
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,104 @@ | ||
| { | ||
| "$schema": "https://json-schema.org/draft/2020-12/schema", | ||
| "$id": "https://summit.example.com/schemas/provenance/receipt.schema.json", | ||
| "title": "ProvenanceReceipt", | ||
| "type": "object", | ||
| "additionalProperties": false, | ||
| "required": [ | ||
| "version", | ||
| "provenance_id", | ||
| "generated_by", | ||
| "inputs", | ||
| "transforms", | ||
| "outputs" | ||
| ], | ||
| "properties": { | ||
| "version": { | ||
| "type": "string", | ||
| "minLength": 1 | ||
| }, | ||
| "provenance_id": { | ||
| "type": "string", | ||
| "pattern": "^sha256:[a-f0-9]{64}$" | ||
| }, | ||
| "generated_by": { | ||
| "type": "object", | ||
| "additionalProperties": false, | ||
| "required": [ | ||
| "pipeline", | ||
| "pipeline_version", | ||
| "model", | ||
| "model_version", | ||
| "policy_version" | ||
| ], | ||
| "properties": { | ||
| "pipeline": {"type": "string", "minLength": 1}, | ||
| "pipeline_version": {"type": "string", "minLength": 1}, | ||
| "model": {"type": "string", "minLength": 1}, | ||
| "model_version": {"type": "string", "minLength": 1}, | ||
| "policy_version": {"type": "string", "minLength": 1} | ||
| } | ||
| }, | ||
| "inputs": { | ||
| "type": "array", | ||
| "minItems": 1, | ||
| "items": { | ||
| "type": "object", | ||
| "additionalProperties": false, | ||
| "required": ["artifact_id", "content_hash"], | ||
| "properties": { | ||
| "artifact_id": { | ||
| "type": "string", | ||
| "pattern": "^sha256:[a-f0-9]{64}$" | ||
| }, | ||
| "content_hash": { | ||
| "type": "string", | ||
| "pattern": "^sha256:[a-f0-9]{64}$" | ||
| }, | ||
| "span": {"type": "string"} | ||
| } | ||
| } | ||
| }, | ||
| "transforms": { | ||
| "type": "array", | ||
| "minItems": 1, | ||
| "items": { | ||
| "type": "object", | ||
| "additionalProperties": false, | ||
| "required": ["name", "hash"], | ||
| "properties": { | ||
| "name": {"type": "string", "minLength": 1}, | ||
| "hash": { | ||
| "type": "string", | ||
| "pattern": "^sha256:[a-f0-9]{64}$" | ||
| } | ||
| } | ||
| } | ||
| }, | ||
| "outputs": { | ||
| "type": "array", | ||
| "minItems": 1, | ||
| "items": { | ||
| "type": "object", | ||
| "additionalProperties": false, | ||
| "required": ["type", "id", "confidence", "normal_form"], | ||
| "properties": { | ||
| "type": { | ||
| "type": "string", | ||
| "enum": ["Claim", "Frame", "Assumption"] | ||
| }, | ||
| "id": { | ||
| "type": "string", | ||
| "pattern": "^sha256:[a-f0-9]{64}$" | ||
| }, | ||
| "confidence": { | ||
| "type": "number", | ||
| "minimum": 0, | ||
| "maximum": 1 | ||
| }, | ||
| "normal_form": {"type": "string", "minLength": 1} | ||
| } | ||
| } | ||
| } | ||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,198 @@ | ||
| version: 1 | ||
| namespace: summit.narratives | ||
| id_policy: | ||
| algo: sha256 | ||
| canonicalization: unicode_nfkc_whitespace_collapse | ||
| nodes: | ||
| Artifact: | ||
| description: Observed content item (post/article/video transcript chunk) | ||
| keys: [artifact_id] | ||
| props: | ||
| artifact_id: {type: string, required: true} | ||
| uri: {type: string, required: false} | ||
| platform: {type: string, required: false} | ||
| content_hash: {type: string, required: true} | ||
| lang: {type: string, required: false} | ||
| text_excerpt: {type: string, required: false} | ||
| media_type: {type: string, required: false} # text|image|video|audio|mixed | ||
| collected_at_ref: {type: string, required: false} # pointer to stamp.json record | ||
| integrity: | ||
| type: object | ||
| required: false | ||
| shape: | ||
| hash_algo: {type: string} | ||
| hash: {type: string} | ||
| size_bytes: {type: integer} | ||
|
Comment on lines
+23
to
+25
Contributor
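There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
The properties `hash_algo`, `hash`, and `size_bytes` in the `integrity` shape do not declare `required`, unlike the other properties in this file. Suggested change:
hash_algo: {type: string, required: true}
hash: {type: string, required: true}
size_bytes: {type: integer, required: true} |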
||
| Actor: | ||
| keys: [actor_id] | ||
| props: | ||
| actor_id: {type: string, required: true} | ||
| handle: {type: string, required: false} | ||
| display_name: {type: string, required: false} | ||
| actor_type: {type: string, required: false} # individual|org|media|bot_suspected|unknown | ||
| tier: {type: string, required: false} # fringe|influencer|semi_legit|mainstream|official | ||
| Narrative: | ||
| keys: [narrative_id] | ||
| props: | ||
| narrative_id: {type: string, required: true} | ||
| label: {type: string, required: false} | ||
| state: {type: string, required: true} # seeded|contested|normalized|institutionalized|exhausted|dormant|reactivated | ||
| first_seen_ref: {type: string, required: false} | ||
| last_seen_ref: {type: string, required: false} | ||
| topic_tags: {type: list, items: string, required: false} | ||
| Frame: | ||
| keys: [frame_id] | ||
| props: | ||
| frame_id: {type: string, required: true} | ||
| narrative_id: {type: string, required: true} | ||
| frame_normal_form: {type: string, required: true} | ||
| polarity_target: {type: string, required: false} | ||
| solution_hook: {type: string, required: false} | ||
| Claim: | ||
| keys: [claim_id] | ||
| props: | ||
| claim_id: {type: string, required: true} | ||
| claim_normal_form: {type: string, required: true} | ||
| verifiability: {type: string, required: false} # verifiable|nonverifiable|normative|question | ||
| stance: {type: string, required: false} # assert|deny|question|imply | ||
| Assumption: | ||
| keys: [assumption_id] | ||
| props: | ||
| assumption_id: {type: string, required: true} | ||
| assumption_normal_form: {type: string, required: true} | ||
| scope: {type: string, required: false} # local|narrative|cross_narrative | ||
| Community: | ||
| keys: [community_id] | ||
| props: | ||
| community_id: {type: string, required: true} | ||
| method: {type: string, required: false} # louvain|leiden|labelprop|manual | ||
| label: {type: string, required: false} | ||
| Event: | ||
| keys: [event_id] | ||
| props: | ||
| event_id: {type: string, required: true} | ||
| event_type: {type: string, required: false} | ||
| summary: {type: string, required: false} | ||
| time_ref: {type: string, required: false} | ||
| Evidence: | ||
| keys: [evidence_id] | ||
| props: | ||
| evidence_id: {type: string, required: true} | ||
| evidence_type: {type: string, required: false} # report|dataset|factcheck|official_statement|doc | ||
| uri: {type: string, required: false} | ||
| excerpt: {type: string, required: false} | ||
|
|
||
| relationships: | ||
| PUBLISHED: | ||
| from: Actor | ||
| to: Artifact | ||
| props: | ||
| published_ref: {type: string, required: false} | ||
| IN_COMMUNITY: | ||
| from: Actor | ||
| to: Community | ||
| props: {} | ||
| MENTIONS: | ||
| from: Artifact | ||
| to: Actor | ||
| props: {} | ||
| COMPOSED_OF: | ||
| from: Narrative | ||
| to: Frame | ||
| props: | ||
| weight: {type: number, required: false} | ||
| EXPRESSES: | ||
| from: Artifact | ||
| to: Frame | ||
| props: | ||
| confidence: {type: number, required: true} | ||
| method: {type: string, required: true} # classifier|rules|hybrid | ||
| provenance_id: {type: string, required: true} | ||
| MAKES: | ||
| from: Artifact | ||
| to: Claim | ||
| props: | ||
| confidence: {type: number, required: true} | ||
| span: {type: string, required: false} # stable pointer e.g., char offsets "120:184" | ||
| provenance_id: {type: string, required: true} | ||
| SUPPORTED_BY: | ||
| from: Frame | ||
| to: Assumption | ||
| props: | ||
| confidence: {type: number, required: true} | ||
| provenance_id: {type: string, required: true} | ||
| AMPLIFIED: | ||
| from: Community | ||
| to: Artifact | ||
| props: | ||
| weight: {type: number, required: false} | ||
| observed_ref: {type: string, required: false} | ||
| TRIGGERED_BY: | ||
| from: Narrative | ||
| to: Event | ||
| props: | ||
| confidence: {type: number, required: false} | ||
| provenance_id: {type: string, required: false} | ||
| DISPUTED_BY: | ||
| from: Claim | ||
| to: Evidence | ||
| props: | ||
| confidence: {type: number, required: false} | ||
| provenance_id: {type: string, required: false} | ||
| BROKEN_BY: | ||
| from: Assumption | ||
| to: Evidence | ||
| props: | ||
| confidence: {type: number, required: false} | ||
| provenance_id: {type: string, required: false} | ||
| HANDOFF_TO: | ||
| from: Narrative | ||
| to: Community | ||
| props: | ||
| handoff_score: {type: number, required: true} | ||
| from_tier: {type: string, required: false} | ||
| to_tier: {type: string, required: false} | ||
| observed_ref: {type: string, required: false} | ||
| provenance_id: {type: string, required: true} | ||
|
|
||
| # Lookup indexes. Every node label declared under `nodes` has an entry led by | ||
| # its key property; Actor, Narrative and Frame also index one secondary | ||
| # property (handle, state, narrative_id). | ||
| indexes: | ||
| - label: Artifact | ||
| props: [artifact_id] | ||
| - label: Actor | ||
| props: [actor_id, handle] | ||
| - label: Narrative | ||
| props: [narrative_id, state] | ||
| - label: Frame | ||
| props: [frame_id, narrative_id] | ||
| - label: Assumption | ||
| props: [assumption_id] | ||
| - label: Claim | ||
| props: [claim_id] | ||
| - label: Community | ||
| props: [community_id] | ||
| - label: Event | ||
| props: [event_id] | ||
| - label: Evidence | ||
| props: [evidence_id] | ||
|
Comment on lines
+158
to
+170
Contributor
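There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
The `indexes` section omits the Community, Event, and Evidence labels that the suggestion below adds. Suggested change:
indexes: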
- label: Artifact
props: [artifact_id]
- label: Actor
props: [actor_id, handle]
- label: Narrative
props: [narrative_id, state]
- label: Frame
props: [frame_id, narrative_id]
- label: Assumption
props: [assumption_id]
- label: Claim
props: [claim_id]
- label: Community
props: [community_id]
- label: Event
props: [event_id]
- label: Evidence
props: [evidence_id] |
||
| # Uniqueness constraints: one entry per node label, on that label's key property. | ||
| constraints: | ||
| - {type: unique, label: Artifact, prop: artifact_id} | ||
| - {type: unique, label: Actor, prop: actor_id} | ||
| - {type: unique, label: Narrative, prop: narrative_id} | ||
| - {type: unique, label: Frame, prop: frame_id} | ||
| - {type: unique, label: Claim, prop: claim_id} | ||
| - {type: unique, label: Assumption, prop: assumption_id} | ||
| - {type: unique, label: Community, prop: community_id} | ||
| - {type: unique, label: Event, prop: event_id} | ||
| - {type: unique, label: Evidence, prop: evidence_id} | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,35 @@ | ||
| // Uniqueness constraints: one per node label, on that label's id property. | ||
| // IF NOT EXISTS keeps the script safe to re-run. | ||
| CREATE CONSTRAINT artifact_id_unique IF NOT EXISTS FOR (artifact:Artifact) REQUIRE artifact.artifact_id IS UNIQUE; | ||
|
|
||
||
| CREATE CONSTRAINT actor_id_unique IF NOT EXISTS FOR (actor:Actor) REQUIRE actor.actor_id IS UNIQUE; | ||
|
|
||
||
| CREATE CONSTRAINT narrative_id_unique IF NOT EXISTS FOR (narrative:Narrative) REQUIRE narrative.narrative_id IS UNIQUE; | ||
|
|
||
||
| CREATE CONSTRAINT frame_id_unique IF NOT EXISTS FOR (frame:Frame) REQUIRE frame.frame_id IS UNIQUE; | ||
|
|
||
||
| CREATE CONSTRAINT claim_id_unique IF NOT EXISTS FOR (claim:Claim) REQUIRE claim.claim_id IS UNIQUE; | ||
|
|
||
||
| CREATE CONSTRAINT assumption_id_unique IF NOT EXISTS FOR (assumption:Assumption) REQUIRE assumption.assumption_id IS UNIQUE; | ||
|
|
||
||
| CREATE CONSTRAINT community_id_unique IF NOT EXISTS FOR (community:Community) REQUIRE community.community_id IS UNIQUE; | ||
|
|
||
||
| CREATE CONSTRAINT event_id_unique IF NOT EXISTS FOR (event:Event) REQUIRE event.event_id IS UNIQUE; | ||
|
|
||
||
| CREATE CONSTRAINT evidence_id_unique IF NOT EXISTS FOR (evidence:Evidence) REQUIRE evidence.evidence_id IS UNIQUE; | ||
|
|
||
||
| // Secondary lookup indexes on non-key properties. | ||
| CREATE INDEX actor_handle_idx IF NOT EXISTS FOR (actor:Actor) ON (actor.handle); | ||
|
|
||
||
| CREATE INDEX narrative_state_idx IF NOT EXISTS FOR (narrative:Narrative) ON (narrative.state); | ||
|
|
||
||
| CREATE INDEX frame_narrative_id_idx IF NOT EXISTS FOR (frame:Frame) ON (frame.narrative_id); |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,47 @@ | ||
| -- Run registry. run_id is the key that narrative_metric, narrative_transition | ||
| -- and narrative_handoff reference. created_at defaults to the inserting | ||
| -- transaction's timestamp; the input window and stamp_ref columns are nullable. | ||
| CREATE TABLE IF NOT EXISTS narrative_run ( | ||
| run_id TEXT, | ||
| created_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT CURRENT_TIMESTAMP, | ||
| pipeline_version TEXT NOT NULL, | ||
| model_versions JSONB NOT NULL, | ||
| input_window_start TIMESTAMP WITH TIME ZONE, | ||
| input_window_end TIMESTAMP WITH TIME ZONE, | ||
| stamp_ref TEXT, | ||
| PRIMARY KEY (run_id) | ||
| ); | ||
|
|
||
| CREATE TABLE IF NOT EXISTS narrative_metric ( | ||
| run_id TEXT NOT NULL REFERENCES narrative_run(run_id), | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
To improve referential integrity, consider adding `ON DELETE CASCADE` to the foreign key. Suggested change:
run_id TEXT NOT NULL REFERENCES narrative_run(run_id) ON DELETE CASCADE, |
||
| narrative_id TEXT NOT NULL, | ||
| metric_name TEXT NOT NULL, | ||
| metric_value DOUBLE PRECISION NOT NULL, | ||
| metric_dim JSONB, | ||
| PRIMARY KEY (run_id, narrative_id, metric_name) | ||
| ); | ||
|
|
||
| CREATE TABLE IF NOT EXISTS narrative_transition ( | ||
| run_id TEXT NOT NULL REFERENCES narrative_run(run_id), | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. |
||
| narrative_id TEXT NOT NULL, | ||
| from_state TEXT NOT NULL, | ||
| to_state TEXT NOT NULL, | ||
| rationale JSONB, | ||
| PRIMARY KEY (run_id, narrative_id, from_state, to_state) | ||
| ); | ||
|
|
||
| CREATE TABLE IF NOT EXISTS narrative_handoff ( | ||
| run_id TEXT NOT NULL REFERENCES narrative_run(run_id), | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. |
||
| narrative_id TEXT NOT NULL, | ||
| from_tier TEXT, | ||
| to_tier TEXT, | ||
| handoff_score DOUBLE PRECISION NOT NULL, | ||
| community_id TEXT, | ||
| provenance_id TEXT NOT NULL, | ||
| PRIMARY KEY (run_id, narrative_id, provenance_id) | ||
| ); | ||
|
|
||
| -- Secondary indexes on narrative_id for the three per-run tables. Each table's | ||
| -- primary key leads with run_id, so narrative_id alone is not a key prefix. | ||
| CREATE INDEX IF NOT EXISTS narrative_metric_narrative_idx ON narrative_metric USING btree (narrative_id); | ||
|
|
||
||
| CREATE INDEX IF NOT EXISTS narrative_transition_narrative_idx ON narrative_transition USING btree (narrative_id); | ||
|
|
||
||
| CREATE INDEX IF NOT EXISTS narrative_handoff_narrative_idx ON narrative_handoff USING btree (narrative_id); | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The `span` property is currently defined as a generic string. To improve data validation and clarity, consider adding a `pattern` to enforce a specific format, such as character offsets (e.g., '120:184'). A `description` would also be helpful for consumers of the schema.