diff --git a/CHANGELOG.md b/CHANGELOG.md index e1a1ee11..044753ba 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -24,6 +24,20 @@ and this project adheres to a _modified_ form of _[Semantic Versioning][semver]_ [#98]: https://github.com/openlawlibrary/stelae/pull/98 [#97] https://github.com/openlawlibrary/stelae/pull/97 +## [v0.6.5] + +### Added + +- Track html data repo per publication for multi-repo support ([#106]) + +### Changed + +### Fixed + +### Removed + +[#106]: https://github.com/openlawlibrary/stelae/pull/106 + ## [v0.6.4] ### Added @@ -302,7 +316,8 @@ and this project adheres to a _modified_ form of _[Semantic Versioning][semver]_ ### Removed -[Unreleased]: https://github.com/openlawlibrary/stelae/compare/v0.6.4...HEAD +[Unreleased]: https://github.com/openlawlibrary/stelae/compare/v0.6.5...HEAD +[v0.6.5]: https://github.com/openlawlibrary/stelae/compare/v0.6.4...v0.6.5 [v0.6.4]: https://github.com/openlawlibrary/stelae/compare/v0.6.3...v0.6.4 [v0.6.3]: https://github.com/openlawlibrary/stelae/compare/v0.6.2...v0.6.3 [v0.6.2]: https://github.com/openlawlibrary/stelae/compare/v0.6.1...v0.6.2 diff --git a/Cargo.lock b/Cargo.lock index e2c63d87..fd893a00 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3400,7 +3400,7 @@ checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" [[package]] name = "stelae" -version = "0.6.4" +version = "0.6.5" dependencies = [ "actix-http", "actix-service", diff --git a/Cargo.toml b/Cargo.toml index 892e5f08..d5833dc0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "stelae" description = "A collection of tools in Rust and Python for preserving, authenticating, and accessing laws in perpetuity." 
-version = "0.6.4" +version = "0.6.5" edition = "2021" readme = "README.md" license = "AGPL-3.0" diff --git a/migrations/sqlite/20260323000000_add_html_data_repo_name.down.sql b/migrations/sqlite/20260323000000_add_html_data_repo_name.down.sql new file mode 100644 index 00000000..00db6f05 --- /dev/null +++ b/migrations/sqlite/20260323000000_add_html_data_repo_name.down.sql @@ -0,0 +1,7 @@ +-- Revert add_html_data_repo_name: drop the column added by the matching up migration +PRAGMA foreign_keys = OFF; + +ALTER TABLE publication + DROP COLUMN html_data_repo_name; + +PRAGMA foreign_keys = ON; diff --git a/migrations/sqlite/20260323000000_add_html_data_repo_name.up.sql b/migrations/sqlite/20260323000000_add_html_data_repo_name.up.sql new file mode 100644 index 00000000..f718f7d5 --- /dev/null +++ b/migrations/sqlite/20260323000000_add_html_data_repo_name.up.sql @@ -0,0 +1,8 @@ +-- Add up migration script here +PRAGMA foreign_keys = OFF; + +ALTER TABLE publication + ADD COLUMN html_data_repo_name TEXT; + +PRAGMA foreign_keys = ON; +PRAGMA optimize; diff --git a/src/db/models/publication/manager.rs b/src/db/models/publication/manager.rs index 0d0e102f..a8f28403 100644 --- a/src/db/models/publication/manager.rs +++ b/src/db/models/publication/manager.rs @@ -48,10 +48,11 @@ impl super::TxManager for DatabaseTransaction { stele: &str, last_valid_publication_id: Option, last_valid_version: Option, + html_data_repo_name: Option, ) -> anyhow::Result> { let statement = " - INSERT OR IGNORE INTO publication ( id, name, date, stele, revoked, last_valid_publication_id, last_valid_version ) - VALUES ( $1, $2, $3, $4, FALSE, $5, $6) + INSERT OR IGNORE INTO publication ( id, name, date, stele, revoked, last_valid_publication_id, last_valid_version, html_data_repo_name ) + VALUES ( $1, $2, $3, $4, FALSE, $5, $6, $7) "; let id = sqlx::query(statement) .bind(hash_id) @@ -60,6 +61,7 @@ impl super::TxManager for DatabaseTransaction { .bind(stele) .bind(last_valid_publication_id) .bind(last_valid_version) + .bind(html_data_repo_name) .execute(&mut *self.tx) .await? 
.last_insert_id(); @@ -151,6 +153,31 @@ impl super::TxManager for DatabaseTransaction { Ok(row) } + /// Set `html_data_repo_name` on all publications for the given stele whose date is + /// strictly earlier than `boundary_date`. + /// + /// # Errors + /// Errors if the update cannot be executed. + async fn set_html_data_repo_name_for_prior_publications( + &mut self, + stele: &str, + boundary_date: &NaiveDate, + html_data_repo_name: &str, + ) -> anyhow::Result<()> { + let statement = " + UPDATE publication + SET html_data_repo_name = $1 + WHERE stele = $2 AND date < $3 + "; + sqlx::query(statement) + .bind(html_data_repo_name) + .bind(stele) + .bind(boundary_date.to_string()) + .execute(&mut *self.tx) + .await?; + Ok(()) + } + /// Find all publication names by date and stele. /// /// # Errors diff --git a/src/db/models/publication/mod.rs b/src/db/models/publication/mod.rs index c1ea1b00..358127d4 100644 --- a/src/db/models/publication/mod.rs +++ b/src/db/models/publication/mod.rs @@ -19,6 +19,10 @@ pub trait Manager { #[async_trait] pub trait TxManager { /// Create a new publication. + #[expect( + clippy::too_many_arguments, + reason = "Publication has many required fields; a dedicated builder struct can be added later." + )] async fn create( &mut self, hash_id: &str, @@ -27,6 +31,7 @@ pub trait TxManager { stele: &str, last_valid_publication_id: Option, last_valid_version: Option, + html_data_repo_name: Option, ) -> anyhow::Result>; /// Update a publication by name and set revoked to true. async fn update_by_name_and_stele_set_revoked_true( @@ -48,6 +53,15 @@ pub trait TxManager { name: &str, stele: &str, ) -> anyhow::Result; + /// Set `html_data_repo_name` on all publications for the given stele whose date is + /// strictly earlier than `boundary_date`. Used to backfill older publications when + /// the boundary (last-archived) publication is encountered. 
+ async fn set_html_data_repo_name_for_prior_publications( &mut self, stele: &str, boundary_date: &NaiveDate, html_data_repo_name: &str, ) -> anyhow::Result<()>; /// Find all by date and stele and sort by name in descending order. /// Used in revocation logic to find the latest publication. async fn find_all_by_date_and_stele_order_by_name_desc( @@ -82,6 +96,10 @@ pub struct Publication { /// represents the last publication version (codified date) from the previous publication /// that the current publication is derived from. pub last_valid_version: Option, + /// Name of the HTML data repository used to build this publication. + /// Archived repo name for the boundary publication and all earlier ones; otherwise the current repo. + /// `None` when neither an archived nor a current (non-archived) HTML repo name was available. + pub html_data_repo_name: Option, } impl FromRow<'_, AnyRow> for Publication { @@ -94,6 +112,7 @@ impl FromRow<'_, AnyRow> for Publication { revoked: row.try_get("revoked")?, last_valid_publication_id: row.try_get("last_valid_publication_id").ok(), last_valid_version: row.try_get("last_valid_version").ok(), + html_data_repo_name: row.try_get("html_data_repo_name").ok(), }) } } @@ -110,6 +129,7 @@ impl Publication { revoked: 0, last_valid_publication_id: None, last_valid_version: None, + html_data_repo_name: None, } } } diff --git a/src/history/changes.rs b/src/history/changes.rs index 487b0708..598070d9 100644 --- a/src/history/changes.rs +++ b/src/history/changes.rs @@ -170,7 +170,13 @@ async fn process_stele( ); return Ok(()); } - insert_changes_from_rdf_repository(tx, rdf, name).await?; + // Find the current (non-archived) HTML repo name to stamp on each publication record. 
+ let current_html_repo_name = repositories + .get_all_by_custom_type("html") + .into_iter() + .find(|repo| !repo.is_archived()) + .map(|repo| repo.name.clone()); + insert_changes_from_rdf_repository(tx, rdf, name, current_html_repo_name.as_deref()).await?; // Insert commit hashes for data repositories with serve type 'historical' let data_repos = repositories.get_all_by_serve_type("historical"); for data_repo in data_repos { @@ -188,10 +194,11 @@ async fn insert_changes_from_rdf_repository( tx: &mut DatabaseTransaction, rdf_repo: Repo, stele_id: &str, + current_html_repo_name: Option<&str>, ) -> anyhow::Result<()> { tracing::debug!("Inserting changes from RDF repository: {}", stele_id); tracing::debug!("RDF repository path: {}", rdf_repo.path.display()); - load_delta_for_stele(tx, &rdf_repo, stele_id).await?; + load_delta_for_stele(tx, &rdf_repo, stele_id, current_html_repo_name).await?; Ok(()) } @@ -200,14 +207,22 @@ async fn load_delta_for_stele( tx: &mut DatabaseTransaction, rdf_repo: &Repo, stele: &str, + current_html_repo_name: Option<&str>, ) -> anyhow::Result<()> { stele::TxManager::create(tx, stele).await?; if let Some(publication) = publication::TxManager::find_last_inserted(tx, stele).await? 
{ tracing::info!("[{stele}] | Inserting RDF changes from last inserted publication"); - load_delta_from_publications(tx, rdf_repo, stele, Some(publication)).await?; + load_delta_from_publications( + tx, + rdf_repo, + stele, + Some(publication), + current_html_repo_name, + ) + .await?; } else { tracing::info!("[{stele}] | Inserting RDF changes from beginning..."); - load_delta_from_publications(tx, rdf_repo, stele, None).await?; + load_delta_from_publications(tx, rdf_repo, stele, None, current_html_repo_name).await?; } Ok(()) } @@ -229,6 +244,7 @@ async fn load_delta_from_publications( rdf_repo: &Repo, stele: &str, last_inserted_publication: Option, + current_html_repo_name: Option<&str>, ) -> anyhow::Result<()> { let head_commit = rdf_repo.repo.head()?.peel_to_commit()?; let tree = head_commit.tree()?; @@ -273,6 +289,9 @@ async fn load_delta_from_publications( let pub_date = pub_graph.literal_from_triple_matching(None, Some(dcterms::available), None)?; let pub_date = NaiveDate::parse_from_str(pub_date.as_str(), "%Y-%m-%d")?; + let archived_html_repo: Option = pub_graph + .literal_from_triple_matching(None, Some(oll::archivedHtml), None) + .ok(); // continue from last inserted publication, since that publication can contain // new changes (versions) that are not in db if let Some(last_inserted_publication_date) = last_inserted_pub_date { @@ -319,6 +338,14 @@ async fn load_delta_from_publications( } else { None }; + // Determine which HTML data repo this publication belongs to. + // If the RDF carries oll:archivedHtml it's the boundary publication: use the + // archived repo name it declares. Otherwise use the current HTML repo name. 
+ let html_data_repo_name: Option = if archived_html_repo.is_some() { + archived_html_repo.clone() + } else { + current_html_repo_name.map(ToOwned::to_owned) + }; publication::TxManager::create( tx, &publication_hash, @@ -327,8 +354,20 @@ async fn load_delta_from_publications( stele, last_inserted_pub_id, last_valid_codified_date, + html_data_repo_name, ) .await?; + // If this is the boundary publication, backfill all earlier publications for + // this stele with the same archived HTML repo name. + if let Some(archived_repo) = archived_html_repo.as_deref() { + publication::TxManager::set_html_data_repo_name_for_prior_publications( + tx, + stele, + &pub_date, + archived_repo, + ) + .await?; + } let Some(publication) = publication::TxManager::find_by_name_and_stele(tx, &pub_name, stele).await? else { diff --git a/src/history/rdf/namespaces.rs b/src/history/rdf/namespaces.rs index ba3f0d3e..1cea5e99 100644 --- a/src/history/rdf/namespaces.rs +++ b/src/history/rdf/namespaces.rs @@ -17,7 +17,8 @@ pub mod oll { url, reason, status, - libraryMaterializedPath + libraryMaterializedPath, + archivedHtml } } diff --git a/src/stelae/types/repositories.rs b/src/stelae/types/repositories.rs index bec8d3ab..00b91b3f 100644 --- a/src/stelae/types/repositories.rs +++ b/src/stelae/types/repositories.rs @@ -85,6 +85,18 @@ impl Repository { pub fn get_type(&self) -> Option { self.custom.repository_type.clone() } + + /// Returns true if this repository is marked as archived. + #[must_use] + pub fn is_archived(&self) -> bool { + self.custom.archived.unwrap_or(false) + } + + /// Returns true if this repository has a preview URL configured. + #[must_use] + pub const fn has_preview(&self) -> bool { + self.custom.preview.is_some() + } } /// Custom object @@ -114,6 +126,14 @@ pub struct Custom { /// /// When a data repository is a fallback, it is used to serve current blobs when no other data repository matches the request. 
pub is_fallback: Option, + /// Whether this data repository is archived (i.e. superseded by a newer repository of the same type). + /// + /// When `true`, this repository held publications built before a migration to a newer repository. + pub archived: Option, + /// Git URL of the preview repository associated with this data repository. + /// + /// When set, this repository has a corresponding preview repository used for non-production environments. + pub preview: Option, } impl Repositories {