diff --git a/.github/workflows/mirror.yml b/.github/workflows/mirror.yml index cc3f9711..11e1d61d 100644 --- a/.github/workflows/mirror.yml +++ b/.github/workflows/mirror.yml @@ -27,7 +27,10 @@ jobs: - name: Generate backend files run: | mkdir tmp/ - cargo run --locked --release --bin upki-mirror -- tmp/ production --manifest-comment="$GITHUB_REPOSITORY run $GITHUB_RUN_ID" + cargo run --locked --release --bin mozilla-crlite -- tmp/ production --manifest-comment="$GITHUB_REPOSITORY run $GITHUB_RUN_ID" + # backwards compatible name + cp tmp/v1-revocation-manifest.json tmp/manifest.json + cargo run --locked --release --bin intermediates -- tmp/ --manifest-comment="$GITHUB_REPOSITORY run $GITHUB_RUN_ID" - name: Package and upload artifact uses: actions/upload-pages-artifact@v4 diff --git a/Cargo.lock b/Cargo.lock index 416ea68a..db7ffec7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2679,9 +2679,11 @@ version = "0.1.0" dependencies = [ "aws-lc-rs", "clap", + "csv", "eyre", "hex", "reqwest", + "rustls-pki-types", "serde", "serde_json", "tokio", diff --git a/Cargo.toml b/Cargo.toml index 4c03e85f..76e21d33 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -15,6 +15,7 @@ chrono = { version = "0.4.42", features = ["alloc"], default-features = false } clap = { version = "4.5", features = ["derive"] } clubcard-crlite = "0.3.2" criterion = "0.8" +csv = "1.4" directories = "6" eyre = "0.6" hex = { version = "0.4", features = ["serde"] } diff --git a/upki-mirror/Cargo.toml b/upki-mirror/Cargo.toml index 85158d21..c35c810d 100644 --- a/upki-mirror/Cargo.toml +++ b/upki-mirror/Cargo.toml @@ -9,9 +9,11 @@ repository.workspace = true [dependencies] aws-lc-rs.workspace = true clap.workspace = true +csv.workspace = true eyre.workspace = true hex.workspace = true reqwest.workspace = true +rustls-pki-types.workspace = true serde.workspace = true serde_json.workspace = true tokio.workspace = true diff --git a/upki-mirror/src/bin/intermediates.rs b/upki-mirror/src/bin/intermediates.rs new file 
mode 100644 index 00000000..1268b85b --- /dev/null +++ b/upki-mirror/src/bin/intermediates.rs @@ -0,0 +1,156 @@ +use core::time::Duration; +use std::collections::HashMap; +use std::fs; +use std::path::PathBuf; +use std::time::SystemTime; + +use aws_lc_rs::digest::{SHA256, digest}; +use clap::Parser; +use eyre::{Context, Report, anyhow}; +use rustls_pki_types::CertificateDer; +use rustls_pki_types::pem::PemObject; +use serde::Deserialize; +use upki::data::{Manifest, ManifestFile}; + +#[tokio::main(flavor = "current_thread")] +async fn main() -> Result<(), Report> { + let opts = Opts::try_parse()?; + + let client = reqwest::Client::builder() + .use_rustls_tls() + .timeout(Duration::from_secs(opts.http_timeout_secs)) + .user_agent(format!( + "{} v{} ({})", + env!("CARGO_PKG_NAME"), + env!("CARGO_PKG_VERSION"), + env!("CARGO_PKG_REPOSITORY") + )) + .build() + .wrap_err("failed to create HTTP client")?; + + let response = client + .get("https://ccadb.my.salesforce-sites.com/mozilla/MozillaIntermediateCertsCSVReport") + .send() + .await + .wrap_err("records request failed")?; + + if !response.status().is_success() { + return Err(anyhow!( + "HTTP error for records request: {}", + response.status() + )); + } + + let csv_bytes = response + .bytes() + .await + .wrap_err("failed to receive CSV body")?; + + let intermediates = csv::ReaderBuilder::new() + .has_headers(true) + .from_reader(&mut csv_bytes.as_ref()) + .into_deserialize::<IntermediateData>() + .collect::<Result<Vec<_>, _>>() + .wrap_err("failed to parse CSV")?; + + println!("we have {} intermediates", intermediates.len()); + + // we bucket intermediates into up to 256 files, by the first byte of the + // sha256-hash of their DER value. + // + // that means the manifest contains up to 256 items, and the filenames are small.
+ let mut buckets: HashMap<u8, Vec<IntermediateData>> = HashMap::new(); + + for i in intermediates { + let der = CertificateDer::from_pem_slice(i.pem.as_bytes()).wrap_err("cannot parse PEM")?; + + // check hash matches + let actual_hash = digest(&SHA256, &der); + if i.sha256 != actual_hash.as_ref() { + return Err(anyhow!("cert {i:?} does not have correct hash")); + } + + let bucket = i.sha256[0]; + buckets + .entry(bucket) + .or_default() + .push(i); + } + + let mut files = Vec::new(); + for (bucket, certs) in buckets { + let filename = format!("{bucket:02x?}.pem",); + + let mut contents = String::new(); + for inter in certs { + contents.push_str(&inter.pem); + contents.push('\n'); + } + + fs::write(opts.output_dir.join(&filename), &contents).wrap_err("cannot write PEM file")?; + let hash = digest(&SHA256, contents.as_bytes()); + + files.push(ManifestFile { + filename, + size: contents.len(), + hash: hash.as_ref().to_vec(), + }); + } + + let manifest = Manifest { + generated_at: SystemTime::now() + .duration_since(SystemTime::UNIX_EPOCH) + .unwrap() + .as_secs(), + comment: opts.manifest_comment.clone(), + files, + }; + let output_filename = opts + .output_dir + .join("v1-intermediates-manifest.json"); + fs::write( + &output_filename, + serde_json::to_string(&manifest) + .wrap_err("cannot encode JSON manifest")? + .as_bytes(), + ) + .wrap_err_with(|| format!("cannot write manifest to {output_filename:?}"))?; + + Ok(()) +} + +#[derive(Debug, Parser)] +struct Opts { + /// Where to write output files. This must exist. + output_dir: PathBuf, + + /// Timeout in seconds for all HTTP requests. + #[clap(long, default_value_t = 10)] + http_timeout_secs: u64, + + /// Comment included in output manifest.
+ #[clap(long, default_value = "")] + manifest_comment: String, +} + +#[non_exhaustive] +#[derive(Debug, Clone, Hash, Eq, PartialEq, Deserialize)] +pub struct IntermediateData { + #[serde(rename = "Subject")] + pub subject: String, + + #[serde(rename = "Issuer")] + pub issuer: String, + + #[serde(rename = "SHA256", with = "hex::serde")] + pub sha256: [u8; 32], + + #[serde(rename = "Full CRL Issued By This CA")] + pub full_crl: String, + + #[serde(rename = "PEM")] + pub pem: String, + + #[serde(rename = "JSON Array of Partitioned CRLs")] + pub json_crls: String, +} diff --git a/upki-mirror/src/main.rs b/upki-mirror/src/bin/mozilla-crlite.rs similarity index 81% rename from upki-mirror/src/main.rs rename to upki-mirror/src/bin/mozilla-crlite.rs index dce19c99..39625dd2 100644 --- a/upki-mirror/src/main.rs +++ b/upki-mirror/src/bin/mozilla-crlite.rs @@ -7,9 +7,7 @@ use std::time::SystemTime; use aws_lc_rs::digest::{SHA256, digest}; use clap::{Parser, ValueEnum}; use eyre::{Context, Report, anyhow}; -use upki::revocation::{Filter, Manifest}; - -mod mozilla; +use upki::data::{Manifest, ManifestFile}; #[tokio::main(flavor = "current_thread")] async fn main() -> Result<(), Report> { @@ -68,7 +66,7 @@ async fn main() -> Result<(), Report> { download_plan.push(item); } - let mut filters = Vec::new(); + let mut files = Vec::new(); for p in download_plan { let attachment_url = source.attachment_host.to_string() + &p.attachment.location; @@ -100,7 +98,7 @@ async fn main() -> Result<(), Report> { fs::write(&output_filename, bytes) .wrap_err_with(|| format!("cannot write filter data to {output_filename:?}",))?; - filters.push(Filter { + files.push(ManifestFile { filename: p.attachment.filename.clone(), size: p.attachment.size, hash: p.attachment.hash.clone(), @@ -113,9 +111,11 @@ async fn main() -> Result<(), Report> { .unwrap() .as_secs(), comment: opts.manifest_comment.clone(), - filters, + files, }; - let output_filename = opts.output_dir.join("manifest.json"); + let 
output_filename = opts + .output_dir + .join("v1-revocation-manifest.json"); fs::write( output_filename, serde_json::to_string(&manifest) @@ -168,3 +168,38 @@ const MOZILLA_PROD: Source = Source { records_url: "https://firefox.settings.services.mozilla.com/v1/buckets/security-state/collections/cert-revocations/records", attachment_host: "https://firefox-settings-attachments.cdn.mozilla.net/", }; + +/// JSON structures used in the Mozilla preferences service. +mod mozilla { + use serde::Deserialize; + + #[derive(Debug, Deserialize)] + pub(crate) struct Manifest { + pub(crate) data: Vec<Item>, + } + + #[derive(Clone, Debug, Deserialize)] + pub(crate) struct Item { + pub(crate) attachment: Attachment, + pub(crate) channel: Channel, + pub(crate) id: String, + pub(crate) incremental: bool, + pub(crate) parent: Option<String>, + } + + #[derive(Clone, Debug, Deserialize, PartialEq)] + #[serde(rename_all = "snake_case")] + pub(crate) enum Channel { + Default, + Compat, + } + + #[derive(Clone, Debug, Deserialize)] + pub(crate) struct Attachment { + #[serde(with = "hex::serde")] + pub hash: Vec<u8>, + pub size: usize, + pub filename: String, + pub location: String, + } +} diff --git a/upki-mirror/src/mozilla.rs b/upki-mirror/src/mozilla.rs deleted file mode 100644 index a05ccdc3..00000000 --- a/upki-mirror/src/mozilla.rs +++ /dev/null @@ -1,33 +0,0 @@ -//! JSON structures used in the Mozilla preferences service.
- -use serde::Deserialize; - -#[derive(Debug, Deserialize)] -pub(crate) struct Manifest { - pub(crate) data: Vec<Item>, -} - -#[derive(Clone, Debug, Deserialize)] -pub(crate) struct Item { - pub(crate) attachment: Attachment, - pub(crate) channel: Channel, - pub(crate) id: String, - pub(crate) incremental: bool, - pub(crate) parent: Option<String>, -} - -#[derive(Clone, Debug, Deserialize, PartialEq)] -#[serde(rename_all = "snake_case")] -pub(crate) enum Channel { - Default, - Compat, -} - -#[derive(Clone, Debug, Deserialize)] -pub(crate) struct Attachment { - #[serde(with = "hex::serde")] - pub hash: Vec<u8>, - pub size: usize, - pub filename: String, - pub location: String, -} diff --git a/upki/src/data.rs b/upki/src/data.rs new file mode 100644 index 00000000..acd33ff2 --- /dev/null +++ b/upki/src/data.rs @@ -0,0 +1,398 @@ +use core::fmt; +use core::time::Duration; +use std::collections::HashSet; +use std::env; +use std::fs::{self, File, Permissions}; +use std::io::{self, Read}; +#[cfg(target_family = "unix")] +use std::os::unix::fs::PermissionsExt; +use std::path::{Path, PathBuf}; +use std::process::ExitCode; + +use aws_lc_rs::digest; +use chrono::{DateTime, Utc}; +use serde::{Deserialize, Serialize}; +use tracing::{debug, info}; + +use crate::revocation::Error; + +/// The structure contained in a manifest.json +#[derive(Clone, Debug, Deserialize, Serialize)] +pub struct Manifest { + /// When this file was generated. + /// + /// UNIX timestamp in seconds. + pub generated_at: u64, + + /// Some human-readable text. + pub comment: String, + + /// List of files. + #[serde(rename = "filters")] + pub files: Vec<ManifestFile>, +} + +impl Manifest { + /// Logs metadata fields in this manifest.
+ pub fn introduce(&self) -> Result<(), Error> { + let dt = match DateTime::<Utc>::from_timestamp(self.generated_at as i64, 0) { + Some(dt) => dt.to_rfc3339(), + None => { + return Err(Error::InvalidTimestamp { + input: self.generated_at.to_string(), + context: "manifest generated (in s)", + }); + } + }; + + info!(comment = self.comment, date = dt, "parsed manifest"); + Ok(()) + } +} + +/// Manifest data for a single disk file. +#[derive(Clone, Debug, Deserialize, Serialize)] +pub struct ManifestFile { + /// Relative filename. + /// + /// This is also the suggested local filename. + pub filename: String, + + /// File size, indicative. Allows a fetcher to predict data usage. + pub size: usize, + + /// SHA256 hash of file contents. + #[serde(with = "hex::serde")] + pub hash: Vec<u8>, +} + +pub(crate) async fn fetch_inner( + dry_run: bool, + fetch_url: &str, + manifest_url: String, + manifest_file_name: String, + cache_dir: PathBuf, +) -> Result<ExitCode, Error> { + info!("fetching {fetch_url} into {cache_dir:?}..."); + let client = reqwest::Client::builder() + .use_rustls_tls() + .timeout(Duration::from_secs(REQUEST_TIMEOUT)) + .user_agent(format!( + "{} v{} ({})", + env!("CARGO_PKG_NAME"), + env!("CARGO_PKG_VERSION"), + env!("CARGO_PKG_REPOSITORY") + )) + .build() + .map_err(|error| Error::HttpFetch { + error: Box::new(error), + url: manifest_url.clone(), + })?; + + let response = client + .get(&manifest_url) + .send() + .await + .map_err(|error| Error::HttpFetch { + error: Box::new(error), + url: manifest_url.clone(), + })?
+ .error_for_status() + .map_err(|error| Error::HttpFetch { + error: Box::new(error), + url: manifest_url.clone(), + })?; + + let manifest = response + .json::<Manifest>() + .await + .map_err(|error| Error::ManifestDecode { + error: Box::new(error), + path: None, + })?; + + manifest.introduce()?; + + let plan = Plan::construct(&manifest, fetch_url, &cache_dir, manifest_file_name)?; + + if dry_run { + println!( + "{} steps required ({} bytes to download)", + plan.steps.len(), + plan.download_bytes() + ); + for step in plan.steps { + println!("- {step}"); + } + return Ok(ExitCode::SUCCESS); + } + + info!( + "{} steps required ({} bytes to download).", + plan.steps.len(), + plan.download_bytes() + ); + + for step in plan.steps { + step.execute(&client).await?; + } + + info!("success"); + Ok(ExitCode::SUCCESS) +} + +pub(crate) struct Plan { + steps: Vec<PlanStep>, +} + +impl Plan { + /// Form a plan of how to synchronize with the remote server. + /// + /// - `manifest` describes the contents of the remote server. + /// - `remote_url` is the base URL. + /// - `local` is the path into which files are downloaded. The caller ensures this exists. + /// - `manifest_file_name` is the file name of the manifest, which will be placed into `local`.
+ pub(crate) fn construct( + manifest: &Manifest, + remote_url: &str, + local: &Path, + manifest_file_name: String, + ) -> Result<Self, Error> { + let mut steps = Vec::new(); + + // Collect unwanted files for deletion + let mut unwanted_files = HashSet::new(); + + if local.exists() { + let iter = fs::read_dir(local).map_err(|error| Error::CreateDirectory { + error, + path: local.to_owned(), + })?; + + for entry in iter { + let entry = match entry { + Ok(e) => e, + Err(error) => return Err(Error::FilterRead { error, path: None }), + }; + + let path = Path::new(&entry.file_name()).to_owned(); + let name = path.to_string_lossy(); + if name.ends_with(".filter") || name.ends_with(".delta") { + unwanted_files.insert(path); + } + } + } else { + steps.push(PlanStep::CreateDir(local.to_owned())); + } + + for filter in &manifest.files { + unwanted_files.remove(Path::new(&filter.filename)); + + let path = local.join(&filter.filename); + match hash_file(&path) { + Ok(digest) if digest.as_ref() == filter.hash => continue, + _ => {} + } + + steps.push(PlanStep::download(filter, remote_url, local)); + } + + steps.push(PlanStep::SaveManifest { + manifest: manifest.clone(), + local_dir: local.to_owned(), + file_name: manifest_file_name.clone(), + }); + + for filename in unwanted_files { + steps.push(PlanStep::Delete(local.join(filename))); + } + + Ok(Self { steps }) + } + + /// How many bytes will we download? + pub(crate) fn download_bytes(&self) -> usize { + self.steps + .iter() + .filter_map(|s| match s { + PlanStep::Download { file, .. } => Some(file.size), + _ => None, + }) + .sum() + } +} + +/// One step moving closer to local sync with the remote contents. + enum PlanStep { + CreateDir(PathBuf), + + /// Download `filter` from `remote` to `local` + Download { + file: ManifestFile, + /// URL. + remote_url: String, + /// Full path to output file. + local: PathBuf, + }, + + /// Delete the given single local file.
+ Delete(PathBuf), + + /// Save the manifest structure + SaveManifest { + manifest: Manifest, + local_dir: PathBuf, + file_name: String, + }, +} + +impl PlanStep { + async fn execute(self, client: &reqwest::Client) -> Result<(), Error> { + match self { + Self::CreateDir(path) => { + fs::create_dir_all(&path).map_err(|error| Error::CreateDirectory { error, path })? + } + Self::Download { + file, + remote_url, + local, + } => { + debug!("downloading {:?}", file); + + let response = client + .get(&remote_url) + .send() + .await + .map_err(|error| Error::HttpFetch { + error: Box::new(error), + url: remote_url.clone(), + })? + .error_for_status() + .map_err(|error| Error::HttpFetch { + error: Box::new(error), + url: remote_url.clone(), + })?; + + fs::write( + &local, + response + .bytes() + .await + .map_err(|error| Error::HttpFetch { + error: Box::new(error), + url: remote_url.clone(), + })?, + ) + .map_err(|error| Error::FileWrite { + error, + path: local.clone(), + })?; + + match hash_file(&local) { + Ok(digest) if digest.as_ref() == file.hash => {} + Ok(_) => return Err(Error::HashMismatch(local)), + Err(error) => { + return Err(Error::FilterRead { + error, + path: Some(local), + }); + } + } + + debug!("download successful"); + } + Self::Delete(target) => { + debug!("deleting unreferenced file {target:?}"); + fs::remove_file(&target).map_err(|error| Error::RemoveFile { + error, + path: target, + })?; + } + Self::SaveManifest { + manifest, + local_dir, + file_name, + } => { + debug!("saving manifest"); + #[cfg(target_family = "unix")] + let temp = tempfile::Builder::new() + .permissions(Permissions::from_mode(0o644)) + .suffix(".new") + .tempfile_in(&local_dir); + #[cfg(not(target_family = "unix"))] + let temp = tempfile::Builder::new() + .suffix(".new") + .tempfile_in(&local_dir); + + let mut local_temp = temp.map_err(|error| Error::ManifestWrite { + error, + path: local_dir.clone(), + })?; + + serde_json::to_writer(local_temp.as_file_mut(), 
&manifest).map_err(|error| { + Error::ManifestEncode { + error: Box::new(error), + path: local_temp.path().to_owned(), + } + })?; + + let path = local_dir.join(file_name); + local_temp + .persist(&path) + .map_err(|error| Error::ManifestWrite { + error: error.error, + path, + })?; + } + } + + Ok(()) + } + + fn download(file: &ManifestFile, remote_url: &str, local: &Path) -> Self { + Self::Download { + file: file.clone(), + remote_url: format!("{remote_url}{}", file.filename), + local: local.join(&file.filename), + } + } +} + +impl fmt::Display for PlanStep { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::CreateDir(path) => write!(f, "create directory {path:?}"), + Self::Download { + file, + remote_url, + local, + } => write!( + f, + "download {} bytes from {remote_url} to {local:?}", + file.size + ), + Self::Delete(path) => write!(f, "delete stale file {path:?}"), + Self::SaveManifest { local_dir, .. } => { + write!(f, "save new manifest into {local_dir:?}") + } + } + } +} + +fn hash_file(path: &Path) -> Result<digest::Digest, io::Error> { + let mut file = File::open(path)?; + let mut hasher = digest::Context::new(&digest::SHA256); + let mut buffer = [0; 4096]; + loop { + let n = file.read(&mut buffer)?; + if n == 0 { + break; + } + + hasher.update(&buffer[..n]); + } + + Ok(hasher.finish()) +} + +const REQUEST_TIMEOUT: u64 = 30; diff --git a/upki/src/intermediates.rs b/upki/src/intermediates.rs new file mode 100644 index 00000000..20ff6347 --- /dev/null +++ b/upki/src/intermediates.rs @@ -0,0 +1,48 @@ +use std::process::ExitCode; + +use serde::{Deserialize, Serialize}; + +use crate::Config; +use crate::data::fetch_inner; +use crate::revocation::Error; + +/// Update the local intermediates cache by fetching updates over the network. +/// +/// `dry_run` means this call fetches the new manifest, but does not fetch any +/// required files; but the necessary files are printed to stdout. Therefore +/// such a call is not completely "dry" -- perhaps "moist".
+pub async fn fetch(dry_run: bool, config: &Config) -> Result<ExitCode, Error> { + let Some(intermediates) = &config.intermediates else { + return Ok(ExitCode::SUCCESS); + }; + let manifest_url = format!("{}{MANIFEST_JSON}", intermediates.fetch_url); + fetch_inner( + dry_run, + &intermediates.fetch_url, + manifest_url, + MANIFEST_JSON.to_string(), + config.intermediates_cache_dir(), + ) + .await +} + +pub(super) const MANIFEST_JSON: &str = "v1-intermediates.json"; + +/// Details about intermediate preloading. +#[derive(Debug, Deserialize, Serialize)] +#[serde(rename_all = "kebab-case", deny_unknown_fields)] +pub struct IntermediatesConfig { + /// Whether to fetch things at all. + enabled: bool, + /// Where to fetch intermediate certificates. + fetch_url: String, +} + +impl Default for IntermediatesConfig { + fn default() -> Self { + Self { + enabled: false, + fetch_url: "https://upki.rustls.dev/".into(), + } + } +} diff --git a/upki/src/lib.rs b/upki/src/lib.rs index a1d22979..11e5d858 100644 --- a/upki/src/lib.rs +++ b/upki/src/lib.rs @@ -7,6 +7,7 @@ use std::{fmt, fs, io}; use serde::{Deserialize, Serialize}; +use crate::intermediates::IntermediatesConfig; use crate::revocation::RevocationConfig; /// `upki` configuration. @@ -18,6 +19,9 @@ pub struct Config { /// Configuration for crlite-style revocation. pub revocation: RevocationConfig, + + /// Configuration for intermediate preloading. + pub intermediates: Option<IntermediatesConfig>, } impl Config { @@ -50,12 +54,17 @@ impl Config { Ok(Self { cache_dir: platform::default_cache_dir()?, revocation: RevocationConfig::default(), + intermediates: Some(IntermediatesConfig::default()), }) } pub(crate) fn revocation_cache_dir(&self) -> PathBuf { self.cache_dir.join("revocation") } + + fn intermediates_cache_dir(&self) -> PathBuf { + self.cache_dir.join("intermediates") + } } /// How the path to a configuration file was decided upon.
@@ -199,3 +208,9 @@ const CONFIG_FILE: &str = "config.toml"; /// Determining revocation status of publicly trusted certificates. pub mod revocation; + +/// Fetching intermediate certificates to assist chain building. +pub mod intermediates; + +/// Data storage. +pub mod data; diff --git a/upki/src/main.rs b/upki/src/main.rs index bdb7db44..c4274ede 100644 --- a/upki/src/main.rs +++ b/upki/src/main.rs @@ -8,8 +8,9 @@ use clap::{Parser, Subcommand}; use eyre::{Context, Report}; use rustls_pki_types::CertificateDer; use rustls_pki_types::pem::PemObject; -use upki::revocation::{Manifest, RevocationCheckInput, fetch}; +use upki::revocation::{Manifest, RevocationCheckInput}; use upki::{Config, ConfigPath}; +use upki::{intermediates, revocation}; #[tokio::main(flavor = "current_thread")] async fn main() -> Result<ExitCode, Report> { @@ -33,7 +34,15 @@ let config = Config::from_file_or_default(&config_path)?; Ok(match args.command { - Command::Fetch { dry_run } => fetch(dry_run, &config).await?, + Command::Fetch { dry_run } => { + match ( + revocation::fetch(dry_run, &config).await?, + intermediates::fetch(dry_run, &config).await?, + ) { + (ExitCode::SUCCESS, ExitCode::SUCCESS) => ExitCode::SUCCESS, + (..) => ExitCode::FAILURE, + } + } Command::Verify => Manifest::from_config(&config)?.verify(&config)?, Command::ShowConfigPath => unreachable!(), Command::ShowConfig => {
-use core::fmt; -use core::time::Duration; -use std::collections::HashSet; -use std::env; -use std::fs::{self, File, Permissions}; -use std::io::{self, Read}; -#[cfg(target_family = "unix")] -use std::os::unix::fs::PermissionsExt; -use std::path::{Path, PathBuf}; use std::process::ExitCode; -use aws_lc_rs::digest; -use tracing::{debug, info}; - -use super::{Error, Filter, Manifest}; +use super::Error; use crate::Config; +use crate::data::fetch_inner; /// Update the local revocation cache by fetching updates over the network. /// @@ -29,328 +18,15 @@ use crate::Config; /// required files; but the necessary files are printed to stdout. Therefore /// such a call is not completely "dry" -- perhaps "moist". pub async fn fetch(dry_run: bool, config: &Config) -> Result { - let cache_dir = config.revocation_cache_dir(); - info!( - "fetching {} into {:?}...", - &config.revocation.fetch_url, &cache_dir, - ); - let manifest_url = format!("{}{MANIFEST_JSON}", config.revocation.fetch_url); - let client = reqwest::Client::builder() - .use_rustls_tls() - .timeout(Duration::from_secs(REQUEST_TIMEOUT)) - .user_agent(format!( - "{} v{} ({})", - env!("CARGO_PKG_NAME"), - env!("CARGO_PKG_VERSION"), - env!("CARGO_PKG_REPOSITORY") - )) - .build() - .map_err(|error| Error::HttpFetch { - error: Box::new(error), - url: manifest_url.clone(), - })?; - - let response = client - .get(&manifest_url) - .send() - .await - .map_err(|error| Error::HttpFetch { - error: Box::new(error), - url: manifest_url.clone(), - })? 
- .error_for_status() - .map_err(|error| Error::HttpFetch { - error: Box::new(error), - url: manifest_url.clone(), - })?; - - let manifest = response - .json::() - .await - .map_err(|error| Error::ManifestDecode { - error: Box::new(error), - path: None, - })?; - - manifest.introduce()?; - - let plan = Plan::construct(&manifest, &config.revocation.fetch_url, &cache_dir)?; - - if dry_run { - println!( - "{} steps required ({} bytes to download)", - plan.steps.len(), - plan.download_bytes() - ); - for step in plan.steps { - println!("- {step}"); - } - return Ok(ExitCode::SUCCESS); - } - - info!( - "{} steps required ({} bytes to download).", - plan.steps.len(), - plan.download_bytes() - ); - - for step in plan.steps { - step.execute(&client).await?; - } - - info!("success"); - Ok(ExitCode::SUCCESS) -} - -pub(crate) struct Plan { - steps: Vec, -} - -impl Plan { - /// Form a plan of how to synchronize with the remote server. - /// - /// - `manifest` describes the contents of the remote server. - /// - `remote_url` is the base URL. - /// - `local` is the path into which files are downloaded. The caller ensures this exists. 
- pub(crate) fn construct( - manifest: &Manifest, - remote_url: &str, - local: &Path, - ) -> Result { - let mut steps = Vec::new(); - - // Collect unwanted files for deletion - let mut unwanted_files = HashSet::new(); - - if local.exists() { - let iter = fs::read_dir(local).map_err(|error| Error::CreateDirectory { - error, - path: local.to_owned(), - })?; - - for entry in iter { - let entry = match entry { - Ok(e) => e, - Err(error) => return Err(Error::FilterRead { error, path: None }), - }; - - let path = Path::new(&entry.file_name()).to_owned(); - let name = path.to_string_lossy(); - if name.ends_with(".filter") || name.ends_with(".delta") { - unwanted_files.insert(path); - } - } - } else { - steps.push(PlanStep::CreateDir(local.to_owned())); - } - - for filter in &manifest.filters { - unwanted_files.remove(Path::new(&filter.filename)); - - let path = local.join(&filter.filename); - match hash_file(&path) { - Ok(digest) if digest.as_ref() == filter.hash => continue, - _ => {} - } - - steps.push(PlanStep::download(filter, remote_url, local)); - } - - steps.push(PlanStep::SaveManifest { - manifest: manifest.clone(), - local_dir: local.to_owned(), - }); - - for filename in unwanted_files { - steps.push(PlanStep::Delete(local.join(filename))); - } - - Ok(Self { steps }) - } - - /// How many bytes will we download? - pub(crate) fn download_bytes(&self) -> usize { - self.steps - .iter() - .filter_map(|s| match s { - PlanStep::Download { filter, .. } => Some(filter.size), - _ => None, - }) - .sum() - } -} - -/// One step moving closer to local sync with the remote contents. -enum PlanStep { - CreateDir(PathBuf), - - /// Download `filter` from `remote` to `local` - Download { - filter: Filter, - /// URL. - remote_url: String, - /// Full path to output file. - local: PathBuf, - }, - - /// Delete the given single local file. 
- Delete(PathBuf), - - /// Save the manifest structure - SaveManifest { - manifest: Manifest, - local_dir: PathBuf, - }, -} - -impl PlanStep { - async fn execute(self, client: &reqwest::Client) -> Result<(), Error> { - match self { - Self::CreateDir(path) => { - fs::create_dir_all(&path).map_err(|error| Error::CreateDirectory { error, path })? - } - Self::Download { - filter, - remote_url, - local, - } => { - debug!("downloading {:?}", filter); - - let response = client - .get(&remote_url) - .send() - .await - .map_err(|error| Error::HttpFetch { - error: Box::new(error), - url: remote_url.clone(), - })? - .error_for_status() - .map_err(|error| Error::HttpFetch { - error: Box::new(error), - url: remote_url.clone(), - })?; - - fs::write( - &local, - response - .bytes() - .await - .map_err(|error| Error::HttpFetch { - error: Box::new(error), - url: remote_url.clone(), - })?, - ) - .map_err(|error| Error::FileWrite { - error, - path: local.clone(), - })?; - - match hash_file(&local) { - Ok(digest) if digest.as_ref() == filter.hash => {} - Ok(_) => return Err(Error::HashMismatch(local)), - Err(error) => { - return Err(Error::FilterRead { - error, - path: Some(local), - }); - } - } - - debug!("download successful"); - } - Self::Delete(target) => { - debug!("deleting unreferenced file {target:?}"); - fs::remove_file(&target).map_err(|error| Error::RemoveFile { - error, - path: target, - })?; - } - Self::SaveManifest { - manifest, - local_dir, - } => { - debug!("saving manifest"); - #[cfg(target_family = "unix")] - let temp = tempfile::Builder::new() - .permissions(Permissions::from_mode(0o644)) - .suffix(".new") - .tempfile_in(&local_dir); - #[cfg(not(target_family = "unix"))] - let temp = tempfile::Builder::new() - .suffix(".new") - .tempfile_in(&local_dir); - - let mut local_temp = temp.map_err(|error| Error::ManifestWrite { - error, - path: local_dir.clone(), - })?; - - serde_json::to_writer(local_temp.as_file_mut(), &manifest).map_err(|error| { - Error::ManifestEncode 
{ - error: Box::new(error), - path: local_temp.path().to_owned(), - } - })?; - - let path = local_dir.join(MANIFEST_JSON); - local_temp - .persist(&path) - .map_err(|error| Error::ManifestWrite { - error: error.error, - path, - })?; - } - } - - Ok(()) - } - - fn download(filter: &Filter, remote_url: &str, local: &Path) -> Self { - Self::Download { - filter: filter.clone(), - remote_url: format!("{remote_url}{}", filter.filename), - local: local.join(&filter.filename), - } - } -} - -impl fmt::Display for PlanStep { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - Self::CreateDir(path) => write!(f, "create directory {path:?}"), - Self::Download { - filter, - remote_url, - local, - } => write!( - f, - "download {} bytes from {remote_url} to {local:?}", - filter.size - ), - Self::Delete(path) => write!(f, "delete stale file {path:?}"), - Self::SaveManifest { local_dir, .. } => { - write!(f, "save new manifest into {local_dir:?}") - } - } - } -} - -fn hash_file(path: &Path) -> Result { - let mut file = File::open(path)?; - let mut hasher = digest::Context::new(&digest::SHA256); - let mut buffer = [0; 4096]; - loop { - let n = file.read(&mut buffer)?; - if n == 0 { - break; - } - - hasher.update(&buffer[..n]); - } - - Ok(hasher.finish()) + fetch_inner( + dry_run, + &config.revocation.fetch_url, + manifest_url, + MANIFEST_JSON.to_string(), + config.revocation_cache_dir(), + ) + .await } -const MANIFEST_JSON: &str = "manifest.json"; -const REQUEST_TIMEOUT: u64 = 30; +pub(super) const MANIFEST_JSON: &str = "manifest.json"; diff --git a/upki/src/revocation/mod.rs b/upki/src/revocation/mod.rs index 8794014e..925e0b89 100644 --- a/upki/src/revocation/mod.rs +++ b/upki/src/revocation/mod.rs @@ -1,5 +1,6 @@ use core::error::Error as StdError; use core::fmt; +use core::ops::Deref; use core::str::FromStr; use std::fs::{self, File}; use std::io::{self, BufReader}; @@ -9,38 +10,25 @@ use std::process::ExitCode; use aws_lc_rs::digest; use base64::Engine; 
use base64::prelude::BASE64_STANDARD; -use chrono::{DateTime, Utc}; use clubcard_crlite::{CRLiteClubcard, CRLiteKey, CRLiteStatus}; use rustls_pki_types::{CertificateDer, TrustAnchor}; use serde::{Deserialize, Serialize}; -use tracing::info; use crate::Config; +use crate::data::Plan; mod fetch; -use fetch::Plan; pub use fetch::fetch; /// The structure contained in a manifest.json #[derive(Clone, Debug, Deserialize, Serialize)] -pub struct Manifest { - /// When this file was generated. - /// - /// UNIX timestamp in seconds. - pub generated_at: u64, - - /// Some human-readable text. - pub comment: String, - - /// List of filter files. - pub filters: Vec, -} +pub struct Manifest(crate::data::Manifest); impl Manifest { /// Load the revocation manifest from the cache directory specified in the configuration. pub fn from_config(config: &Config) -> Result { let mut file_name = config.revocation_cache_dir(); - file_name.push("manifest.json"); + file_name.push(fetch::MANIFEST_JSON); let file = match File::open(&file_name) { Ok(f) => f, @@ -72,7 +60,7 @@ impl Manifest { ) -> Result { let key = input.key(); let cache_dir = config.revocation_cache_dir(); - for f in &self.filters { + for f in &self.files { let path = cache_dir.join(&f.filename); let bytes = match fs::read(&path) { Ok(bytes) => bytes, @@ -111,44 +99,25 @@ impl Manifest { /// This performs disk IO but does not perform network IO. pub fn verify(&self, config: &Config) -> Result { self.introduce()?; - let plan = Plan::construct(self, "https://.../", &config.revocation_cache_dir())?; + let plan = Plan::construct( + self, + "https://.../", + &config.revocation_cache_dir(), + fetch::MANIFEST_JSON.to_string(), + )?; match plan.download_bytes() { 0 => Ok(ExitCode::SUCCESS), bytes => Err(Error::Outdated(bytes)), } } - - /// Logs metadata fields in this manifest. 
- pub fn introduce(&self) -> Result<(), Error> { - let dt = match DateTime::::from_timestamp(self.generated_at as i64, 0) { - Some(dt) => dt.to_rfc3339(), - None => { - return Err(Error::InvalidTimestamp { - input: self.generated_at.to_string(), - context: "manifest generated (in s)", - }); - } - }; - - info!(comment = self.comment, date = dt, "parsed manifest"); - Ok(()) - } } -/// Manifest data for a single crlite filter file. -#[derive(Clone, Debug, Deserialize, Serialize)] -pub struct Filter { - /// Relative filename. - /// - /// This is also the suggested local filename. - pub filename: String, - - /// File size, indicative. Allows a fetcher to predict data usage. - pub size: usize, +impl Deref for Manifest { + type Target = crate::data::Manifest; - /// SHA256 hash of file contents. - #[serde(with = "hex::serde")] - pub hash: Vec, + fn deref(&self) -> &Self::Target { + &self.0 + } } /// Input parameters for a revocation check. diff --git a/upki/tests/integration.rs b/upki/tests/integration.rs index 97e4c487..b8401822 100644 --- a/upki/tests/integration.rs +++ b/upki/tests/integration.rs @@ -36,7 +36,7 @@ fn config_unknown_fields() { .arg("--config-file") .arg("tests/data/config_unknown_fields/config.toml") .arg("show-config"), - @r###" + @r#" success: false exit_code: 1 ----- stdout ----- @@ -49,12 +49,12 @@ fn config_unknown_fields() { | 1 | cache_dir = "tests/data/config_unknown_fields/" | ^^^^^^^^^ - unknown field `cache_dir`, expected `cache-dir` or `revocation` + unknown field `cache_dir`, expected one of `cache-dir`, `revocation`, `intermediates` Location: upki/src/main.rs:[LINE]:[COLUMN] - "###); + "#); } #[test]