Skip to content
Draft
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
-- This migration is a data backfill and has no meaningful inverse. Rolling
-- it back would require distinguishing backfilled rows from rows inserted
-- by login, which we cannot do. The `oauth_github` table itself is dropped
-- by the earlier migration 2026-01-20-162913_oauth_github_table/down.sql.
Copy link
Copy Markdown
Member

@carols10cents carols10cents Apr 24, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah any comment corrections should be made in a separate commit, possibly a separate PR

View changes since the review

-- Intentionally a no-op.
SELECT 1;
26 changes: 26 additions & 0 deletions migrations/2026-04-15-221937-0000_backfill_oauth_github/up.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
-- Backfill oauth_github for users who haven't logged in since the table
-- was created. Batched by id range, idempotent via ON CONFLICT DO NOTHING.

SET LOCAL lock_timeout = '10s';
SET LOCAL statement_timeout = '120s';
Copy link
Copy Markdown
Member

@carols10cents carols10cents Apr 24, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you elaborate on why this commit is needed? After we merged #12792, we ran the SQL in https://github.com/rust-lang/crates.io/blob/150d165e4d5ecda24b60f146589db4def7fd34bf/migrations/data_oauth_github.sql outside of the migration framework that runs on deploy, so the backfill has already been handled.

View changes since the review


DO $$
DECLARE
lo INT;
hi INT;
pos INT;
BEGIN
SELECT MIN(id), MAX(id) INTO lo, hi FROM users WHERE gh_id > 0;
IF lo IS NULL THEN RETURN; END IF;

pos := lo;
WHILE pos <= hi LOOP
INSERT INTO oauth_github (account_id, user_id, encrypted_token, login, avatar)
SELECT gh_id, id, gh_encrypted_token, gh_login, gh_avatar
FROM users
WHERE gh_id > 0 AND id >= pos AND id < pos + 5000
ON CONFLICT (account_id) DO NOTHING;

pos := pos + 5000;
END LOOP;
END $$;
13 changes: 13 additions & 0 deletions src/bin/server.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
extern crate tracing;

use crates_io::middleware::normalize_path::normalize_path;
use crates_io::oauth::preflight::check_oauth_github_backfill;
use crates_io::{App, Emails, metrics::LogEncoder};
use std::{sync::Arc, time::Duration};

Expand Down Expand Up @@ -73,6 +74,18 @@ fn main() -> anyhow::Result<()> {

// Block the main thread until the server has shutdown
rt.block_on(async {
// Run startup preflight checks before accepting any connections.
{
let mut conn = app
.db_write()
.await
.map_err(std::io::Error::other)?;
check_oauth_github_backfill(&mut conn)
.await
.map_err(std::io::Error::other)?;
tracing::info!("oauth_github preflight check passed");
}

// Create a `TcpListener` using tokio.
let listener = TcpListener::bind((app.config.ip, app.config.port)).await?;

Expand Down
1 change: 1 addition & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ pub mod index;
mod licenses;
pub mod metrics;
pub mod middleware;
pub mod oauth;
pub mod openapi;
pub mod rate_limiter;
mod router;
Expand Down
1 change: 1 addition & 0 deletions src/oauth/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
pub mod preflight;
133 changes: 133 additions & 0 deletions src/oauth/preflight.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
//! Startup-time consistency check for the oauth_github backfill.
//!
//! Tier 1 of the profile-genericization effort cuts identity reads from
//! `users.gh_*` to `oauth_github`. That requires every user with
//! `gh_id > 0` to have a matching `oauth_github` row. The backfill migration
//! copies the data, and this module verifies the invariant on startup --
//! if any user is missing a row the app refuses to start.
Copy link
Copy Markdown
Member

@carols10cents carols10cents Apr 24, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah this is not the kind of thing we'd want to fail to start the app for

View changes since the review


use diesel_async::AsyncPgConnection;
use thiserror::Error;

#[derive(Debug, Error)]
pub enum PreflightError {
#[error("database error running oauth_github preflight: {0}")]
Database(#[from] diesel::result::Error),

#[error(
"{count} user(s) with gh_id > 0 are missing an oauth_github row; \
run the backfill migration before starting the server"
)]
MissingRows { count: i64 },
}

pub async fn check_oauth_github_backfill(
conn: &mut AsyncPgConnection,
) -> Result<(), PreflightError> {
use crates_io_database::schema::{oauth_github, users};
use diesel::dsl::count_star;
use diesel::prelude::*;
use diesel_async::RunQueryDsl;

let missing: i64 = users::table
.left_join(oauth_github::table.on(oauth_github::user_id.eq(users::id)))
.filter(users::gh_id.gt(0))
.filter(oauth_github::user_id.is_null())
.select(count_star())
.first(conn)
.await?;

if missing > 0 {
tracing::error!(
missing_users = missing,
"oauth_github preflight failed: users with gh_id > 0 are missing oauth_github rows"
);
return Err(PreflightError::MissingRows { count: missing });
}

Ok(())
}

#[cfg(test)]
mod tests {
use super::*;
use crates_io_database::schema::{oauth_github, users};
use crates_io_test_db::TestDatabase;
use diesel::prelude::*;
use diesel_async::RunQueryDsl;

async fn setup() -> (TestDatabase, diesel_async::AsyncPgConnection) {
let db = TestDatabase::new();
let conn = db.async_connect().await;
(db, conn)
}

#[tokio::test(flavor = "multi_thread")]
async fn preflight_passes_when_every_gh_user_has_oauth_row() {
let (_db, mut conn) = setup().await;

let user_id = diesel::insert_into(users::table)
.values((
users::gh_id.eq(42),
users::gh_login.eq("alice"),
users::gh_encrypted_token.eq(vec![0u8; 32]),
))
.returning(users::id)
.get_result::<i32>(&mut conn)
.await
.unwrap();

diesel::insert_into(oauth_github::table)
.values((
oauth_github::account_id.eq(42i64),
oauth_github::user_id.eq(user_id),
oauth_github::login.eq("alice"),
oauth_github::encrypted_token.eq(vec![0u8; 32]),
))
.execute(&mut conn)
.await
.unwrap();

let result = check_oauth_github_backfill(&mut conn).await;
assert!(result.is_ok(), "expected Ok(()), got {result:?}");
}

#[tokio::test(flavor = "multi_thread")]
async fn preflight_fails_when_a_gh_user_has_no_oauth_row() {
let (_db, mut conn) = setup().await;

diesel::insert_into(users::table)
.values((
users::gh_id.eq(99),
users::gh_login.eq("bob"),
users::gh_encrypted_token.eq(vec![0u8; 32]),
))
.execute(&mut conn)
.await
.unwrap();

let result = check_oauth_github_backfill(&mut conn).await;
match result {
Err(PreflightError::MissingRows { count: 1 }) => {}
other => panic!("expected Err(MissingRows {{ count: 1 }}), got {other:?}"),
}
}

#[tokio::test(flavor = "multi_thread")]
async fn preflight_ignores_synthetic_gh_id_zero_or_negative() {
let (_db, mut conn) = setup().await;

diesel::insert_into(users::table)
.values((
users::gh_id.eq(-1),
users::gh_login.eq("synthetic"),
users::gh_encrypted_token.eq(vec![0u8; 32]),
))
.execute(&mut conn)
.await
.unwrap();

let result = check_oauth_github_backfill(&mut conn).await;
assert!(result.is_ok(), "expected Ok(()), got {result:?}");
}
}