-
Notifications
You must be signed in to change notification settings - Fork 710
Genericize OAuth profiles #13477
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Genericize OAuth profiles #13477
Changes from 1 commit
319fa39
633d64d
6662fb9
239b17c
12b4ca8
22b3133
9c6eadc
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,6 @@ | ||
| -- This migration is a data backfill and has no meaningful inverse. Rolling | ||
| -- it back would require distinguishing backfilled rows from rows inserted | ||
| -- by login, which we cannot do. The `oauth_github` table itself is dropped | ||
| -- by the earlier migration 2026-01-20-162913_oauth_github_table/down.sql. | ||
| -- Intentionally a no-op. | ||
| SELECT 1; | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,26 @@ | ||
| -- Backfill oauth_github for users who haven't logged in since the table | ||
| -- was created. Batched by id range, idempotent via ON CONFLICT DO NOTHING. | ||
|
|
||
| SET LOCAL lock_timeout = '10s'; | ||
| SET LOCAL statement_timeout = '120s'; | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could you elaborate on why this commit is needed? After we merged #12792, we ran the SQL in https://github.com/rust-lang/crates.io/blob/150d165e4d5ecda24b60f146589db4def7fd34bf/migrations/data_oauth_github.sql outside of the migration framework that runs on deploy, so the backfill has already been handled. |
||
|
|
||
| DO $$ | ||
| DECLARE | ||
| lo INT; | ||
| hi INT; | ||
| pos INT; | ||
| BEGIN | ||
| SELECT MIN(id), MAX(id) INTO lo, hi FROM users WHERE gh_id > 0; | ||
| IF lo IS NULL THEN RETURN; END IF; | ||
|
|
||
| pos := lo; | ||
| WHILE pos <= hi LOOP | ||
| INSERT INTO oauth_github (account_id, user_id, encrypted_token, login, avatar) | ||
| SELECT gh_id, id, gh_encrypted_token, gh_login, gh_avatar | ||
| FROM users | ||
| WHERE gh_id > 0 AND id >= pos AND id < pos + 5000 | ||
| ON CONFLICT (account_id) DO NOTHING; | ||
|
|
||
| pos := pos + 5000; | ||
| END LOOP; | ||
| END $$; | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1 @@ | ||
| pub mod preflight; |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,133 @@ | ||
| //! Startup-time consistency check for the oauth_github backfill. | ||
| //! | ||
| //! Tier 1 of the profile-genericization effort cuts identity reads from | ||
| //! `users.gh_*` to `oauth_github`. That requires every user with | ||
| //! `gh_id > 0` to have a matching `oauth_github` row. The backfill migration | ||
| //! copies the data, and this module verifies the invariant on startup -- | ||
| //! if any user is missing a row the app refuses to start. | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yeah this is not the kind of thing we'd want to fail to start the app for |
||
|
|
||
| use diesel_async::AsyncPgConnection; | ||
| use thiserror::Error; | ||
|
|
||
| #[derive(Debug, Error)] | ||
| pub enum PreflightError { | ||
| #[error("database error running oauth_github preflight: {0}")] | ||
| Database(#[from] diesel::result::Error), | ||
|
|
||
| #[error( | ||
| "{count} user(s) with gh_id > 0 are missing an oauth_github row; \ | ||
| run the backfill migration before starting the server" | ||
| )] | ||
| MissingRows { count: i64 }, | ||
| } | ||
|
|
||
| pub async fn check_oauth_github_backfill( | ||
| conn: &mut AsyncPgConnection, | ||
| ) -> Result<(), PreflightError> { | ||
| use crates_io_database::schema::{oauth_github, users}; | ||
| use diesel::dsl::count_star; | ||
| use diesel::prelude::*; | ||
| use diesel_async::RunQueryDsl; | ||
|
|
||
| let missing: i64 = users::table | ||
| .left_join(oauth_github::table.on(oauth_github::user_id.eq(users::id))) | ||
| .filter(users::gh_id.gt(0)) | ||
| .filter(oauth_github::user_id.is_null()) | ||
| .select(count_star()) | ||
| .first(conn) | ||
| .await?; | ||
|
|
||
| if missing > 0 { | ||
| tracing::error!( | ||
| missing_users = missing, | ||
| "oauth_github preflight failed: users with gh_id > 0 are missing oauth_github rows" | ||
| ); | ||
| return Err(PreflightError::MissingRows { count: missing }); | ||
| } | ||
|
|
||
| Ok(()) | ||
| } | ||
|
|
||
| #[cfg(test)] | ||
| mod tests { | ||
| use super::*; | ||
| use crates_io_database::schema::{oauth_github, users}; | ||
| use crates_io_test_db::TestDatabase; | ||
| use diesel::prelude::*; | ||
| use diesel_async::RunQueryDsl; | ||
|
|
||
| async fn setup() -> (TestDatabase, diesel_async::AsyncPgConnection) { | ||
| let db = TestDatabase::new(); | ||
| let conn = db.async_connect().await; | ||
| (db, conn) | ||
| } | ||
|
|
||
| #[tokio::test(flavor = "multi_thread")] | ||
| async fn preflight_passes_when_every_gh_user_has_oauth_row() { | ||
| let (_db, mut conn) = setup().await; | ||
|
|
||
| let user_id = diesel::insert_into(users::table) | ||
| .values(( | ||
| users::gh_id.eq(42), | ||
| users::gh_login.eq("alice"), | ||
| users::gh_encrypted_token.eq(vec![0u8; 32]), | ||
| )) | ||
| .returning(users::id) | ||
| .get_result::<i32>(&mut conn) | ||
| .await | ||
| .unwrap(); | ||
|
|
||
| diesel::insert_into(oauth_github::table) | ||
| .values(( | ||
| oauth_github::account_id.eq(42i64), | ||
| oauth_github::user_id.eq(user_id), | ||
| oauth_github::login.eq("alice"), | ||
| oauth_github::encrypted_token.eq(vec![0u8; 32]), | ||
| )) | ||
| .execute(&mut conn) | ||
| .await | ||
| .unwrap(); | ||
|
|
||
| let result = check_oauth_github_backfill(&mut conn).await; | ||
| assert!(result.is_ok(), "expected Ok(()), got {result:?}"); | ||
| } | ||
|
|
||
| #[tokio::test(flavor = "multi_thread")] | ||
| async fn preflight_fails_when_a_gh_user_has_no_oauth_row() { | ||
| let (_db, mut conn) = setup().await; | ||
|
|
||
| diesel::insert_into(users::table) | ||
| .values(( | ||
| users::gh_id.eq(99), | ||
| users::gh_login.eq("bob"), | ||
| users::gh_encrypted_token.eq(vec![0u8; 32]), | ||
| )) | ||
| .execute(&mut conn) | ||
| .await | ||
| .unwrap(); | ||
|
|
||
| let result = check_oauth_github_backfill(&mut conn).await; | ||
| match result { | ||
| Err(PreflightError::MissingRows { count: 1 }) => {} | ||
| other => panic!("expected Err(MissingRows {{ count: 1 }}), got {other:?}"), | ||
| } | ||
| } | ||
|
|
||
| #[tokio::test(flavor = "multi_thread")] | ||
| async fn preflight_ignores_synthetic_gh_id_zero_or_negative() { | ||
| let (_db, mut conn) = setup().await; | ||
|
|
||
| diesel::insert_into(users::table) | ||
| .values(( | ||
| users::gh_id.eq(-1), | ||
| users::gh_login.eq("synthetic"), | ||
| users::gh_encrypted_token.eq(vec![0u8; 32]), | ||
| )) | ||
| .execute(&mut conn) | ||
| .await | ||
| .unwrap(); | ||
|
|
||
| let result = check_oauth_github_backfill(&mut conn).await; | ||
| assert!(result.is_ok(), "expected Ok(()), got {result:?}"); | ||
| } | ||
| } | ||
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yeah any comment corrections should be made in a separate commit, possibly a separate PR
View changes since the review