diff options
-rw-r--r-- | notes/2022-08-12_changelog_gap.md | 41 | ||||
-rw-r--r-- | rust/.gitignore | 1 | ||||
-rw-r--r-- | rust/src/bin/fatcat-doctor.rs | 97 |
3 files changed, 138 insertions, 1 deletions
diff --git a/notes/2022-08-12_changelog_gap.md b/notes/2022-08-12_changelog_gap.md new file mode 100644 index 00000000..48572973 --- /dev/null +++ b/notes/2022-08-12_changelog_gap.md @@ -0,0 +1,41 @@ + +On 2022-08-11, we realized that we had a "gap" in the changelog: after a VM +reboot, the PostgreSQL primary key sequence for the 'changelog' table had been +incremented, but rows were not inserted (transaction hadn't finished). + +This was a known potential problem (naively relying on the sequence to +increment with no gaps). + +As a work-around, we implemented a simple "gap filler" which will create +empty/dummy editgroups and changelog entries. + +This gap extends from 6153703 to 6153721, so just 19 entries. The +fixup command (whose first argument is the last good index just before the gap) was: + + ./target/release/fatcat-doctor backfill-changelog-gap 6153702 6153721 + Inserted changelog: 6153703 + Inserted changelog: 6153704 + Inserted changelog: 6153705 + Inserted changelog: 6153706 + Inserted changelog: 6153707 + Inserted changelog: 6153708 + Inserted changelog: 6153709 + Inserted changelog: 6153710 + Inserted changelog: 6153711 + Inserted changelog: 6153712 + Inserted changelog: 6153713 + Inserted changelog: 6153714 + Inserted changelog: 6153715 + Inserted changelog: 6153716 + Inserted changelog: 6153717 + Inserted changelog: 6153718 + Inserted changelog: 6153719 + Inserted changelog: 6153720 + Inserted changelog: 6153721 + +After that the changelog worker was happy: + + Aug 13 02:41:59 wbgrp-svc502.us.archive.org fatcat-worker[386037]: Most recent changelog index in Kafka seems to be 6153702 + Aug 13 02:41:59 wbgrp-svc502.us.archive.org fatcat-worker[386037]: Fetching changelogs from 6153703 through 6158547 + Aug 13 02:43:12 wbgrp-svc502.us.archive.org fatcat-worker[386037]: Sleeping 5.0 seconds... 
+ diff --git a/rust/.gitignore b/rust/.gitignore index 03e50598..f237ae31 100644 --- a/rust/.gitignore +++ b/rust/.gitignore @@ -1,6 +1,5 @@ .env target/ -bin/ fatcat-*.tar.gz !.cargo diff --git a/rust/src/bin/fatcat-doctor.rs b/rust/src/bin/fatcat-doctor.rs new file mode 100644 index 00000000..6e869634 --- /dev/null +++ b/rust/src/bin/fatcat-doctor.rs @@ -0,0 +1,97 @@ +//! Database cleanup tool + +use clap::{value_t_or_exit, App, SubCommand}; + +use fatcat::database_models::*; +use fatcat::database_schema::*; +use fatcat::errors::Result; +use fatcat::identifiers::FatcatId; +use fatcat::server; +use fatcat::server::DbConn; +use std::process; +use std::str::FromStr; + +use diesel; +use diesel::prelude::*; + +fn backfill_changelog_gap(conn: &DbConn, last_good: i64, max_index: i64) -> Result<()> { + // sanity check arguments against database + assert!(last_good > 0); + assert!(max_index > 0); + assert!(last_good < max_index); + let highest_row: ChangelogRow = changelog::table.order(changelog::id.desc()).first(conn)?; + assert!(highest_row.id >= max_index); + + // default values + // 'root' editor_id is aaaaaaaaaaaabkvkaaaaaaaaae + // 'admin' editor_id is aaaaaaaaaaaabkvkaaaaaaaaai + let editor_id = FatcatId::from_str("aaaaaaaaaaaabkvkaaaaaaaaai").unwrap(); + let description = "Backfill of missing changelog entries due to database id gap"; + + // fetch the last entry before the gap, to re-use the timestamp + let existing_row: ChangelogRow = changelog::table.find(last_good).first(conn)?; + + for index in last_good + 1..max_index + 1 { + // ensure this index is actually a gap + let count: i64 = changelog::table + .filter(changelog::id.eq(index)) + .count() + .get_result(conn)?; + if count != 0 { + println!("Found existing changelog: {}", index); + return Ok(()); + } + + // create dummy empty editgroup, then add a changelog entry + let eg_row: EditgroupRow = diesel::insert_into(editgroup::table) + .values(( + editgroup::editor_id.eq(editor_id.to_uuid()), + 
editgroup::created.eq(existing_row.timestamp), + editgroup::is_accepted.eq(true), + editgroup::description.eq(Some(description)), + )) + .get_result(conn)?; + let _entry_row: ChangelogRow = diesel::insert_into(changelog::table) + .values(( + changelog::id.eq(index), + changelog::editgroup_id.eq(eg_row.id), + changelog::timestamp.eq(existing_row.timestamp), + )) + .get_result(conn)?; + println!("Inserted changelog: {}", index); + } + Ok(()) +} + +fn main() -> Result<()> { + let m = App::new("fatcat-doctor") + .version(env!("CARGO_PKG_VERSION")) + .author("Bryan Newbold <bnewbold@archive.org>") + .about("Database cleanup / fixup tool") + .subcommand( + SubCommand::with_name("backfill-changelog-gap") + .about("Inserts dummy changelog entries and editgroups for gap") + .args_from_usage( + "<start> 'changelog index of entry just before gap' + <end> 'highest changelog index to backfill'", + ), + ) + .get_matches(); + + let db_conn = server::database_worker_pool()? + .get() + .expect("database pool"); + match m.subcommand() { + ("backfill-changelog-gap", Some(subm)) => { + let last_good: i64 = value_t_or_exit!(subm.value_of("start"), i64); + let max_index: i64 = value_t_or_exit!(subm.value_of("end"), i64); + backfill_changelog_gap(&db_conn, last_good, max_index)?; + } + _ => { + println!("Missing or unimplemented command!"); + println!("{}", m.usage()); + process::exit(-1); + } + } + Ok(()) +} |