aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorbnewbold <bnewbold@archive.org>2022-09-06 18:03:35 +0000
committerbnewbold <bnewbold@archive.org>2022-09-06 18:03:35 +0000
commit8bdd5fd92a33cf05424447241033bd529b68af77 (patch)
treec4bb2e93ed3c1959c9aa8edd0e395381aa60bb9c
parent30264158fc1cc261638155d4b1104cad212baa2f (diff)
parentc47e650367e65a5b785cdd8e7af3867ea7b87b8b (diff)
downloadfatcat-8bdd5fd92a33cf05424447241033bd529b68af77.tar.gz
fatcat-8bdd5fd92a33cf05424447241033bd529b68af77.zip
Merge branch 'bnewbold-fill-changelog-gaps' into 'master'
rust: fill changelog gap helper tool See merge request webgroup/fatcat!142
-rw-r--r--notes/2022-08-12_changelog_gap.md41
-rw-r--r--rust/.gitignore1
-rw-r--r--rust/src/bin/fatcat-doctor.rs97
3 files changed, 138 insertions, 1 deletions
diff --git a/notes/2022-08-12_changelog_gap.md b/notes/2022-08-12_changelog_gap.md
new file mode 100644
index 00000000..48572973
--- /dev/null
+++ b/notes/2022-08-12_changelog_gap.md
@@ -0,0 +1,41 @@
+
+On 2022-08-11, we realized that there was a "gap" in the changelog: after a VM
+reboot, the postgresql primary key sequence for the 'changelog' table had been
+incremented, but the corresponding rows were never inserted (the transaction
+hadn't finished).
+
+This was a known potential problem (naively relying on the sequence to
+increment with no gaps).
+
+As a work-around, implemented a simple "gap filler" which will create
+empty/dummy editgroups and changelog entries.
+
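+This gap extends from 6153703 to 6153721 (inclusive), so just 19 entries. The
+first argument to the fixup command is the last good index *before* the gap
+(6153702), and the second is the highest index to backfill. The command was: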
+
+ ./target/release/fatcat-doctor backfill-changelog-gap 6153702 6153721
+ Inserted changelog: 6153703
+ Inserted changelog: 6153704
+ Inserted changelog: 6153705
+ Inserted changelog: 6153706
+ Inserted changelog: 6153707
+ Inserted changelog: 6153708
+ Inserted changelog: 6153709
+ Inserted changelog: 6153710
+ Inserted changelog: 6153711
+ Inserted changelog: 6153712
+ Inserted changelog: 6153713
+ Inserted changelog: 6153714
+ Inserted changelog: 6153715
+ Inserted changelog: 6153716
+ Inserted changelog: 6153717
+ Inserted changelog: 6153718
+ Inserted changelog: 6153719
+ Inserted changelog: 6153720
+ Inserted changelog: 6153721
+
+After that the changelog worker was happy:
+
+ Aug 13 02:41:59 wbgrp-svc502.us.archive.org fatcat-worker[386037]: Most recent changelog index in Kafka seems to be 6153702
+ Aug 13 02:41:59 wbgrp-svc502.us.archive.org fatcat-worker[386037]: Fetching changelogs from 6153703 through 6158547
+ Aug 13 02:43:12 wbgrp-svc502.us.archive.org fatcat-worker[386037]: Sleeping 5.0 seconds...
+
diff --git a/rust/.gitignore b/rust/.gitignore
index 03e50598..f237ae31 100644
--- a/rust/.gitignore
+++ b/rust/.gitignore
@@ -1,6 +1,5 @@
.env
target/
-bin/
fatcat-*.tar.gz
!.cargo
diff --git a/rust/src/bin/fatcat-doctor.rs b/rust/src/bin/fatcat-doctor.rs
new file mode 100644
index 00000000..6e869634
--- /dev/null
+++ b/rust/src/bin/fatcat-doctor.rs
@@ -0,0 +1,97 @@
+//! Database cleanup tool
+
+use clap::{value_t_or_exit, App, SubCommand};
+
+use fatcat::database_models::*;
+use fatcat::database_schema::*;
+use fatcat::errors::Result;
+use fatcat::identifiers::FatcatId;
+use fatcat::server;
+use fatcat::server::DbConn;
+use std::process;
+use std::str::FromStr;
+
+use diesel;
+use diesel::prelude::*;
+
+/// Fills a gap in the `changelog` table with dummy entries.
+///
+/// For every index in `last_good + 1 ..= max_index`, inserts an empty editgroup
+/// (marked as accepted) and a changelog row with that index pointing at it.
+/// Both rows re-use the timestamp of the existing `last_good` changelog row.
+///
+/// The existence check and both inserts for a single index run inside one
+/// database transaction, so an error part-way through does not leave behind an
+/// accepted editgroup which has no changelog entry.
+///
+/// Stops early, still returning `Ok(())`, at the first index in the range which
+/// already has a changelog row; later indices are not touched.
+///
+/// # Panics
+///
+/// Panics if either index is not positive, if `last_good >= max_index`, or if
+/// the highest changelog id in the database is lower than `max_index`.
+///
+/// # Errors
+///
+/// Returns any database error, including the `last_good` row not existing.
+fn backfill_changelog_gap(conn: &DbConn, last_good: i64, max_index: i64) -> Result<()> {
+    // sanity check arguments against database
+    assert!(last_good > 0);
+    assert!(max_index > 0);
+    assert!(last_good < max_index);
+    let highest_row: ChangelogRow = changelog::table.order(changelog::id.desc()).first(conn)?;
+    assert!(highest_row.id >= max_index);
+
+    // default values
+    // 'root' editor_id is aaaaaaaaaaaabkvkaaaaaaaaae
+    // 'admin' editor_id is aaaaaaaaaaaabkvkaaaaaaaaai
+    let editor_id = FatcatId::from_str("aaaaaaaaaaaabkvkaaaaaaaaai")
+        .expect("hard-coded admin editor_id is a valid identifier");
+    let description = "Backfill of missing changelog entries due to database id gap";
+
+    // fetch the last entry before the gap, to re-use the timestamp
+    let existing_row: ChangelogRow = changelog::table.find(last_good).first(conn)?;
+
+    for index in (last_good + 1)..=max_index {
+        // Check-then-insert as one atomic unit; yields `false` when the index
+        // turned out not to be a gap (nothing is written in that case).
+        let inserted = conn.transaction(|| -> Result<bool> {
+            // ensure this index is actually a gap
+            let count: i64 = changelog::table
+                .filter(changelog::id.eq(index))
+                .count()
+                .get_result(conn)?;
+            if count != 0 {
+                return Ok(false);
+            }
+
+            // create dummy empty editgroup, then add a changelog entry
+            let eg_row: EditgroupRow = diesel::insert_into(editgroup::table)
+                .values((
+                    editgroup::editor_id.eq(editor_id.to_uuid()),
+                    editgroup::created.eq(existing_row.timestamp),
+                    editgroup::is_accepted.eq(true),
+                    editgroup::description.eq(Some(description)),
+                ))
+                .get_result(conn)?;
+            let _entry_row: ChangelogRow = diesel::insert_into(changelog::table)
+                .values((
+                    changelog::id.eq(index),
+                    changelog::editgroup_id.eq(eg_row.id),
+                    changelog::timestamp.eq(existing_row.timestamp),
+                ))
+                .get_result(conn)?;
+            Ok(true)
+        })?;
+
+        if !inserted {
+            println!("Found existing changelog: {}", index);
+            return Ok(());
+        }
+        // only report success once the transaction has committed
+        println!("Inserted changelog: {}", index);
+    }
+    Ok(())
+}
+
+/// Command-line entry point for the `fatcat-doctor` database fixup tool.
+///
+/// Parses the command line, then dispatches on the subcommand. The only
+/// subcommand is `backfill-changelog-gap <start> <end>`, which hands both
+/// indices to `backfill_changelog_gap`.
+///
+/// A database connection is only acquired once a known subcommand and its
+/// arguments have been parsed, so printing usage for a missing or unknown
+/// subcommand does not require a reachable database.
+///
+/// With a missing or unknown subcommand, prints the usage text and exits the
+/// process with status `-1`.
+fn main() -> Result<()> {
+    let m = App::new("fatcat-doctor")
+        .version(env!("CARGO_PKG_VERSION"))
+        .author("Bryan Newbold <bnewbold@archive.org>")
+        .about("Database cleanup / fixup tool")
+        .subcommand(
+            SubCommand::with_name("backfill-changelog-gap")
+                .about("Inserts dummy changelog entries and editgroups for gap")
+                .args_from_usage(
+                    "<start> 'changelog index of entry just before gap'
+                     <end> 'highest changelog index to backfill'",
+                ),
+        )
+        .get_matches();
+
+    match m.subcommand() {
+        ("backfill-changelog-gap", Some(subm)) => {
+            // value_t_or_exit! terminates the process on an unparsable value
+            let last_good: i64 = value_t_or_exit!(subm.value_of("start"), i64);
+            let max_index: i64 = value_t_or_exit!(subm.value_of("end"), i64);
+
+            // connect as late as possible: only when there is real work to do
+            let db_conn = server::database_worker_pool()?
+                .get()
+                .expect("database pool");
+            backfill_changelog_gap(&db_conn, last_good, max_index)?;
+        }
+        _ => {
+            println!("Missing or unimplemented command!");
+            println!("{}", m.usage());
+            process::exit(-1);
+        }
+    }
+    Ok(())
+}