From de0b5fb315b73e6960c77c1ae934f63bce593bc9 Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Wed, 6 Jul 2022 16:50:40 -0700 Subject: document bulk chocula update --- extra/bulk_edits/2022-07-06_chocula.md | 25 +++++++++++++++++++++++++ extra/bulk_edits/CHANGELOG.md | 5 +++++ 2 files changed, 30 insertions(+) create mode 100644 extra/bulk_edits/2022-07-06_chocula.md (limited to 'extra') diff --git a/extra/bulk_edits/2022-07-06_chocula.md b/extra/bulk_edits/2022-07-06_chocula.md new file mode 100644 index 00000000..86bf36fb --- /dev/null +++ b/extra/bulk_edits/2022-07-06_chocula.md @@ -0,0 +1,25 @@ + +Periodic import of chocula metadata updates. + +## Prod Import + + date + # Wed Jul 6 23:29:47 UTC 2022 + + git log -n1 + # aff3f40a5177dd6de4eee8ea7bca78df7a595bf3 + + export FATCAT_AUTH_WORKER_JOURNAL_METADATA=[...] + head -n100 /srv/fatcat/datasets/chocula_fatcat_export.2022-07-06.json | ./fatcat_import.py chocula --do-updates - + # Counter({'total': 100, 'exists': 86, 'exists-skip-update': 83, 'update': 13, 'exists-by-issnl': 3, 'insert': 1, 'skip': 0}) + +Many updates are just KBART holding dates or DOAJ as-of dates, but that is fine +and expected. + + time cat /srv/fatcat/datasets/chocula_fatcat_export.2022-07-06.json | ./fatcat_import.py chocula --do-updates - + # Counter({'total': 187480, 'exists': 155943, 'exists-skip-update': 151171, 'update': 30437, 'exists-by-issnl': 4772, 'insert': 1100, 'skip': 0}) + # real 10m28.081s + # user 4m37.447s + # sys 0m16.063s + +Now update stats, following `extra/container_count_update/README.md`. diff --git a/extra/bulk_edits/CHANGELOG.md b/extra/bulk_edits/CHANGELOG.md index 94a32947..9acf8ccd 100644 --- a/extra/bulk_edits/CHANGELOG.md +++ b/extra/bulk_edits/CHANGELOG.md @@ -9,6 +9,11 @@ this file should probably get merged into the guide at some point. This file should not turn in to a TODO list! +## 2022-07 + +Ran a journal-level metadata update, using chocula. + + ## 2022-04 Imported some initial fileset entities. -- cgit v1.2.3