From 5007ee299ce07b31db6d48cd4ab2587f87af53ab Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Mon, 17 Aug 2020 17:35:47 -0700 Subject: bulk edit log: add notes on recent chocula import --- notes/bulk_edits/2020-08-05_chocula.md | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100644 notes/bulk_edits/2020-08-05_chocula.md diff --git a/notes/bulk_edits/2020-08-05_chocula.md b/notes/bulk_edits/2020-08-05_chocula.md new file mode 100644 index 00000000..78d2c9c3 --- /dev/null +++ b/notes/bulk_edits/2020-08-05_chocula.md @@ -0,0 +1,17 @@ + +Using `journal-metadata-bot` and `chocula_fatcat_export.2020-07-31.json` export. + +Start small: + + head -n100 /srv/fatcat/datasets/chocula_fatcat_export.2020-07-31.json | ./fatcat_import.py chocula --do-updates - + => Counter({'total': 100, 'update': 67, 'exists': 25, 'exists-skip-update': 24, 'insert': 8, 'exists-by-issnl': 1, 'skip': 0}) + +Full batch: + + time cat /srv/fatcat/datasets/chocula_fatcat_export.2020-07-31.json | ./fatcat_import.py chocula --do-updates - + + Counter({'total': 164950, 'update': 112074, 'exists': 37243, 'exists-skip-update': 35862, 'insert': 15633, 'exists-by-issnl': 1381, 'skip': 0}) + + real 16m9.779s + user 6m26.324s + sys 0m16.088s -- cgit v1.2.3