From 2d3d0274a23f0e52bff8b786aa7a930cb5b74c99 Mon Sep 17 00:00:00 2001
From: Bryan Newbold
Date: Sat, 16 May 2020 13:37:02 -0700
Subject: more progress on issue_db

---
 notes/issue_db.md | 13 +++++++++++++
 1 file changed, 13 insertions(+)
 create mode 100644 notes/issue_db.md

(limited to 'notes')

diff --git a/notes/issue_db.md b/notes/issue_db.md
new file mode 100644
index 0000000..26f98d2
--- /dev/null
+++ b/notes/issue_db.md
@@ -0,0 +1,13 @@
+
+## Commands
+
+    mkdir -p data
+    ia search "collection:periodicals collection:sim_microfilm mediatype:collection" --itemlist | rg "^pub_" > data/sim_collections.tsv
+    ia search "collection:periodicals collection:sim_microfilm mediatype:texts" --itemlist | rg "^sim_" > data/sim_items.tsv
+
+    cat data/sim_collections.tsv | parallel -j4 ia metadata {} | jq . -c | pv -l > data/sim_collections.json
+    cat data/sim_items.tsv | parallel -j8 ia metadata {} | jq . -c | pv -l > data/sim_items.json
+
+    cat data/sim_collections.2020-05-15.json | pv -l | python -m fatcat_scholar.issue_db load_pubs
+    cat data/sim_items.2020-05-15.json | pv -l | python -m fatcat_scholar.issue_db load_issues
+    python -m fatcat_scholar.issue_db load_counts
-- 
cgit v1.2.3