diff options
| author | Bryan Newbold <bnewbold@archive.org> | 2020-05-16 13:37:02 -0700 | 
|---|---|---|
| committer | Bryan Newbold <bnewbold@archive.org> | 2020-05-16 13:37:02 -0700 | 
| commit | 2d3d0274a23f0e52bff8b786aa7a930cb5b74c99 (patch) | |
| tree | a884037b549c81e38b317153b8eb1f4913b3cd67 /notes | |
| parent | 202c6fc3cd1a5b1ba2bb005219e271dc7d4977f5 (diff) | |
| download | fatcat-scholar-2d3d0274a23f0e52bff8b786aa7a930cb5b74c99.tar.gz fatcat-scholar-2d3d0274a23f0e52bff8b786aa7a930cb5b74c99.zip | |
more progress on issue_db
Diffstat (limited to 'notes')
| -rw-r--r-- | notes/issue_db.md | 13 | 
1 files changed, 13 insertions, 0 deletions
```diff
diff --git a/notes/issue_db.md b/notes/issue_db.md
new file mode 100644
index 0000000..26f98d2
--- /dev/null
+++ b/notes/issue_db.md
@@ -0,0 +1,13 @@
+
+## Commands
+
+    mkdir -p data
+    ia search "collection:periodicals collection:sim_microfilm mediatype:collection" --itemlist | rg "^pub_" > data/sim_collections.tsv
+    ia search "collection:periodicals collection:sim_microfilm mediatype:texts" --itemlist | rg "^sim_" > data/sim_items.tsv
+
+    cat data/sim_collections.tsv | parallel -j4 ia metadata {} | jq . -c | pv -l > data/sim_collections.json
+    cat data/sim_items.tsv | parallel -j8 ia metadata {} | jq . -c | pv -l > data/sim_items.json
+
+    cat data/sim_collections.2020-05-15.json | pv -l | python -m fatcat_scholar.issue_db load_pubs
+    cat data/sim_items.2020-05-15.json | pv -l | python -m fatcat_scholar.issue_db load_issues
+    python -m fatcat_scholar.issue_db load_counts
```
