summary refs log tree commit diff stats
path: root/Makefile
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@archive.org> 2021-12-02 17:54:05 -0800
committer Bryan Newbold <bnewbold@archive.org> 2021-12-02 17:54:05 -0800
commit 25d1afe6b22eccbf86de867ec86a294f2771faff (patch)
tree ce219a2cd1b9710beb5588c79903e87eedfa18b7 /Makefile
parent 4f228b1c9ece8efe0635e44cb5fdc7ecc5c66ec7 (diff)
download fatcat-scholar-25d1afe6b22eccbf86de867ec86a294f2771faff.tar.gz
fatcat-scholar-25d1afe6b22eccbf86de867ec86a294f2771faff.zip
issue-db build process: tweak archive.org filters; increase parallelism
Diffstat (limited to 'Makefile')
-rw-r--r-- Makefile 8
1 files changed, 4 insertions, 4 deletions
diff --git a/Makefile b/Makefile
index c144dd1..3c163d1 100644
--- a/Makefile
+++ b/Makefile
@@ -74,20 +74,20 @@ recompile-i18n: extract-i18n ## Re-extract and compile all translation files (b
data/$(TODAY)/sim_collections.tsv:
mkdir -p data/$(TODAY)
- pipenv run ia search "collection:periodicals collection:sim_microfilm mediatype:collection" --itemlist | rg "^pub_" > $@.wip
+ pipenv run ia search 'collection:periodicals collection:sim_microfilm mediatype:collection (pub_type:"Scholarly Journals" OR pub_type:"Historical Journals" OR pub_type:"Law Journals")' --itemlist | rg "^pub_" | pv -l > $@.wip
mv $@.wip $@
data/$(TODAY)/sim_items.tsv:
mkdir -p data/$(TODAY)
- pipenv run ia search "collection:periodicals collection:sim_microfilm mediatype:texts !noindex:true !pub_type:Magazines" --itemlist | rg "^sim_" | pv -l > $@.wip
+ pipenv run ia search 'collection:periodicals collection:sim_microfilm mediatype:texts !noindex:true (pub_type:"Scholarly Journals" OR pub_type:"Historical Journals" OR pub_type:"Law Journals")' --itemlist | rg "^sim_" | pv -l > $@.wip
mv $@.wip $@
data/$(TODAY)/sim_collections.json: data/$(TODAY)/sim_collections.tsv
- cat data/$(TODAY)/sim_collections.tsv | pipenv run parallel -j4 ia metadata {} | jq . -c | pv -l > $@.wip
+ cat data/$(TODAY)/sim_collections.tsv | pipenv run parallel -j20 ia metadata {} | jq . -c | pv -l > $@.wip
mv $@.wip $@
data/$(TODAY)/sim_items.json: data/$(TODAY)/sim_items.tsv
- cat data/$(TODAY)/sim_items.tsv | pipenv run parallel -j8 ia metadata {} | jq -c 'del(.histograms, .rotations)' | pv -l > $@.wip
+ cat data/$(TODAY)/sim_items.tsv | pipenv run parallel -j20 ia metadata {} | jq -c 'del(.histograms, .rotations)' | pv -l > $@.wip
mv $@.wip $@
data/$(TODAY)/issue_db.sqlite: data/$(TODAY)/sim_collections.json data/$(TODAY)/sim_items.json