diff options
author | Bryan Newbold <bnewbold@archive.org> | 2020-05-06 14:21:39 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2020-05-06 14:21:39 -0700 |
commit | 25b4ca85cf535629a7e1cdce02999084e5bc2535 (patch) | |
tree | 7aba40cf479c130bc0987ddb9ea5a1204971deb6 /chocula_tool.py | |
parent | 3e7e3eb8684a91fd7dc97d05f259eec61c525927 (diff) | |
download | chocula-25b4ca85cf535629a7e1cdce02999084e5bc2535.tar.gz chocula-25b4ca85cf535629a7e1cdce02999084e5bc2535.zip |
start refactoring files into module
Diffstat (limited to 'chocula_tool.py')
-rwxr-xr-x | chocula_tool.py | 123 |
1 files changed, 123 insertions, 0 deletions
diff --git a/chocula_tool.py b/chocula_tool.py new file mode 100755 index 0000000..345097e --- /dev/null +++ b/chocula_tool.py @@ -0,0 +1,123 @@ +#!/usr/bin/env python3 + +""" +Count Chocula - online serials metadata and stats + + "one, two, three, un-preserved web-native open-access long-tail indie + journals, hah, hah, hah!" + + (yeah, I know, this name isn't very good) + (see also: https://teamyacht.com/ernstchoukula.com/Ernst-Choukula.html) + +Commands: + + everything + init_db + summarize + export + export_fatcat + + index_doaj + index_road + index_crossref + index_entrez + index_norwegian + index_szczepanski + index_ezb + index_wikidata + index_openapc + + load_fatcat + load_fatcat_stats + + export_urls + update_url_status + +Future commands: + + index_jurn + index_datacite + preserve_kbart --keeper SLUG + preserve_sim + +See TODO.md for more work-in-progress +""" + +import sys +import csv +import argparse + +from chocula import ChoculaDatabase +from chocula.config import * + + +def main(): + parser = argparse.ArgumentParser( + formatter_class=argparse.ArgumentDefaultsHelpFormatter) + subparsers = parser.add_subparsers() + + parser.add_argument("--db-file", + help="run in mode that considers only terminal HTML success", + default='chocula.sqlite', + type=str) + parser.add_argument("--input-file", + help="override default input file path", + default=None, + type=str) + + sub = subparsers.add_parser('everything', + help="run all the commands") + sub.set_defaults(func='everything') + + sub = subparsers.add_parser('init_db', + help="create sqlite3 output file and tables") + sub.set_defaults(func='init_db') + + sub = subparsers.add_parser('summarize', + help="aggregate metadata from all tables into 'journals' table") + sub.set_defaults(func='summarize') + + sub = subparsers.add_parser('export', + help="dump JSON output") + sub.set_defaults(func='export') + + sub = subparsers.add_parser('export_fatcat', + help="dump JSON output in a format that can load into fatcat") + sub.set_defaults(func='export_fatcat') + + # TODO: 'jurn' + for ind in ('doaj', 'road', 'crossref', 'entrez', 'norwegian', 'szczepanski', 'ezb', 'gold_oa', 'wikidata', 'openapc'): + sub = subparsers.add_parser('index_{}'.format(ind), + help="load metadata from {}".format(ind)) + sub.set_defaults(func='index_{}'.format(ind)) + + sub = subparsers.add_parser('load_fatcat', + help="load fatcat container metadata") + sub.set_defaults(func='load_fatcat') + + sub = subparsers.add_parser('load_fatcat_stats', + help="update container-level stats from JSON file") + sub.set_defaults(func='load_fatcat_stats') + + sub = subparsers.add_parser('export_urls', + help="dump homepage URLs (eg, to crawl for status)") + sub.set_defaults(func='export_urls') + + sub = subparsers.add_parser('update_url_status', + help="import homepage URL crawl status") + sub.set_defaults(func='update_url_status') + + args = parser.parse_args() + if not args.__dict__.get("func"): + print("tell me what to do! (try --help)") + sys.exit(-1) + + cdb = ChoculaDatabase(args.db_file) + if args.func.startswith('index_') or args.func in ('everything','summarize',): + cdb.read_issn_map_file(ISSNL_FILE) + func = getattr(cdb, args.func) + func(args) + +if __name__ == '__main__': + main() + |