diff options
author | Bryan Newbold <bnewbold@archive.org> | 2020-06-01 17:01:20 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2020-06-01 17:01:20 -0700 |
commit | 57db2db336c08031324e44b2d2880fbd4b6893c9 (patch) | |
tree | f5ad462ab6b3e7d3ac7987049e8c604bd5ee9fbe /chocula_tool.py | |
parent | 08867f9b8de576f0831e6bb9f7b88acddcc31dee (diff) | |
download | chocula-57db2db336c08031324e44b2d2880fbd4b6893c9.tar.gz chocula-57db2db336c08031324e44b2d2880fbd4b6893c9.zip |
'everything' at least partially working
Diffstat (limited to 'chocula_tool.py')
-rwxr-xr-x | chocula_tool.py | 123 |
1 files changed, 0 insertions, 123 deletions
#!/usr/bin/env python3

"""
Count Chocula - online serials metadata and stats

    "one, two, three, un-preserved web-native open-access long-tail indie
    journals, hah, hah, hah!"

    (yeah, I know, this name isn't very good)
    (see also: https://teamyacht.com/ernstchoukula.com/Ernst-Choukula.html)

Commands:

    everything
    init_db
    summarize
    export
    export_fatcat

    index_doaj
    index_road
    index_crossref
    index_entrez
    index_norwegian
    index_szczepanski
    index_ezb
    index_gold_oa
    index_wikidata
    index_openapc

    load_fatcat
    load_fatcat_stats

    export_urls
    update_url_status

Future commands:

    index_jurn
    index_datacite
    preserve_kbart --keeper SLUG
    preserve_sim

See TODO.md for more work-in-progress
"""

import sys
import csv
import argparse

from chocula import ChoculaDatabase, ChoculaConfig


# Sources that get an 'index_<name>' subcommand, in the order they are
# registered (and therefore listed by --help).
# TODO: 'jurn'
_INDEX_SOURCES = (
    'doaj',
    'road',
    'crossref',
    'entrez',
    'norwegian',
    'szczepanski',
    'ezb',
    'gold_oa',
    'wikidata',
    'openapc',
)

# (command name, help text) pairs registered before the index_* commands.
_COMMANDS_BEFORE_INDEX = (
    ('everything', "run all the commands"),
    ('init_db', "create sqlite3 output file and tables"),
    ('summarize', "aggregate metadata from all tables into 'journals' table"),
    ('export', "dump JSON output"),
    ('export_fatcat', "dump JSON output in a format that can load into fatcat"),
)

# (command name, help text) pairs registered after the index_* commands.
_COMMANDS_AFTER_INDEX = (
    ('load_fatcat', "load fatcat container metadata"),
    ('load_fatcat_stats', "update container-level stats from JSON file"),
    ('export_urls', "dump homepage URLs (eg, to crawl for status)"),
    ('update_url_status', "import homepage URL crawl status"),
)


def _build_parser():
    """Return the argument parser: global options plus one subparser per command.

    Every subparser stores its own command name as ``func``; main() later
    resolves that string to the ChoculaDatabase method of the same name.
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    subparsers = parser.add_subparsers()

    # The value is handed straight to ChoculaDatabase(); the previous help
    # text described an unrelated option.
    parser.add_argument("--db-file",
        help="path to the sqlite3 database file",
        default='chocula.sqlite',
        type=str)
    parser.add_argument("--input-file",
        help="override default input file path",
        default=None,
        type=str)

    def add_command(name, help_text):
        sub = subparsers.add_parser(name, help=help_text)
        sub.set_defaults(func=name)

    for name, help_text in _COMMANDS_BEFORE_INDEX:
        add_command(name, help_text)
    for source in _INDEX_SOURCES:
        add_command('index_{}'.format(source),
                    "load metadata from {}".format(source))
    for name, help_text in _COMMANDS_AFTER_INDEX:
        add_command(name, help_text)

    return parser


def main():
    """Parse the command line and run the selected ChoculaDatabase command.

    Exits with status -1 (after printing a hint) when no subcommand is given.
    """
    args = _build_parser().parse_args()
    # 'func' is only set by a subparser, so it is absent when no command
    # was given on the command line.
    if not getattr(args, "func", None):
        print("tell me what to do! (try --help)")
        sys.exit(-1)

    config = ChoculaConfig.from_file()
    cdb = ChoculaDatabase(args.db_file)
    # The ISSN map file is read up front for these commands only.
    if args.func.startswith('index_') or args.func in ('everything', 'summarize'):
        cdb.read_issn_map_file(config.issnl.filepath)
    # Dispatch by name: each command is a ChoculaDatabase method taking args.
    func = getattr(cdb, args.func)
    func(args)


if __name__ == '__main__':
    main()