From 8fb0feabfc556953d7e03af6417bf3aa98a897ed Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Tue, 2 Jun 2020 16:44:30 -0700 Subject: refactor main commands --- chocula/__main__.py | 136 +++++++++++++++++++++++++++++----------------------- 1 file changed, 77 insertions(+), 59 deletions(-) mode change 100755 => 100644 chocula/__main__.py diff --git a/chocula/__main__.py b/chocula/__main__.py old mode 100755 new mode 100644 index 21f3976..f897dd1 --- a/chocula/__main__.py +++ b/chocula/__main__.py @@ -16,30 +16,30 @@ Commands: summarize export export_fatcat - - index_doaj - index_road - index_crossref - index_entrez - index_norwegian - index_szczepanski - index_ezb - index_wikidata - index_openapc - index_sim - - load_fatcat_containers - load_fatcat_stats - load_homepage_status - export_urls -Future commands: - - index_jurn - index_datacite - preserve_kbart --keeper SLUG - preserve_sim + directory + doaj + road + crossref + entrez + norwegian + szczepanski + ezb + wikidata + openapc + sim + + load + fatcat_containers + fatcat_stats + homepage_status + + kbart + jstor + clockss + lockss + portico See TODO.md for more work-in-progress """ @@ -48,7 +48,8 @@ import sys import csv import argparse -from chocula import ChoculaDatabase, ChoculaConfig, IssnDatabase, ALL_CHOCULA_DIR_CLASSES +from chocula import ChoculaDatabase, ChoculaConfig, IssnDatabase,\ + ALL_CHOCULA_DIR_CLASSES, ALL_CHOCULA_KBART_CLASSES def run_everything(config, database): @@ -58,27 +59,48 @@ def run_everything(config, database): loader = cls(config) counts = loader.index_file(database) print(counts) + for cls in ALL_CHOCULA_KBART_CLASSES: + loader = cls(config) + counts = loader.index_file(database) + print(counts) - # XXX: TODO: database.load_fatcat_containers(config) database.load_fatcat_stats(config) - # XXX: TODO: - #self.preserve_kbart('lockss', LOCKSS_FILE) - #self.preserve_kbart('clockss', CLOCKSS_FILE) - #self.preserve_kbart('portico', PORTICO_FILE) - #self.preserve_kbart('jstor', JSTOR_FILE) - #self.preserve_sim(args) database.load_homepage_status(config) database.summarize() print("### Done with everything!") -def run_index(config, database, cls): - loader = cls(config) - counts = loader.index_file(database) - print(counts) +def run_directory(config, database, source): + for cls in ALL_CHOCULA_DIR_CLASSES: + if cls.source_slug == source: + loader = cls(config) + counts = loader.index_file(database) + print(counts) + return + raise NotImplementedError(f"unknown source: {source}") + +def run_kbart(config, database, source): + for cls in ALL_CHOCULA_KBART_CLASSES: + if cls.source_slug == source: + loader = cls(config) + counts = loader.index_file(database) + print(counts) + return + raise NotImplementedError(f"unknown source: {source}") + +def run_load(config, database, source): + if source == 'fatcat_stats': + print(database.load_fatcat_stats(config)) + elif source == 'fatcat_containers': + print(database.load_fatcat_containers(config)) + elif source == 'homepage_status': + print(database.load_homepage_status(config)) + else: + raise NotImplementedError(f"unknown source: {source}") def main(): parser = argparse.ArgumentParser( + prog="python -m chocula", formatter_class=argparse.ArgumentDefaultsHelpFormatter) subparsers = parser.add_subparsers() @@ -107,46 +129,42 @@ def main(): help="dump JSON output in a format that can load into fatcat") sub.set_defaults(func='export_fatcat') - for cls in ALL_CHOCULA_DIR_CLASSES: - sub = subparsers.add_parser('index_{}'.format(cls.source_slug), - help="load metadata from {}".format(cls.source_slug)) - sub.set_defaults(func='index_{}'.format(cls.source_slug), index_cls=cls) - - sub = subparsers.add_parser('load_fatcat_containers', - help="load fatcat container metadata") - sub.set_defaults(func='load_fatcat_containers') - - sub = subparsers.add_parser('load_fatcat_stats', - help="update container-level stats from JSON file") - sub.set_defaults(func='load_fatcat_stats') - sub = subparsers.add_parser('export_urls', help="dump homepage URLs (eg, to crawl for status)") sub.set_defaults(func='export_urls') - sub = subparsers.add_parser('load_homepage_status', - help="import homepage URL crawl status") - sub.set_defaults(func='load_homepage_status') + sub = subparsers.add_parser('directory', + help="index directory metadata from a given source") + sub.add_argument("source", type=str, help="short name of source to index") + sub.set_defaults(func=run_directory) + + sub = subparsers.add_parser('load', + help="load metadata of a given type") + sub.add_argument("source", type=str, help="short name of source to index") + sub.set_defaults(func=run_load) + + sub = subparsers.add_parser('kbart', + help="index KBART holding metadata for a given source") + sub.add_argument("source", type=str, help="short name of source to index") + sub.set_defaults(func=run_kbart) args = parser.parse_args() if not args.__dict__.get("func"): - print("tell me what to do! (try --help)") + parser.print_help() sys.exit(-1) config = ChoculaConfig.from_file() - if args.func.startswith('index_') or args.func in ('everything','summarize',): + issn_db: Optional[IssnDatabase] = None + if args.func in ('everything', 'summarize', run_directory, run_kbart): issn_db = IssnDatabase(config.issnl.filepath) - else: - issn_db = None + cdb = ChoculaDatabase(args.db_file, issn_db) if args.func == 'everything': run_everything(config, cdb) - elif args.func.startswith('index_'): - print(run_index(config, cdb, args.index_cls)) - elif args.func.startswith('load_'): - func = getattr(cdb, args.func) - print(func(config)) + elif args.func in (run_directory, run_load, run_kbart): + args.func(config, cdb, args.source) else: + # all other commands run on the ChoculaDatabase itself func = getattr(cdb, args.func) print(func(), file=sys.stderr) -- cgit v1.2.3