aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--[-rwxr-xr-x]chocula/__main__.py136
1 files changed, 77 insertions, 59 deletions
diff --git a/chocula/__main__.py b/chocula/__main__.py
index 21f3976..f897dd1 100755..100644
--- a/chocula/__main__.py
+++ b/chocula/__main__.py
@@ -16,30 +16,30 @@ Commands:
summarize
export
export_fatcat
-
- index_doaj
- index_road
- index_crossref
- index_entrez
- index_norwegian
- index_szczepanski
- index_ezb
- index_wikidata
- index_openapc
- index_sim
-
- load_fatcat_containers
- load_fatcat_stats
- load_homepage_status
-
export_urls
-Future commands:
-
- index_jurn
- index_datacite
- preserve_kbart --keeper SLUG
- preserve_sim
+ directory <source>
+ doaj
+ road
+ crossref
+ entrez
+ norwegian
+ szczepanski
+ ezb
+ wikidata
+ openapc
+ sim
+
+ load <source>
+ fatcat_containers
+ fatcat_stats
+ homepage_status
+
+ kbart <source>
+ jstor
+ clockss
+ lockss
+ portico
See TODO.md for more work-in-progress
"""
@@ -48,7 +48,8 @@ import sys
import csv
import argparse
-from chocula import ChoculaDatabase, ChoculaConfig, IssnDatabase, ALL_CHOCULA_DIR_CLASSES
+from chocula import ChoculaDatabase, ChoculaConfig, IssnDatabase,\
+ ALL_CHOCULA_DIR_CLASSES, ALL_CHOCULA_KBART_CLASSES
def run_everything(config, database):
@@ -58,27 +59,48 @@ def run_everything(config, database):
loader = cls(config)
counts = loader.index_file(database)
print(counts)
+ for cls in ALL_CHOCULA_KBART_CLASSES:
+ loader = cls(config)
+ counts = loader.index_file(database)
+ print(counts)
- # XXX: TODO:
database.load_fatcat_containers(config)
database.load_fatcat_stats(config)
- # XXX: TODO:
- #self.preserve_kbart('lockss', LOCKSS_FILE)
- #self.preserve_kbart('clockss', CLOCKSS_FILE)
- #self.preserve_kbart('portico', PORTICO_FILE)
- #self.preserve_kbart('jstor', JSTOR_FILE)
- #self.preserve_sim(args)
database.load_homepage_status(config)
database.summarize()
print("### Done with everything!")
-def run_index(config, database, cls):
- loader = cls(config)
- counts = loader.index_file(database)
- print(counts)
+def run_directory(config, database, source):
+ for cls in ALL_CHOCULA_DIR_CLASSES:
+ if cls.source_slug == source:
+ loader = cls(config)
+ counts = loader.index_file(database)
+ print(counts)
+ return
+ raise NotImplementedError(f"unknown source: {source}")
+
+def run_kbart(config, database, source):
+ for cls in ALL_CHOCULA_KBART_CLASSES:
+ if cls.source_slug == source:
+ loader = cls(config)
+ counts = loader.index_file(database)
+ print(counts)
+ return
+ raise NotImplementedError(f"unknown source: {source}")
+
+def run_load(config, database, source):
+ if source == 'fatcat_stats':
+ print(database.load_fatcat_stats(config))
+ elif source == 'fatcat_containers':
+ print(database.load_fatcat_containers(config))
+ elif source == 'homepage_status':
+ print(database.load_homepage_status(config))
+ else:
+ raise NotImplementedError(f"unknown source: {source}")
def main():
parser = argparse.ArgumentParser(
+ prog="python -m chocula",
formatter_class=argparse.ArgumentDefaultsHelpFormatter)
subparsers = parser.add_subparsers()
@@ -107,46 +129,42 @@ def main():
help="dump JSON output in a format that can load into fatcat")
sub.set_defaults(func='export_fatcat')
- for cls in ALL_CHOCULA_DIR_CLASSES:
- sub = subparsers.add_parser('index_{}'.format(cls.source_slug),
- help="load metadata from {}".format(cls.source_slug))
- sub.set_defaults(func='index_{}'.format(cls.source_slug), index_cls=cls)
-
- sub = subparsers.add_parser('load_fatcat_containers',
- help="load fatcat container metadata")
- sub.set_defaults(func='load_fatcat_containers')
-
- sub = subparsers.add_parser('load_fatcat_stats',
- help="update container-level stats from JSON file")
- sub.set_defaults(func='load_fatcat_stats')
-
sub = subparsers.add_parser('export_urls',
help="dump homepage URLs (eg, to crawl for status)")
sub.set_defaults(func='export_urls')
- sub = subparsers.add_parser('load_homepage_status',
- help="import homepage URL crawl status")
- sub.set_defaults(func='load_homepage_status')
+ sub = subparsers.add_parser('directory',
+ help="index directory metadata from a given source")
+ sub.add_argument("source", type=str, help="short name of source to index")
+ sub.set_defaults(func=run_directory)
+
+ sub = subparsers.add_parser('load',
+ help="load metadata of a given type")
+ sub.add_argument("source", type=str, help="short name of source to index")
+ sub.set_defaults(func=run_load)
+
+ sub = subparsers.add_parser('kbart',
+ help="index KBART holding metadata for a given source")
+ sub.add_argument("source", type=str, help="short name of source to index")
+ sub.set_defaults(func=run_kbart)
args = parser.parse_args()
if not args.__dict__.get("func"):
- print("tell me what to do! (try --help)")
+ parser.print_help()
sys.exit(-1)
config = ChoculaConfig.from_file()
- if args.func.startswith('index_') or args.func in ('everything','summarize',):
+ issn_db: Optional[IssnDatabase] = None
+ if args.func in ('everything', 'summarize', run_directory, run_kbart):
issn_db = IssnDatabase(config.issnl.filepath)
- else:
- issn_db = None
+
cdb = ChoculaDatabase(args.db_file, issn_db)
if args.func == 'everything':
run_everything(config, cdb)
- elif args.func.startswith('index_'):
- print(run_index(config, cdb, args.index_cls))
- elif args.func.startswith('load_'):
- func = getattr(cdb, args.func)
- print(func(config))
+ elif args.func in (run_directory, run_load, run_kbart):
+ args.func(config, cdb, args.source)
else:
+ # all other commands run on the ChoculaDatabase itself
func = getattr(cdb, args.func)
print(func(), file=sys.stderr)