#!/usr/bin/env python3 """ Count Chocula - online serials metadata and stats "one, two, three, un-preserved web-native open-access long-tail indie journals, hah, hah, hah!" (yeah, I know, this name isn't very good) (see also: https://teamyacht.com/ernstchoukula.com/Ernst-Choukula.html) Commands: everything init_db summarize export export_fatcat export_urls directory doaj road crossref entrez norwegian szczepanski ezb wikidata openapc sim load fatcat_containers fatcat_stats homepage_status kbart jstor clockss lockss portico pkp_pln hathitrust See TODO.md for more work-in-progress """ import sys import argparse from typing import Optional from chocula import ( ChoculaDatabase, ChoculaConfig, IssnDatabase, ALL_CHOCULA_DIR_CLASSES, ALL_CHOCULA_KBART_CLASSES, ) def run_everything(config, database): database.init_db() for cls in ALL_CHOCULA_DIR_CLASSES: loader = cls(config) counts = loader.index_file(database) print(counts) for cls in ALL_CHOCULA_KBART_CLASSES: loader = cls(config) counts = loader.index_file(database) print(counts) database.load_fatcat_containers(config) database.load_fatcat_stats(config) database.load_homepage_status(config) database.summarize() print("### Done with everything!") def run_directory(config, database, source): for cls in ALL_CHOCULA_DIR_CLASSES: if cls.source_slug == source: loader = cls(config) counts = loader.index_file(database) print(counts) return raise NotImplementedError(f"unknown source: {source}") def run_kbart(config, database, source): for cls in ALL_CHOCULA_KBART_CLASSES: if cls.source_slug == source: loader = cls(config) counts = loader.index_file(database) print(counts) return raise NotImplementedError(f"unknown source: {source}") def run_load(config, database, source): if source == "fatcat_stats": print(database.load_fatcat_stats(config)) elif source == "fatcat_containers": print(database.load_fatcat_containers(config)) elif source == "homepage_status": print(database.load_homepage_status(config)) else: raise NotImplementedError(f"unknown source: {source}") def main(): parser = argparse.ArgumentParser( prog="python -m chocula", formatter_class=argparse.ArgumentDefaultsHelpFormatter ) subparsers = parser.add_subparsers() parser.add_argument( "--db-file", help="sqlite database file", default="chocula.sqlite", type=str ) sub = subparsers.add_parser("everything", help="run all the commands") sub.set_defaults(func="everything") sub = subparsers.add_parser("init_db", help="create sqlite3 output file and tables") sub.set_defaults(func="init_db") sub = subparsers.add_parser( "summarize", help="aggregate metadata from all tables into 'journals' table" ) sub.set_defaults(func="summarize") sub = subparsers.add_parser("export", help="dump JSON output") sub.set_defaults(func="export") sub = subparsers.add_parser( "export_fatcat", help="dump JSON output in a format that can load into fatcat" ) sub.set_defaults(func="export_fatcat") sub = subparsers.add_parser( "export_urls", help="dump homepage URLs (eg, to crawl for status)" ) sub.set_defaults(func="export_urls") sub = subparsers.add_parser( "directory", help="index directory metadata from a given source" ) sub.add_argument("source", type=str, help="short name of source to index") sub.set_defaults(func=run_directory) sub = subparsers.add_parser("load", help="load metadata of a given type") sub.add_argument("source", type=str, help="short name of source to index") sub.set_defaults(func=run_load) sub = subparsers.add_parser( "kbart", help="index KBART holding metadata for a given source" ) sub.add_argument("source", type=str, help="short name of source to index") sub.set_defaults(func=run_kbart) args = parser.parse_args() if not args.__dict__.get("func"): parser.print_help() sys.exit(-1) config = ChoculaConfig.from_file() issn_db: Optional[IssnDatabase] = None if args.func in ("everything", "summarize", run_directory, run_kbart): issn_db = IssnDatabase(config.issnl.filepath) cdb = ChoculaDatabase(args.db_file, issn_db) if args.func == "everything": run_everything(config, cdb) elif args.func in (run_directory, run_load, run_kbart): args.func(config, cdb, args.source) else: # all other commands run on the ChoculaDatabase itself func = getattr(cdb, args.func) print(func(), file=sys.stderr) if __name__ == "__main__": main()