about | summary | refs | log | tree | commit | diff | stats
path: root/chocula_tool.py
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@archive.org> 2020-06-01 17:01:20 -0700
committer Bryan Newbold <bnewbold@archive.org> 2020-06-01 17:01:20 -0700
commit 57db2db336c08031324e44b2d2880fbd4b6893c9 (patch)
tree f5ad462ab6b3e7d3ac7987049e8c604bd5ee9fbe /chocula_tool.py
parent 08867f9b8de576f0831e6bb9f7b88acddcc31dee (diff)
download chocula-57db2db336c08031324e44b2d2880fbd4b6893c9.tar.gz
chocula-57db2db336c08031324e44b2d2880fbd4b6893c9.zip
'everything' at least partially working
Diffstat (limited to 'chocula_tool.py')
-rwxr-xr-x chocula_tool.py 123
1 files changed, 0 insertions, 123 deletions
diff --git a/chocula_tool.py b/chocula_tool.py
deleted file mode 100755
index 7dfe80e..0000000
--- a/chocula_tool.py
+++ /dev/null
@@ -1,123 +0,0 @@
-#!/usr/bin/env python3
-
-"""
-Count Chocula - online serials metadata and stats
-
- "one, two, three, un-preserved web-native open-access long-tail indie
- journals, hah, hah, hah!"
-
- (yeah, I know, this name isn't very good)
- (see also: https://teamyacht.com/ernstchoukula.com/Ernst-Choukula.html)
-
-Commands:
-
- everything
- init_db
- summarize
- export
- export_fatcat
-
- index_doaj
- index_road
- index_crossref
- index_entrez
- index_norwegian
- index_szczepanski
- index_ezb
- index_wikidata
- index_openapc
-
- load_fatcat
- load_fatcat_stats
-
- export_urls
- update_url_status
-
-Future commands:
-
- index_jurn
- index_datacite
- preserve_kbart --keeper SLUG
- preserve_sim
-
-See TODO.md for more work-in-progress
-"""
-
-import sys
-import csv
-import argparse
-
-from chocula import ChoculaDatabase, ChoculaConfig
-
-
-def main():
- parser = argparse.ArgumentParser(
- formatter_class=argparse.ArgumentDefaultsHelpFormatter)
- subparsers = parser.add_subparsers()
-
- parser.add_argument("--db-file",
- help="run in mode that considers only terminal HTML success",
- default='chocula.sqlite',
- type=str)
- parser.add_argument("--input-file",
- help="override default input file path",
- default=None,
- type=str)
-
- sub = subparsers.add_parser('everything',
- help="run all the commands")
- sub.set_defaults(func='everything')
-
- sub = subparsers.add_parser('init_db',
- help="create sqlite3 output file and tables")
- sub.set_defaults(func='init_db')
-
- sub = subparsers.add_parser('summarize',
- help="aggregate metadata from all tables into 'journals' table")
- sub.set_defaults(func='summarize')
-
- sub = subparsers.add_parser('export',
- help="dump JSON output")
- sub.set_defaults(func='export')
-
- sub = subparsers.add_parser('export_fatcat',
- help="dump JSON output in a format that can load into fatcat")
- sub.set_defaults(func='export_fatcat')
-
- # TODO: 'jurn'
- for ind in ('doaj', 'road', 'crossref', 'entrez', 'norwegian', 'szczepanski', 'ezb', 'gold_oa', 'wikidata', 'openapc'):
- sub = subparsers.add_parser('index_{}'.format(ind),
- help="load metadata from {}".format(ind))
- sub.set_defaults(func='index_{}'.format(ind))
-
- sub = subparsers.add_parser('load_fatcat',
- help="load fatcat container metadata")
- sub.set_defaults(func='load_fatcat')
-
- sub = subparsers.add_parser('load_fatcat_stats',
- help="update container-level stats from JSON file")
- sub.set_defaults(func='load_fatcat_stats')
-
- sub = subparsers.add_parser('export_urls',
- help="dump homepage URLs (eg, to crawl for status)")
- sub.set_defaults(func='export_urls')
-
- sub = subparsers.add_parser('update_url_status',
- help="import homepage URL crawl status")
- sub.set_defaults(func='update_url_status')
-
- args = parser.parse_args()
- if not args.__dict__.get("func"):
- print("tell me what to do! (try --help)")
- sys.exit(-1)
-
- config = ChoculaConfig.from_file()
- cdb = ChoculaDatabase(args.db_file)
- if args.func.startswith('index_') or args.func in ('everything','summarize',):
- cdb.read_issn_map_file(config.issnl.filepath)
- func = getattr(cdb, args.func)
- func(args)
-
-if __name__ == '__main__':
- main()
-