1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
|
#!/usr/bin/env python3
"""
Count Chocula - online serials metadata and stats
"one, two, three, un-preserved web-native open-access long-tail indie
journals, hah, hah, hah!"
(yeah, I know, this name isn't very good)
(see also: https://teamyacht.com/ernstchoukula.com/Ernst-Choukula.html)
Commands:
everything
init_db
summarize
export
export_fatcat
index_doaj
index_road
index_crossref
index_entrez
index_norwegian
index_szczepanski
index_ezb
index_gold_oa
index_wikidata
index_openapc
load_fatcat
load_fatcat_stats
export_urls
update_url_status
Future commands:
index_jurn
index_datacite
preserve_kbart --keeper SLUG
preserve_sim
See TODO.md for more work-in-progress
"""
import sys
import csv
import argparse
from chocula import ChoculaDatabase, ChoculaConfig
def main():
    """Command-line entry point: parse arguments and dispatch one command.

    Each subcommand stores its own name in ``args.func``. After parsing, a
    ``ChoculaDatabase`` is opened on ``--db-file`` and the method of that
    name is looked up on it and called with the parsed ``args`` namespace
    (so ``--input-file`` reaches the command through ``args``).

    For ``index_*`` commands, ``everything`` and ``summarize``, the ISSN map
    file named by the config (``config.issnl.filepath``) is read into the
    database object before the command runs.

    Prints a hint and exits with status -1 when no subcommand is given.
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    subparsers = parser.add_subparsers()

    parser.add_argument("--db-file",
        # Previously carried a help string copy-pasted from an unrelated
        # option; this value is the path handed to ChoculaDatabase below.
        help="path to the sqlite3 database file",
        default='chocula.sqlite',
        type=str)
    parser.add_argument("--input-file",
        help="override default input file path",
        default=None,
        type=str)

    # TODO: 'jurn'
    index_sources = ('doaj', 'road', 'crossref', 'entrez', 'norwegian',
                     'szczepanski', 'ezb', 'gold_oa', 'wikidata', 'openapc')

    # (command name, help text) in the order they appear in --help output.
    commands = [
        ('everything', "run all the commands"),
        ('init_db', "create sqlite3 output file and tables"),
        ('summarize',
         "aggregate metadata from all tables into 'journals' table"),
        ('export', "dump JSON output"),
        ('export_fatcat',
         "dump JSON output in a format that can load into fatcat"),
    ]
    commands.extend(
        ('index_{}'.format(ind), "load metadata from {}".format(ind))
        for ind in index_sources)
    commands.extend([
        ('load_fatcat', "load fatcat container metadata"),
        ('load_fatcat_stats',
         "update container-level stats from JSON file"),
        ('export_urls', "dump homepage URLs (eg, to crawl for status)"),
        ('update_url_status', "import homepage URL crawl status"),
    ])

    for name, help_text in commands:
        sub = subparsers.add_parser(name, help=help_text)
        # The command name doubles as the ChoculaDatabase method name.
        sub.set_defaults(func=name)

    args = parser.parse_args()
    # 'func' is only set by a subparser, so it is absent when no command
    # was given on the command line.
    if not getattr(args, "func", None):
        print("tell me what to do! (try --help)")
        sys.exit(-1)

    config = ChoculaConfig.from_file()
    cdb = ChoculaDatabase(args.db_file)
    needs_issn_map = (args.func.startswith('index_')
                      or args.func in ('everything', 'summarize'))
    if needs_issn_map:
        cdb.read_issn_map_file(config.issnl.filepath)
    func = getattr(cdb, args.func)
    func(args)
# Run the CLI only when executed as a script, not when imported.
if "__main__" == __name__:
    main()
|