aboutsummaryrefslogtreecommitdiffstats
path: root/chocula_tool.py
blob: 76e7c592e7282bd37714e8f4c9addf4a25271c1d (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
#!/usr/bin/env python3

"""
Count Chocula - online serials metadata and stats

  "one, two, three, un-preserved web-native open-access long-tail indie
  journals, hah, hah, hah!"

  (yeah, I know, this name isn't very good)
  (see also: https://teamyacht.com/ernstchoukula.com/Ernst-Choukula.html)

Commands:

    everything
    init_db
    summarize
    export
    export_fatcat

    index_doaj
    index_road
    index_crossref
    index_entrez
    index_norwegian
    index_szczepanski
    index_ezb
    index_gold_oa
    index_wikidata
    index_openapc

    load_fatcat
    load_fatcat_stats

    export_urls
    update_url_status

Future commands:

    index_jurn
    index_datacite
    preserve_kbart --keeper SLUG
    preserve_sim

See TODO.md for more work-in-progress
"""

import sys
import csv
import argparse

from chocula import ChoculaDatabase, ChoculaConfig


def main():
    """Parse command-line arguments and dispatch to a ChoculaDatabase method.

    Each subcommand name is stored as ``args.func`` and later resolved with
    ``getattr`` to the method of the same name on ``ChoculaDatabase``, which
    is called with the parsed ``args`` namespace.

    Global options:
        --db-file     path to the sqlite3 database file (default
                      'chocula.sqlite'); passed to ``ChoculaDatabase``.
        --input-file  override for the default input file path.

    Exits with status -1 (after printing a hint to stderr) when no
    subcommand was given.
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    subparsers = parser.add_subparsers()

    parser.add_argument("--db-file",
        help="path to the sqlite3 database file",
        default='chocula.sqlite',
        type=str)
    parser.add_argument("--input-file",
        help="override default input file path",
        default=None,
        type=str)

    # TODO: 'jurn'
    index_sources = ('doaj', 'road', 'crossref', 'entrez', 'norwegian',
                     'szczepanski', 'ezb', 'gold_oa', 'wikidata', 'openapc')

    # (subcommand name, help text), in the order shown by --help. The name
    # doubles as the ChoculaDatabase method name to call.
    commands = [
        ('everything', "run all the commands"),
        ('init_db', "create sqlite3 output file and tables"),
        ('summarize',
            "aggregate metadata from all tables into 'journals' table"),
        ('export', "dump JSON output"),
        ('export_fatcat',
            "dump JSON output in a format that can load into fatcat"),
    ]
    commands += [
        ('index_{}'.format(source), "load metadata from {}".format(source))
        for source in index_sources
    ]
    commands += [
        ('load_fatcat', "load fatcat container metadata"),
        ('load_fatcat_stats',
            "update container-level stats from JSON file"),
        ('export_urls', "dump homepage URLs (eg, to crawl for status)"),
        ('update_url_status', "import homepage URL crawl status"),
    ]
    for name, help_text in commands:
        sub = subparsers.add_parser(name, help=help_text)
        sub.set_defaults(func=name)

    args = parser.parse_args()
    # Subcommands are optional for argparse, so 'func' is simply absent when
    # the user did not pick one.
    if not getattr(args, "func", None):
        print("tell me what to do! (try --help)", file=sys.stderr)
        sys.exit(-1)

    cdb = ChoculaDatabase(args.db_file)
    if args.func.startswith('index_') or args.func in ('everything', 'summarize'):
        # NOTE(review): ISSNL_FILE is not defined or imported anywhere in this
        # module, so this line raises NameError as written. It presumably
        # should come from ChoculaConfig (imported above but unused) or a
        # module-level constant -- confirm the intended source and wire it in.
        cdb.read_issn_map_file(ISSNL_FILE)
    func = getattr(cdb, args.func)
    func(args)

# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()