From 87099999ebf58b31e2fecd1e3b57bf6712f08b76 Mon Sep 17 00:00:00 2001
From: Bryan Newbold
Date: Thu, 26 Jul 2018 01:04:48 -0700
Subject: rename python scripts

---
 python/README.md         |  2 +-
 python/README_import.md  | 20 +++++------
 python/client.py         | 94 ------------------------------------------------
 python/fatcat_import.py  | 94 ++++++++++++++++++++++++++++++++++++++++++++++++
 python/fatcat_webface.py | 37 +++++++++++++++++++
 python/run.py            | 37 -------------------
 6 files changed, 142 insertions(+), 142 deletions(-)
 delete mode 100755 python/client.py
 create mode 100755 python/fatcat_import.py
 create mode 100755 python/fatcat_webface.py
 delete mode 100755 python/run.py

diff --git a/python/README.md b/python/README.md
index c7e33f0a..eebbbd9c 100644
--- a/python/README.md
+++ b/python/README.md
@@ -3,7 +3,7 @@

 Use `pipenv` (which you can install with `pip`).

-    pipenv run run.py
+    pipenv run fatcat_webface.py

 Run tests:

diff --git a/python/README_import.md b/python/README_import.md
index ae9764e6..38c8406f 100644
--- a/python/README_import.md
+++ b/python/README_import.md
@@ -24,7 +24,7 @@ the others:
 From CSV file:

     export LC_ALL=C.UTF-8
-    time ./client.py import-issn /srv/datasets/journal_extra_metadata.csv
+    time ./fatcat_import.py import-issn /srv/datasets/journal_extra_metadata.csv

     real    2m42.148s
     user    0m11.148s
@@ -36,38 +36,38 @@ Pretty quick, a few minutes.

 Directly from compressed tarball; takes about 2 hours in production:

-    tar xf /srv/datasets/public_profiles_API-2.0_2017_10_json.tar.gz -O | jq -c . | grep '"person":' | time parallel -j12 --pipe --round-robin ./client.py import-orcid -
+    tar xf /srv/datasets/public_profiles_API-2.0_2017_10_json.tar.gz -O | jq -c . | grep '"person":' | time parallel -j12 --pipe --round-robin ./fatcat_import.py import-orcid -

 After tuning database, `jq` CPU seems to be bottleneck, so, from pre-extracted tarball:

     tar xf /srv/datasets/public_profiles_API-2.0_2017_10_json.tar.gz -O | jq -c . | rg '"person":' > /srv/datasets/public_profiles_1_2_json.all.json
-    time parallel --bar --pipepart -j8 -a /srv/datasets/public_profiles_1_2_json.all.json ./client.py import-orcid -
+    time parallel --bar --pipepart -j8 -a /srv/datasets/public_profiles_1_2_json.all.json ./fatcat_import.py import-orcid -

 Does not work:

-    ./client.py import-orcid /data/orcid/partial/public_profiles_API-2.0_2017_10_json/3/0000-0001-5115-8623.json
+    ./fatcat_import.py import-orcid /data/orcid/partial/public_profiles_API-2.0_2017_10_json/3/0000-0001-5115-8623.json

 Instead:

-    cat /data/orcid/partial/public_profiles_API-2.0_2017_10_json/3/0000-0001-5115-8623.json | jq -c . | ./client.py import-orcid -
+    cat /data/orcid/partial/public_profiles_API-2.0_2017_10_json/3/0000-0001-5115-8623.json | jq -c . | ./fatcat_import.py import-orcid -

 Or for many files:

-    find /data/orcid/partial/public_profiles_API-2.0_2017_10_json/3 -iname '*.json' | parallel --bar jq -c . {} | rg '"person":' | ./client.py import-orcid -
+    find /data/orcid/partial/public_profiles_API-2.0_2017_10_json/3 -iname '*.json' | parallel --bar jq -c . {} | rg '"person":' | ./fatcat_import.py import-orcid -

 ### ORCID Performance

 for ~9k files:

-    (python-B2RYrks8) bnewbold@orithena$ time parallel --pipepart -j4 -a /data/orcid/partial/public_profiles_API-2.0_2017_10_json/all.json ./client.py import-orcid -
+    (python-B2RYrks8) bnewbold@orithena$ time parallel --pipepart -j4 -a /data/orcid/partial/public_profiles_API-2.0_2017_10_json/all.json ./fatcat_import.py import-orcid -

     real    0m15.294s
     user    0m28.112s
     sys     0m2.408s

 => 636/second

-    (python-B2RYrks8) bnewbold@orithena$ time ./client.py import-orcid /data/orcid/partial/public_profiles_API-2.0_2017_10_json/all.json
+    (python-B2RYrks8) bnewbold@orithena$ time ./fatcat_import.py import-orcid /data/orcid/partial/public_profiles_API-2.0_2017_10_json/all.json

     real    0m47.268s
     user    0m2.616s
     sys     0m0.104s
@@ -94,11 +94,11 @@ After some simple database tuning:

 From compressed:

-    xzcat /srv/datasets/crossref-works.2018-01-21.json.xz | time parallel -j20 --round-robin --pipe ./client.py import-crossref - /srv/datasets/20180216.ISSN-to-ISSN-L.txt
+    xzcat /srv/datasets/crossref-works.2018-01-21.json.xz | time parallel -j20 --round-robin --pipe ./fatcat_import.py import-crossref - /srv/datasets/20180216.ISSN-to-ISSN-L.txt

 ## Manifest

-    time ./client.py import-manifest /srv/datasets/idents_files_urls.sqlite
+    time ./fatcat_import.py import-manifest /srv/datasets/idents_files_urls.sqlite

     [...]
     Finished a batch; row 284518671 of 9669646 (2942.39%). Total inserted: 6606900
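Worth noting from the README changes above: the importers consume one JSON object per line on stdin (passed as `-`), which is why pretty-printed profile files must first be flattened with `jq -c .`. A minimal sketch of that pattern (the input file name here is hypothetical):

    # flatten pretty-printed JSON to one object per line, then import via stdin ("-")
    jq -c . some_orcid_profile.json | ./fatcat_import.py import-orcid -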
{} | rg '"person":' | ./fatcat_import.py import-orcid - ### ORCID Performance for ~9k files: - (python-B2RYrks8) bnewbold@orithena$ time parallel --pipepart -j4 -a /data/orcid/partial/public_profiles_API-2.0_2017_10_json/all.json ./client.py import-orcid - + (python-B2RYrks8) bnewbold@orithena$ time parallel --pipepart -j4 -a /data/orcid/partial/public_profiles_API-2.0_2017_10_json/all.json ./fatcat_import.py import-orcid - real 0m15.294s user 0m28.112s sys 0m2.408s => 636/second - (python-B2RYrks8) bnewbold@orithena$ time ./client.py import-orcid /data/orcid/partial/public_profiles_API-2.0_2017_10_json/all.json + (python-B2RYrks8) bnewbold@orithena$ time ./fatcat_import.py import-orcid /data/orcid/partial/public_profiles_API-2.0_2017_10_json/all.json real 0m47.268s user 0m2.616s sys 0m0.104s @@ -94,11 +94,11 @@ After some simple database tuning: From compressed: - xzcat /srv/datasets/crossref-works.2018-01-21.json.xz | time parallel -j20 --round-robin --pipe ./client.py import-crossref - /srv/datasets/20180216.ISSN-to-ISSN-L.txt + xzcat /srv/datasets/crossref-works.2018-01-21.json.xz | time parallel -j20 --round-robin --pipe ./fatcat_import.py import-crossref - /srv/datasets/20180216.ISSN-to-ISSN-L.txt ## Manifest - time ./client.py import-manifest /srv/datasets/idents_files_urls.sqlite + time ./fatcat_import.py import-manifest /srv/datasets/idents_files_urls.sqlite [...] Finished a batch; row 284518671 of 9669646 (2942.39%). Total inserted: 6606900 diff --git a/python/client.py b/python/client.py deleted file mode 100755 index 2804a210..00000000 --- a/python/client.py +++ /dev/null @@ -1,94 +0,0 @@ -#!/usr/bin/env python3 - -import sys -import argparse -from fatcat.raw_api_client import RawFatcatApiClient -from fatcat.crossref_importer import FatcatCrossrefImporter -from fatcat.orcid_importer import FatcatOrcidImporter -from fatcat.manifest_importer import FatcatManifestImporter -from fatcat.issn_importer import FatcatIssnImporter - -def run_import_crossref(args): - fci = FatcatCrossrefImporter(args.host_url, args.issn_map_file, - create_containers=(not args.no_create_containers)) - fci.process_batch(args.json_file, size=args.batch_size) - -def run_import_orcid(args): - foi = FatcatOrcidImporter(args.host_url) - foi.process_batch(args.json_file, size=args.batch_size) - -def run_import_issn(args): - fii = FatcatIssnImporter(args.host_url) - fii.process_csv_batch(args.csv_file, size=args.batch_size) - -def run_import_manifest(args): - fmi = FatcatManifestImporter(args.host_url) - fmi.process_db(args.db_path, size=args.batch_size) - -def health(args): - rfac = RawFatcatApiClient(args.host_url) - print(rfac.health()) - -def main(): - parser = argparse.ArgumentParser() - parser.add_argument('--debug', - action='store_true', - help="enable debugging interface") - parser.add_argument('--host-url', - default="http://localhost:9411/v0", - help="connect to this host/port") - subparsers = parser.add_subparsers() - - sub_import_crossref = subparsers.add_parser('import-crossref') - sub_import_crossref.set_defaults(func=run_import_crossref) - sub_import_crossref.add_argument('json_file', - help="crossref JSON file to import from", - default=sys.stdin, type=argparse.FileType('r')) - sub_import_crossref.add_argument('issn_map_file', - help="ISSN to ISSN-L mapping file", - default=sys.stdin, type=argparse.FileType('r')) - sub_import_crossref.add_argument('--no-create-containers', - action='store_true', - help="skip creation of new container entities based on ISSN") - 
-    sub_import_crossref.add_argument('--batch-size',
-        help="size of batch to send",
-        default=50, type=int)
-
-    sub_import_orcid = subparsers.add_parser('import-orcid')
-    sub_import_orcid.set_defaults(func=run_import_orcid)
-    sub_import_orcid.add_argument('json_file',
-        help="orcid JSON file to import from (or stdin)",
-        default=sys.stdin, type=argparse.FileType('r'))
-    sub_import_orcid.add_argument('--batch-size',
-        help="size of batch to send",
-        default=50, type=int)
-
-    sub_import_issn = subparsers.add_parser('import-issn')
-    sub_import_issn.set_defaults(func=run_import_issn)
-    sub_import_issn.add_argument('csv_file',
-        help="Journal ISSN CSV metadata file to import from (or stdin)",
-        default=sys.stdin, type=argparse.FileType('r'))
-    sub_import_issn.add_argument('--batch-size',
-        help="size of batch to send",
-        default=50, type=int)
-
-    sub_import_manifest = subparsers.add_parser('import-manifest')
-    sub_import_manifest.set_defaults(func=run_import_manifest)
-    sub_import_manifest.add_argument('db_path',
-        help="sqlite3 database to import from",
-        type=str)
-    sub_import_manifest.add_argument('--batch-size',
-        help="size of batch to send",
-        default=50, type=int)
-
-    sub_health = subparsers.add_parser('health')
-    sub_health.set_defaults(func=health)
-
-    args = parser.parse_args()
-    if not args.__dict__.get("func"):
-        print("tell me what to do!")
-        sys.exit(-1)
-    args.func(args)
-
-if __name__ == '__main__':
-    main()
diff --git a/python/fatcat_import.py b/python/fatcat_import.py
new file mode 100755
index 00000000..2804a210
--- /dev/null
+++ b/python/fatcat_import.py
@@ -0,0 +1,94 @@
+#!/usr/bin/env python3
+
+import sys
+import argparse
+from fatcat.raw_api_client import RawFatcatApiClient
+from fatcat.crossref_importer import FatcatCrossrefImporter
+from fatcat.orcid_importer import FatcatOrcidImporter
+from fatcat.manifest_importer import FatcatManifestImporter
+from fatcat.issn_importer import FatcatIssnImporter
+
+def run_import_crossref(args):
+    fci = FatcatCrossrefImporter(args.host_url, args.issn_map_file,
+        create_containers=(not args.no_create_containers))
+    fci.process_batch(args.json_file, size=args.batch_size)
+
+def run_import_orcid(args):
+    foi = FatcatOrcidImporter(args.host_url)
+    foi.process_batch(args.json_file, size=args.batch_size)
+
+def run_import_issn(args):
+    fii = FatcatIssnImporter(args.host_url)
+    fii.process_csv_batch(args.csv_file, size=args.batch_size)
+
+def run_import_manifest(args):
+    fmi = FatcatManifestImporter(args.host_url)
+    fmi.process_db(args.db_path, size=args.batch_size)
+
+def health(args):
+    rfac = RawFatcatApiClient(args.host_url)
+    print(rfac.health())
+
+def main():
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--debug',
+        action='store_true',
+        help="enable debugging interface")
+    parser.add_argument('--host-url',
+        default="http://localhost:9411/v0",
+        help="connect to this host/port")
+    subparsers = parser.add_subparsers()
+
+    sub_import_crossref = subparsers.add_parser('import-crossref')
+    sub_import_crossref.set_defaults(func=run_import_crossref)
+    sub_import_crossref.add_argument('json_file',
+        help="crossref JSON file to import from",
+        default=sys.stdin, type=argparse.FileType('r'))
+    sub_import_crossref.add_argument('issn_map_file',
+        help="ISSN to ISSN-L mapping file",
+        default=sys.stdin, type=argparse.FileType('r'))
+    sub_import_crossref.add_argument('--no-create-containers',
+        action='store_true',
+        help="skip creation of new container entities based on ISSN")
+    sub_import_crossref.add_argument('--batch-size',
+        help="size of batch to send",
+        default=50, type=int)
+
+    sub_import_orcid = subparsers.add_parser('import-orcid')
+    sub_import_orcid.set_defaults(func=run_import_orcid)
+    sub_import_orcid.add_argument('json_file',
+        help="orcid JSON file to import from (or stdin)",
+        default=sys.stdin, type=argparse.FileType('r'))
+    sub_import_orcid.add_argument('--batch-size',
+        help="size of batch to send",
+        default=50, type=int)
+
+    sub_import_issn = subparsers.add_parser('import-issn')
+    sub_import_issn.set_defaults(func=run_import_issn)
+    sub_import_issn.add_argument('csv_file',
+        help="Journal ISSN CSV metadata file to import from (or stdin)",
+        default=sys.stdin, type=argparse.FileType('r'))
+    sub_import_issn.add_argument('--batch-size',
+        help="size of batch to send",
+        default=50, type=int)
+
+    sub_import_manifest = subparsers.add_parser('import-manifest')
+    sub_import_manifest.set_defaults(func=run_import_manifest)
+    sub_import_manifest.add_argument('db_path',
+        help="sqlite3 database to import from",
+        type=str)
+    sub_import_manifest.add_argument('--batch-size',
+        help="size of batch to send",
+        default=50, type=int)
+
+    sub_health = subparsers.add_parser('health')
+    sub_health.set_defaults(func=health)
+
+    args = parser.parse_args()
+    if not args.__dict__.get("func"):
+        print("tell me what to do!")
+        sys.exit(-1)
+    args.func(args)
+
+if __name__ == '__main__':
+    main()
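The argparse wiring above defines one subcommand per importer plus `health`, with global flags taken before the subcommand. A usage sketch against the script's default local API endpoint (input file names are hypothetical):

    # check that the API server is reachable
    ./fatcat_import.py --host-url http://localhost:9411/v0 health

    # journal metadata from a CSV file, with a larger batch size
    ./fatcat_import.py import-issn journal_metadata.csv --batch-size 100

    # crossref works from stdin ("-"), plus an ISSN-to-ISSN-L map file
    xzcat crossref-works.json.xz | ./fatcat_import.py import-crossref - ISSN-to-ISSN-L.txt --no-create-containers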
help="size of batch to send", + default=50, type=int) + + sub_import_orcid = subparsers.add_parser('import-orcid') + sub_import_orcid.set_defaults(func=run_import_orcid) + sub_import_orcid.add_argument('json_file', + help="orcid JSON file to import from (or stdin)", + default=sys.stdin, type=argparse.FileType('r')) + sub_import_orcid.add_argument('--batch-size', + help="size of batch to send", + default=50, type=int) + + sub_import_issn = subparsers.add_parser('import-issn') + sub_import_issn.set_defaults(func=run_import_issn) + sub_import_issn.add_argument('csv_file', + help="Journal ISSN CSV metadata file to import from (or stdin)", + default=sys.stdin, type=argparse.FileType('r')) + sub_import_issn.add_argument('--batch-size', + help="size of batch to send", + default=50, type=int) + + sub_import_manifest = subparsers.add_parser('import-manifest') + sub_import_manifest.set_defaults(func=run_import_manifest) + sub_import_manifest.add_argument('db_path', + help="sqlite3 database to import from", + type=str) + sub_import_manifest.add_argument('--batch-size', + help="size of batch to send", + default=50, type=int) + + sub_health = subparsers.add_parser('health') + sub_health.set_defaults(func=health) + + args = parser.parse_args() + if not args.__dict__.get("func"): + print("tell me what to do!") + sys.exit(-1) + args.func(args) + +if __name__ == '__main__': + main() diff --git a/python/fatcat_webface.py b/python/fatcat_webface.py new file mode 100755 index 00000000..cfddad48 --- /dev/null +++ b/python/fatcat_webface.py @@ -0,0 +1,37 @@ +#!/usr/bin/env python3 + +import argparse +from fatcat import app + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument('--debug', + action='store_true', + help="enable debugging interface (note: not for everything)") + parser.add_argument('--host', + default="127.0.0.1", + help="listen on this host/IP") + parser.add_argument('--port', + type=int, + default=9810, + help="listen on this port") + parser.add_argument('--database-uri', + default=app.config['SQLALCHEMY_DATABASE_URI'], + help="sqlalchemy database string") + parser.add_argument('--init-db', + action='store_true', + help="create database tables and insert dummy data") + args = parser.parse_args() + + app.config['SQLALCHEMY_DATABASE_URI'] = args.database_uri + + if args.init_db: + db.create_all() + fatcat.sql.populate_db() + print("Dummy database configured: " + app.config['SQLALCHEMY_DATABASE_URI']) + return + + app.run(debug=args.debug, host=args.host, port=args.port) + +if __name__ == '__main__': + main() diff --git a/python/run.py b/python/run.py deleted file mode 100755 index cfddad48..00000000 --- a/python/run.py +++ /dev/null @@ -1,37 +0,0 @@ -#!/usr/bin/env python3 - -import argparse -from fatcat import app - -def main(): - parser = argparse.ArgumentParser() - parser.add_argument('--debug', - action='store_true', - help="enable debugging interface (note: not for everything)") - parser.add_argument('--host', - default="127.0.0.1", - help="listen on this host/IP") - parser.add_argument('--port', - type=int, - default=9810, - help="listen on this port") - parser.add_argument('--database-uri', - default=app.config['SQLALCHEMY_DATABASE_URI'], - help="sqlalchemy database string") - parser.add_argument('--init-db', - action='store_true', - help="create database tables and insert dummy data") - args = parser.parse_args() - - app.config['SQLALCHEMY_DATABASE_URI'] = args.database_uri - - if args.init_db: - db.create_all() - fatcat.sql.populate_db() - print("Dummy database 
configured: " + app.config['SQLALCHEMY_DATABASE_URI']) - return - - app.run(debug=args.debug, host=args.host, port=args.port) - -if __name__ == '__main__': - main() -- cgit v1.2.3