diff options
author | Bryan Newbold <bnewbold@robocracy.org> | 2018-11-13 11:32:41 -0800 |
---|---|---|
committer | Bryan Newbold <bnewbold@robocracy.org> | 2018-11-13 11:32:41 -0800 |
commit | 279b22e30d9b590838268f5f5acdaa1110ee593a (patch) | |
tree | c9965a089be1b8ef607573ea9261c0c378c0ab47 /python/fatcat_tools | |
parent | 7ebda2e051b51e49544ab75673b19ec5f27d9d45 (diff) | |
download | fatcat-279b22e30d9b590838268f5f5acdaa1110ee593a.tar.gz fatcat-279b22e30d9b590838268f5f5acdaa1110ee593a.zip |
shuffle around fatcat_tools layout
Diffstat (limited to 'python/fatcat_tools')
-rw-r--r-- | python/fatcat_tools/importers/common.py (renamed from python/fatcat_tools/importer_common.py) | 0 | ||||
-rw-r--r-- | python/fatcat_tools/importers/crossref.py (renamed from python/fatcat_tools/crossref_importer.py) | 2 | ||||
-rw-r--r--[-rwxr-xr-x] | python/fatcat_tools/importers/grobid_metadata.py (renamed from python/fatcat_tools/grobid_metadata_importer.py) | 2 | ||||
-rw-r--r-- | python/fatcat_tools/importers/issn.py (renamed from python/fatcat_tools/issn_importer.py) | 2 | ||||
-rw-r--r-- | python/fatcat_tools/importers/matched.py (renamed from python/fatcat_tools/matched_importer.py) | 2 | ||||
-rw-r--r-- | python/fatcat_tools/importers/orcid.py (renamed from python/fatcat_tools/orcid_importer.py) | 2 | ||||
-rw-r--r-- | python/fatcat_tools/raw_api_client.py | 66 | ||||
-rw-r--r-- | python/fatcat_tools/workers/changelog.py (renamed from python/fatcat_tools/changelog_workers.py) | 2 | ||||
-rw-r--r-- | python/fatcat_tools/workers/elastic.py (renamed from python/fatcat_tools/elastic_workers.py) | 2 | ||||
-rw-r--r-- | python/fatcat_tools/workers/worker_common.py (renamed from python/fatcat_tools/worker_common.py) | 0 |
10 files changed, 7 insertions, 73 deletions
diff --git a/python/fatcat_tools/importer_common.py b/python/fatcat_tools/importers/common.py index 8dfee875..8dfee875 100644 --- a/python/fatcat_tools/importer_common.py +++ b/python/fatcat_tools/importers/common.py diff --git a/python/fatcat_tools/crossref_importer.py b/python/fatcat_tools/importers/crossref.py index 6a5ad824..dddb58d1 100644 --- a/python/fatcat_tools/crossref_importer.py +++ b/python/fatcat_tools/importers/crossref.py @@ -5,7 +5,7 @@ import sqlite3 import datetime import itertools import fatcat_client -from fatcat_tools.importer_common import FatcatImporter +from fatcat_tools.importers.common import FatcatImporter class FatcatCrossrefImporter(FatcatImporter): diff --git a/python/fatcat_tools/grobid_metadata_importer.py b/python/fatcat_tools/importers/grobid_metadata.py index effa0d94..56b2ee02 100755..100644 --- a/python/fatcat_tools/grobid_metadata_importer.py +++ b/python/fatcat_tools/importers/grobid_metadata.py @@ -5,7 +5,7 @@ import json import base64 import datetime import fatcat_client -from fatcat_tools.importer_common import FatcatImporter +from fatcat_tools.importers.common import FatcatImporter MAX_ABSTRACT_BYTES=4096 diff --git a/python/fatcat_tools/issn_importer.py b/python/fatcat_tools/importers/issn.py index e3ed7382..d7fb9082 100644 --- a/python/fatcat_tools/issn_importer.py +++ b/python/fatcat_tools/importers/issn.py @@ -3,7 +3,7 @@ import sys import json import itertools import fatcat_client -from fatcat_tools.importer_common import FatcatImporter +from fatcat_tools.importers.common import FatcatImporter # CSV format (generated from git.archive.org/webgroup/oa-journal-analysis): # ISSN-L,in_doaj,in_road,in_norwegian,in_crossref,title,publisher,url,lang,ISSN-print,ISSN-electronic,doi_count,has_doi,is_oa,is_kept,publisher_size,url_live,url_live_status,url_live_final_status,url_live_final_url,url_live_status_simple,url_live_final_status_simple,url_domain,gwb_pdf_count diff --git a/python/fatcat_tools/matched_importer.py b/python/fatcat_tools/importers/matched.py index 627ab6f1..6270fe88 100644 --- a/python/fatcat_tools/matched_importer.py +++ b/python/fatcat_tools/importers/matched.py @@ -4,7 +4,7 @@ import json import sqlite3 import itertools import fatcat_client -from fatcat_tools.importer_common import FatcatImporter +from fatcat_tools.importers.common import FatcatImporter #row = row.split('\t') #assert len(row) == 2 diff --git a/python/fatcat_tools/orcid_importer.py b/python/fatcat_tools/importers/orcid.py index f2366c66..350c4c57 100644 --- a/python/fatcat_tools/orcid_importer.py +++ b/python/fatcat_tools/importers/orcid.py @@ -3,7 +3,7 @@ import sys import json import itertools import fatcat_client -from fatcat_tools.importer_common import FatcatImporter +from fatcat_tools.importers.common import FatcatImporter def value_or_none(e): if type(e) == dict: diff --git a/python/fatcat_tools/raw_api_client.py b/python/fatcat_tools/raw_api_client.py deleted file mode 100644 index 75151ebb..00000000 --- a/python/fatcat_tools/raw_api_client.py +++ /dev/null @@ -1,66 +0,0 @@ - -import sys -import json -import requests - - -class RawFatcatApiClient: - - def __init__(self, host_url): - self.host_url = host_url - self.session = requests.Session() - self._issn_map = dict() - - def get(self, path, data=None): - headers = {"content-type": "application/json"} - return self.session.get(self.host_url + path, json=data, - headers=headers) - - def post(self, path, data=None): - headers = {"content-type": "application/json"} - return self.session.post(self.host_url + path, json=data, - headers=headers) - - def new_editgroup(self): - rv = self.post('/v0/editgroup', data=dict( - editor_id=1)) - print(rv) - print(rv.json()) - assert rv.status_code == 201 - editgroup_id = rv.json()['id'] - return editgroup_id - - def accept_editgroup(self, eg): - rv = self.post('/v0/editgroup/{}/accept'.format(eg)) - assert rv.status_code == 200 - return rv - - def import_issn_file(self, json_file, create_containers=False, batchsize=100): - eg = self.new_editgroup() - i = 0 - with open(json_file, 'r') as file: - for line in file: - if i % batchsize == 0: - sys.stdout.write('\n{}: '.format(i)) - if (i+1) % 20 == 0: - sys.stdout.write('.') - i = i + 1 - obj = json.loads(line) - if not ("author" in obj and "title" in obj): - continue - try: - self.import_crossref_dict(obj, editgroup=eg, - create_containers=create_containers) - except Exception as e: - print("ERROR: {}".format(e)) - if i % batchsize == 0: - self.accept_editgroup(eg) - eg = self.new_editgroup() - if i % batchsize != 0: - self.accept_editgroup(eg) - print("done!") - - def health(self): - rv = self.get("/health") - assert rv.status_code == 200 - return rv.json() diff --git a/python/fatcat_tools/changelog_workers.py b/python/fatcat_tools/workers/changelog.py index 223d4478..92bb8bdd 100644 --- a/python/fatcat_tools/changelog_workers.py +++ b/python/fatcat_tools/workers/changelog.py @@ -2,7 +2,7 @@ import json import time from itertools import islice -from fatcat_tools.worker_common import FatcatWorker +from fatcat_tools.workers.worker_common import FatcatWorker from pykafka.common import OffsetType diff --git a/python/fatcat_tools/elastic_workers.py b/python/fatcat_tools/workers/elastic.py index eac8d6b0..46632792 100644 --- a/python/fatcat_tools/elastic_workers.py +++ b/python/fatcat_tools/workers/elastic.py @@ -2,7 +2,7 @@ import json import time import requests -from fatcat_tools.worker_common import FatcatWorker +from fatcat_tools.workers.worker_common import FatcatWorker from fatcat_client.models import ReleaseEntity from fatcat_tools.transforms import * from pykafka.common import OffsetType diff --git a/python/fatcat_tools/worker_common.py b/python/fatcat_tools/workers/worker_common.py index 77ea2c15..77ea2c15 100644 --- a/python/fatcat_tools/worker_common.py +++ b/python/fatcat_tools/workers/worker_common.py |