about | summary | refs | log | tree | commit | diff | stats
path: root/python/fatcat_tools
diff options
context:
space:
mode:
author: Bryan Newbold <bnewbold@robocracy.org> 2018-11-13 11:32:41 -0800
committer: Bryan Newbold <bnewbold@robocracy.org> 2018-11-13 11:32:41 -0800
commit: 279b22e30d9b590838268f5f5acdaa1110ee593a (patch)
tree: c9965a089be1b8ef607573ea9261c0c378c0ab47 /python/fatcat_tools
parent: 7ebda2e051b51e49544ab75673b19ec5f27d9d45 (diff)
download: fatcat-279b22e30d9b590838268f5f5acdaa1110ee593a.tar.gz
fatcat-279b22e30d9b590838268f5f5acdaa1110ee593a.zip
shuffle around fatcat_tools layout
Diffstat (limited to 'python/fatcat_tools')
-rw-r--r-- python/fatcat_tools/importers/common.py (renamed from python/fatcat_tools/importer_common.py) | 0
-rw-r--r-- python/fatcat_tools/importers/crossref.py (renamed from python/fatcat_tools/crossref_importer.py) | 2
-rw-r--r-- [-rwxr-xr-x] python/fatcat_tools/importers/grobid_metadata.py (renamed from python/fatcat_tools/grobid_metadata_importer.py) | 2
-rw-r--r-- python/fatcat_tools/importers/issn.py (renamed from python/fatcat_tools/issn_importer.py) | 2
-rw-r--r-- python/fatcat_tools/importers/matched.py (renamed from python/fatcat_tools/matched_importer.py) | 2
-rw-r--r-- python/fatcat_tools/importers/orcid.py (renamed from python/fatcat_tools/orcid_importer.py) | 2
-rw-r--r-- python/fatcat_tools/raw_api_client.py | 66
-rw-r--r-- python/fatcat_tools/workers/changelog.py (renamed from python/fatcat_tools/changelog_workers.py) | 2
-rw-r--r-- python/fatcat_tools/workers/elastic.py (renamed from python/fatcat_tools/elastic_workers.py) | 2
-rw-r--r-- python/fatcat_tools/workers/worker_common.py (renamed from python/fatcat_tools/worker_common.py) | 0
10 files changed, 7 insertions, 73 deletions
diff --git a/python/fatcat_tools/importer_common.py b/python/fatcat_tools/importers/common.py
index 8dfee875..8dfee875 100644
--- a/python/fatcat_tools/importer_common.py
+++ b/python/fatcat_tools/importers/common.py
diff --git a/python/fatcat_tools/crossref_importer.py b/python/fatcat_tools/importers/crossref.py
index 6a5ad824..dddb58d1 100644
--- a/python/fatcat_tools/crossref_importer.py
+++ b/python/fatcat_tools/importers/crossref.py
@@ -5,7 +5,7 @@ import sqlite3
import datetime
import itertools
import fatcat_client
-from fatcat_tools.importer_common import FatcatImporter
+from fatcat_tools.importers.common import FatcatImporter
class FatcatCrossrefImporter(FatcatImporter):
diff --git a/python/fatcat_tools/grobid_metadata_importer.py b/python/fatcat_tools/importers/grobid_metadata.py
index effa0d94..56b2ee02 100755..100644
--- a/python/fatcat_tools/grobid_metadata_importer.py
+++ b/python/fatcat_tools/importers/grobid_metadata.py
@@ -5,7 +5,7 @@ import json
import base64
import datetime
import fatcat_client
-from fatcat_tools.importer_common import FatcatImporter
+from fatcat_tools.importers.common import FatcatImporter
MAX_ABSTRACT_BYTES=4096
diff --git a/python/fatcat_tools/issn_importer.py b/python/fatcat_tools/importers/issn.py
index e3ed7382..d7fb9082 100644
--- a/python/fatcat_tools/issn_importer.py
+++ b/python/fatcat_tools/importers/issn.py
@@ -3,7 +3,7 @@ import sys
import json
import itertools
import fatcat_client
-from fatcat_tools.importer_common import FatcatImporter
+from fatcat_tools.importers.common import FatcatImporter
# CSV format (generated from git.archive.org/webgroup/oa-journal-analysis):
# ISSN-L,in_doaj,in_road,in_norwegian,in_crossref,title,publisher,url,lang,ISSN-print,ISSN-electronic,doi_count,has_doi,is_oa,is_kept,publisher_size,url_live,url_live_status,url_live_final_status,url_live_final_url,url_live_status_simple,url_live_final_status_simple,url_domain,gwb_pdf_count
diff --git a/python/fatcat_tools/matched_importer.py b/python/fatcat_tools/importers/matched.py
index 627ab6f1..6270fe88 100644
--- a/python/fatcat_tools/matched_importer.py
+++ b/python/fatcat_tools/importers/matched.py
@@ -4,7 +4,7 @@ import json
import sqlite3
import itertools
import fatcat_client
-from fatcat_tools.importer_common import FatcatImporter
+from fatcat_tools.importers.common import FatcatImporter
#row = row.split('\t')
#assert len(row) == 2
diff --git a/python/fatcat_tools/orcid_importer.py b/python/fatcat_tools/importers/orcid.py
index f2366c66..350c4c57 100644
--- a/python/fatcat_tools/orcid_importer.py
+++ b/python/fatcat_tools/importers/orcid.py
@@ -3,7 +3,7 @@ import sys
import json
import itertools
import fatcat_client
-from fatcat_tools.importer_common import FatcatImporter
+from fatcat_tools.importers.common import FatcatImporter
def value_or_none(e):
if type(e) == dict:
diff --git a/python/fatcat_tools/raw_api_client.py b/python/fatcat_tools/raw_api_client.py
deleted file mode 100644
index 75151ebb..00000000
--- a/python/fatcat_tools/raw_api_client.py
+++ /dev/null
@@ -1,66 +0,0 @@
-
-import sys
-import json
-import requests
-
-
-class RawFatcatApiClient:
-
- def __init__(self, host_url):
- self.host_url = host_url
- self.session = requests.Session()
- self._issn_map = dict()
-
- def get(self, path, data=None):
- headers = {"content-type": "application/json"}
- return self.session.get(self.host_url + path, json=data,
- headers=headers)
-
- def post(self, path, data=None):
- headers = {"content-type": "application/json"}
- return self.session.post(self.host_url + path, json=data,
- headers=headers)
-
- def new_editgroup(self):
- rv = self.post('/v0/editgroup', data=dict(
- editor_id=1))
- print(rv)
- print(rv.json())
- assert rv.status_code == 201
- editgroup_id = rv.json()['id']
- return editgroup_id
-
- def accept_editgroup(self, eg):
- rv = self.post('/v0/editgroup/{}/accept'.format(eg))
- assert rv.status_code == 200
- return rv
-
- def import_issn_file(self, json_file, create_containers=False, batchsize=100):
- eg = self.new_editgroup()
- i = 0
- with open(json_file, 'r') as file:
- for line in file:
- if i % batchsize == 0:
- sys.stdout.write('\n{}: '.format(i))
- if (i+1) % 20 == 0:
- sys.stdout.write('.')
- i = i + 1
- obj = json.loads(line)
- if not ("author" in obj and "title" in obj):
- continue
- try:
- self.import_crossref_dict(obj, editgroup=eg,
- create_containers=create_containers)
- except Exception as e:
- print("ERROR: {}".format(e))
- if i % batchsize == 0:
- self.accept_editgroup(eg)
- eg = self.new_editgroup()
- if i % batchsize != 0:
- self.accept_editgroup(eg)
- print("done!")
-
- def health(self):
- rv = self.get("/health")
- assert rv.status_code == 200
- return rv.json()
diff --git a/python/fatcat_tools/changelog_workers.py b/python/fatcat_tools/workers/changelog.py
index 223d4478..92bb8bdd 100644
--- a/python/fatcat_tools/changelog_workers.py
+++ b/python/fatcat_tools/workers/changelog.py
@@ -2,7 +2,7 @@
import json
import time
from itertools import islice
-from fatcat_tools.worker_common import FatcatWorker
+from fatcat_tools.workers.worker_common import FatcatWorker
from pykafka.common import OffsetType
diff --git a/python/fatcat_tools/elastic_workers.py b/python/fatcat_tools/workers/elastic.py
index eac8d6b0..46632792 100644
--- a/python/fatcat_tools/elastic_workers.py
+++ b/python/fatcat_tools/workers/elastic.py
@@ -2,7 +2,7 @@
import json
import time
import requests
-from fatcat_tools.worker_common import FatcatWorker
+from fatcat_tools.workers.worker_common import FatcatWorker
from fatcat_client.models import ReleaseEntity
from fatcat_tools.transforms import *
from pykafka.common import OffsetType
diff --git a/python/fatcat_tools/worker_common.py b/python/fatcat_tools/workers/worker_common.py
index 77ea2c15..77ea2c15 100644
--- a/python/fatcat_tools/worker_common.py
+++ b/python/fatcat_tools/workers/worker_common.py