aboutsummaryrefslogtreecommitdiffstats
path: root/python/fatcat/api_client.py
diff options
context:
space:
mode:
Diffstat (limited to 'python/fatcat/api_client.py')
-rw-r--r--python/fatcat/api_client.py175
1 files changed, 175 insertions, 0 deletions
diff --git a/python/fatcat/api_client.py b/python/fatcat/api_client.py
new file mode 100644
index 00000000..f2fd6a1d
--- /dev/null
+++ b/python/fatcat/api_client.py
@@ -0,0 +1,175 @@
+
+import sys
+import json
+import requests
+
+
class FatCatApiClient:
    """Thin JSON-over-HTTP client for a fatcat ``/v0`` API server.

    Wraps a ``requests.Session`` and offers helpers to create and accept
    editgroups, look up containers by ISSN, and bulk-import Crossref
    metadata from files holding one JSON object per line.
    """

    def __init__(self, host_url):
        """Create a client for the API rooted at ``host_url``.

        ``host_url`` is prepended verbatim to every request path, so it
        should not end with a slash (paths start with one).
        """
        self.host_url = host_url
        self.session = requests.Session()
        # ISSN -> container id cache; a value of None records a 404 lookup.
        self._issn_map = dict()

    def get(self, path, data=None):
        """Send a GET to ``host_url + path`` and return the response.

        Note that ``data`` is sent as a JSON request *body*, not as query
        parameters.
        """
        headers = {"content-type": "application/json"}
        return self.session.get(self.host_url + path, json=data,
                                headers=headers)

    def post(self, path, data=None):
        """Send a POST with ``data`` as JSON body and return the response."""
        headers = {"content-type": "application/json"}
        return self.session.post(self.host_url + path, json=data,
                                 headers=headers)

    @staticmethod
    def _expect_status(rv, *allowed):
        """Raise ``AssertionError`` unless ``rv.status_code`` is in ``allowed``.

        An explicit raise is used instead of an ``assert`` statement so the
        check is not stripped under ``python -O`` and so the error carries a
        message (the bulk-import loop prints the exception text). The
        exception type is kept as ``AssertionError`` for compatibility with
        callers of the earlier assert-based checks.
        """
        if rv.status_code not in allowed:
            raise AssertionError(
                "unexpected HTTP status {} (expected {})".format(
                    rv.status_code,
                    " or ".join(str(code) for code in allowed)))

    def new_editgroup(self):
        """Create a new editgroup (as editor 1) and return its id."""
        rv = self.post('/v0/editgroup', data=dict(
            editor=1))
        self._expect_status(rv, 200)
        return rv.json()['id']

    def accept_editgroup(self, eg):
        """Accept editgroup ``eg`` and return the HTTP response."""
        rv = self.post('/v0/editgroup/{}/accept'.format(eg))
        self._expect_status(rv, 200)
        return rv

    def lookup_issn(self, issn):
        """Return the container id registered for ``issn``, or None.

        Results (including misses) are cached in ``self._issn_map``.
        ``issn`` must look like ``NNNN-NNNN`` (nine characters with a dash
        in the middle); ``None`` is accepted and simply yields ``None``.

        Raises:
            AssertionError: if ``issn`` is malformed, or the server answers
                with anything other than 200 or 404.
        """
        if issn is None:
            # Records without an ISSN cannot be matched to a container.
            return None
        if not (len(issn) == 9 and issn[4] == '-'):
            raise AssertionError("malformed ISSN: {!r}".format(issn))
        if issn in self._issn_map:
            return self._issn_map[issn]
        rv = self.get('/v0/container/lookup', data=dict(issn=issn))
        # 200 means found, 404 means unknown; anything else is an error.
        self._expect_status(rv, 200, 404)
        container_id = rv.json()['id'] if rv.status_code == 200 else None
        self._issn_map[issn] = container_id
        return container_id

    def import_crossref_file(self, json_file, create_containers=False, batchsize=100):
        """Import every Crossref record in ``json_file``.

        ``json_file`` holds one JSON object per line. Objects without both
        an ``author`` and a ``title`` key are skipped. Edits are grouped
        into editgroups spanning at most ``batchsize`` input lines; each
        editgroup is accepted once its span of lines has been read, and any
        partially filled editgroup is accepted at end of file.

        A failure while importing one record is printed and does not stop
        the run.

        Raises:
            ValueError: if ``batchsize`` is smaller than 1.
        """
        self._import_json_lines(json_file, create_containers, batchsize)

    def _import_json_lines(self, json_file, create_containers, batchsize):
        """Shared line-by-line import loop behind the ``import_*_file`` methods."""
        if batchsize < 1:
            raise ValueError("batchsize must be a positive integer")
        eg = None  # created lazily so no empty editgroup is left dangling
        i = 0
        with open(json_file, 'r', encoding='utf-8') as file:
            for line in file:
                if not line.strip():
                    # tolerate blank lines (e.g. a trailing newline)
                    continue
                # progress output: a header per batch, a dot every 20 lines
                if i % batchsize == 0:
                    sys.stdout.write('\n{}: '.format(i))
                if (i + 1) % 20 == 0:
                    sys.stdout.write('.')
                i = i + 1
                obj = json.loads(line)
                if "author" in obj and "title" in obj:
                    if eg is None:
                        eg = self.new_editgroup()
                    try:
                        self.import_crossref_dict(
                            obj, editgroup=eg,
                            create_containers=create_containers)
                    except Exception as e:
                        # best-effort bulk import: report and keep going
                        print("ERROR: {}".format(e))
                # The boundary check runs for skipped lines too; otherwise a
                # skipped record landing on the boundary would leave the
                # editgroup unaccepted.
                if i % batchsize == 0 and eg is not None:
                    self.accept_editgroup(eg)
                    eg = None
        if eg is not None:
            self.accept_editgroup(eg)
        print("done!")

    @staticmethod
    def _crossref_creators(meta):
        """Build creator dicts (name, sortname, orcid) from ``meta['author']``.

        Not yet sent to the API (see the XXX notes in
        ``import_crossref_dict``). Entries lacking ``given`` or ``family``
        fall back to whichever of ``family``, ``given`` or ``name`` exists.
        """
        creators = []
        for am in meta.get('author', []):
            given = am.get('given')
            family = am.get('family')
            if given and family:
                name = "{} {}".format(given, family)
                sortname = "{}, {}".format(family, given)
            else:
                name = family or given or am.get('name')
                sortname = name
            creators.append(dict(name=name, sortname=sortname, orcid=None))
        return creators

    @staticmethod
    def _crossref_refs(meta):
        """Build reference dicts (doi, 1-based index, stub) from ``meta['reference']``.

        Not yet sent to the API (see the XXX notes in
        ``import_crossref_dict``).
        """
        return [
            dict(
                doi=rm.get("DOI", None),
                index=i + 1,
                # TODO: how to generate a proper stub here from k/v metadata?
                stub="| ".join(str(value) for value in rm.values()))
            for i, rm in enumerate(meta.get('reference', []))
        ]

    def import_crossref_dict(self, meta, editgroup=None,
                             create_containers=False):
        """Create a work and a release from one Crossref metadata dict.

        Looks up the container by the record's first ISSN and, when
        ``create_containers`` is true and no container exists, creates one.
        Then posts a work and a release, both attached to ``editgroup``.

        Required keys in ``meta``: ``title`` (non-empty list), ``type``,
        ``DOI`` and ``created['date-time']``. Everything else is optional.

        Returns:
            The id of the created release.

        Raises:
            AssertionError: if any API call answers with an unexpected
                status, or the ISSN is malformed.
            KeyError, IndexError: if a required key is missing or empty.
        """
        # container
        issn = (meta.get('ISSN') or [None])[0]
        container_id = self.lookup_issn(issn)

        if container_id is None and create_containers and issn is not None:
            rv = self.post('/v0/container', data=dict(
                issn=issn,
                publisher=meta.get('publisher')))
            self._expect_status(rv, 200)
            container_id = rv.json()['id']
            print("created container: {}".format(issn))
            self._issn_map[issn] = container_id

        # work and release
        title = meta['title'][0]
        rv = self.post('/v0/work',
                       data=dict(title=title, editgroup=editgroup))  # work_type="book"
        self._expect_status(rv, 200)
        work_id = rv.json()['id']

        extra = dict(crossref={
            'links': meta.get('link', []),
            'subject': meta.get('subject'),
            'type': meta['type'],
            'alternative-id': meta.get('alternative-id', [])})

        # An absent or empty license list, or an entry without a URL, all
        # mean "no license".
        licenses = meta.get('license') or [{}]
        license_url = licenses[0].get('URL') or None

        rv = self.post('/v0/release', data=dict(
            title=title,
            work=work_id,
            # XXX: creators=self._crossref_creators(meta),
            # XXX: refs=self._crossref_refs(meta),
            # XXX: container=container_id,
            release_type=meta['type'],
            doi=meta['DOI'],
            date=meta['created']['date-time'],
            license=license_url,
            issue=meta.get('issue', None),
            volume=meta.get('volume', None),
            pages=meta.get('page', None),
            editgroup=editgroup,
            extra=extra))
        self._expect_status(rv, 200)
        return rv.json()['id']

    def import_issn_file(self, json_file, create_containers=False, batchsize=100):
        """Import records from ``json_file`` (one JSON object per line).

        NOTE(review): this currently behaves exactly like
        ``import_crossref_file`` -- it keeps only objects with ``author``
        and ``title`` keys and imports them as Crossref works. Any
        ISSN-specific handling still appears to be unimplemented; confirm
        the intended input format before relying on it.

        Raises:
            ValueError: if ``batchsize`` is smaller than 1.
        """
        self._import_json_lines(json_file, create_containers, batchsize)

    def health(self):
        """Return the decoded JSON body of ``GET /health``.

        Raises:
            AssertionError: if the server does not answer with status 200.
        """
        rv = self.get("/health")
        self._expect_status(rv, 200)
        return rv.json()