summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Bryan Newbold <bnewbold@robocracy.org> 2018-04-25 13:45:53 -0700
committer: Bryan Newbold <bnewbold@robocracy.org> 2018-04-25 13:54:43 -0700
commit: 840966fcd3170bce4961bd5a51727af1be38e5ea (patch)
tree: bddc52b1d823910d85aef9e939b4d54986e3b912
parent: 8c7258205fad8230e236709c20469ff390426cbf (diff)
download: fatcat-840966fcd3170bce4961bd5a51727af1be38e5ea.tar.gz
fatcat-840966fcd3170bce4961bd5a51727af1be38e5ea.zip
have crossref script create containers
-rw-r--r-- fatcat/api.py 2
-rw-r--r-- fatcat/api_client.py 64
-rwxr-xr-x fatcat_client.py 8
3 files changed, 52 insertions, 22 deletions
diff --git a/fatcat/api.py b/fatcat/api.py
index 0712081e..9cbb1939 100644
--- a/fatcat/api.py
+++ b/fatcat/api.py
@@ -226,7 +226,7 @@ def api_editgroup_get(ident):
.join(EditGroup.editor)\
.filter(EditGroup.id==ident)\
.first_or_404()
- # TODO: fill in all the related edit types...
+ # XXX: fill in all the related edit types...
return editgroup_schema.jsonify(entity)
@app.route('/v0/editgroup', methods=['POST'])
diff --git a/fatcat/api_client.py b/fatcat/api_client.py
index 46297163..f997f5fc 100644
--- a/fatcat/api_client.py
+++ b/fatcat/api_client.py
@@ -9,9 +9,12 @@ class FatCatApiClient:
def __init__(self, host_url):
self.host_url = host_url
self.session = requests.Session()
+ self._issn_map = dict()
- def get(self, path):
- return self.session.get(self.host_url + path)
+ def get(self, path, data=None):
+ headers = {"content-type": "application/json"}
+ return self.session.get(self.host_url + path, json=data,
+ headers=headers)
def post(self, path, data=None):
headers = {"content-type": "application/json"}
@@ -30,7 +33,21 @@ class FatCatApiClient:
assert rv.status_code == 200
return rv
- def import_crossref_file(self, json_file):
+ def lookup_issn(self, issn):
+ assert len(issn) == 9 and issn[4] == '-'
+ if issn in self._issn_map:
+ return self._issn_map[issn]
+ rv = self.get('/v0/container/lookup', data=dict(issn=issn))
+ container_id = None
+ if rv.status_code == 200:
+ container_id = rv.json()['id']
+ else:
+ # only other valid response is a 404; otherwise we had an error
+ assert rv.status_code == 404
+ self._issn_map[issn] = container_id
+ return container_id
+
+ def import_crossref_file(self, json_file, create_containers=False):
eg = self.new_editgroup()
i = 0
with open(json_file, 'r') as file:
@@ -44,14 +61,16 @@ class FatCatApiClient:
if not ("author" in obj and "title" in obj):
continue
try:
- self.import_crossref_dict(obj, editgroup=eg)
+ self.import_crossref_dict(obj, editgroup=eg,
+ create_containers=create_containers)
except Exception as e:
print("ERROR: {}".format(e))
if i % 1000 != 0:
self.accept_editgroup(eg)
print("done!")
- def import_crossref_dict(self, meta, editgroup=None, do_extra=False):
+ def import_crossref_dict(self, meta, editgroup=None,
+ create_containers=False):
# creators
creators = []
@@ -62,15 +81,24 @@ class FatCatApiClient:
creators.append(c)
# container
+ issn = meta.get('ISSN', [None])[0]
+ container_id = self.lookup_issn(issn)
container = dict(
- issn=meta.get('ISSN', [None])[0],
+ issn=issn,
name=meta['container-title'][0],
- #container_id=None,
+ container=container_id,
#sortname=meta['short-container-title'][0])
publisher=meta['publisher'])
- #rv = self.post('/v0/container', data=container)
- #assert rv.status_code == 200
- #container_id = rv.json()['id']
+
+ if container_id is None and create_containers and issn != None:
+ rv = self.post('/v0/container', data=dict(
+ issn=container['issn'],
+ publisher=container['publisher']))
+ assert rv.status_code == 200
+ container_id = rv.json()['id']
+ print("created container: {}".format(issn))
+ container['id'] = container_id
+ self._issn_map[issn] = container_id
# references
refs = []
@@ -89,20 +117,18 @@ class FatCatApiClient:
assert rv.status_code == 200
work_id = rv.json()['id']
- if do_extra:
- extra = dict(crossref={
- 'links': meta.get('link', []),
- 'subject': meta.get('subject'),
- 'type': meta['type'],
- 'alternative-id': meta.get('alternative-id', [])})
- else:
- extra = None
+ extra = dict(crossref={
+ 'links': meta.get('link', []),
+ 'subject': meta.get('subject'),
+ 'type': meta['type'],
+ 'alternative-id': meta.get('alternative-id', [])})
+
rv = self.post('/v0/release', data=dict(
title=title,
work=work_id,
# XXX: creators=creators,
# XXX: refs=refs,
- #container=container_id,
+ # XXX: container=container_id,
release_type=meta['type'],
doi=meta['DOI'],
date=meta['created']['date-time'],
diff --git a/fatcat_client.py b/fatcat_client.py
index d20400e7..d1580be5 100755
--- a/fatcat_client.py
+++ b/fatcat_client.py
@@ -5,7 +5,8 @@ from fatcat.api_client import FatCatApiClient
def import_crossref(args):
fcc = FatCatApiClient(args.host_url)
- fcc.import_crossref_file(args.json_file)
+ fcc.import_crossref_file(args.json_file,
+ create_containers=args.create_containers)
def health(args):
fcc = FatCatApiClient(args.host_url)
@@ -25,7 +26,10 @@ def main():
aliases=['ic'])
sub_import_crossref.set_defaults(func=import_crossref)
sub_import_crossref.add_argument('json_file',
- help="")
+ help="crossref JSON file to import from")
+ sub_import_crossref.add_argument('--create-containers',
+ action='store_true',
+ help="if true, create containers based on ISSN")
sub_health = subparsers.add_parser('health')
sub_health.set_defaults(func=health)