summary | refs | log | tree | commit | diff | stats
path: root/python
diff options
context:
space:
mode:
author: Bryan Newbold <bnewbold@robocracy.org>, 2018-06-28 12:32:43 -0700
committer: Bryan Newbold <bnewbold@robocracy.org>, 2018-06-28 12:34:01 -0700
commit: d304edc994bd1c0620c500a1cda8b948051f84f1 (patch)
tree: fc2f707577e240d0cd8ccf3af845834afde46f79 /python
parent: bfb137bed3ab6ac63b24815b7e11f23645b78316 (diff)
download: fatcat-d304edc994bd1c0620c500a1cda8b948051f84f1.tar.gz
fatcat-d304edc994bd1c0620c500a1cda8b948051f84f1.zip
crossref_importer: auto-create work entities
This now means that work_type isn't populated, but imports should run significantly faster.
Diffstat (limited to 'python')
-rw-r--r-- python/fatcat/crossref_importer.py | 30
-rw-r--r-- python/fatcat_client/models/release_entity.py | 5
2 files changed, 20 insertions, 15 deletions
diff --git a/python/fatcat/crossref_importer.py b/python/fatcat/crossref_importer.py
index 18dd2498..85062c5d 100644
--- a/python/fatcat/crossref_importer.py
+++ b/python/fatcat/crossref_importer.py
@@ -84,11 +84,6 @@ class FatcatCrossrefImporter(FatcatImporter):
# TODO: just dump JSON somewhere here?
raw=rm.get('unstructured')))
- # work
- we = fatcat_client.WorkEntity(
- work_type=obj['type'],
- )
-
# release
extra = dict(crossref={
'links': obj.get('link', []),
@@ -98,7 +93,7 @@ class FatcatCrossrefImporter(FatcatImporter):
'alternative-id': obj.get('alternative-id', [])})
re = fatcat_client.ReleaseEntity(
- work_id='tbd', # gets set later, I promise!
+ work_id=None,
title=obj['title'][0],
contribs=contribs,
refs=refs,
@@ -110,7 +105,7 @@ class FatcatCrossrefImporter(FatcatImporter):
volume=obj.get('volume'),
pages=obj.get('page'),
extra=extra)
- return (we, re, ce)
+ return (re, ce)
def create_row(self, row, editgroup_id=None):
if row is None:
@@ -118,20 +113,31 @@ class FatcatCrossrefImporter(FatcatImporter):
obj = json.loads(row)
entities = self.parse_crossref_dict(obj)
if entities is not None:
- (we, re, ce) = entities
- we.editgroup_id = editgroup_id
+ (re, ce) = entities
re.editgroup_id = editgroup_id
if ce is not None:
ce.editgroup_id = editgroup_id
container = self.api.create_container(ce)
re.container_id = container.ident
self._issnl_id_map[ce.issnl] = container.ident
- created = self.api.create_work(we)
- re.work_id = created.ident
self.api.create_release(re)
def create_batch(self, batch, editgroup_id=None):
"""Current work/release pairing disallows batch creation of releases.
Could do batch work creation and then match against releases, but meh."""
+ release_batch = []
for row in batch:
- self.create_row(row, editgroup_id)
+ if row is None:
+ continue
+ obj = json.loads(row)
+ entities = self.parse_crossref_dict(obj)
+ if entities is not None:
+ (re, ce) = entities
+ re.editgroup_id = editgroup_id
+ if ce is not None:
+ ce.editgroup_id = editgroup_id
+ container = self.api.create_container(ce)
+ re.container_id = container.ident
+ self._issnl_id_map[ce.issnl] = container.ident
+ release_batch.append(re)
+ self.api.create_release_batch(release_batch)
diff --git a/python/fatcat_client/models/release_entity.py b/python/fatcat_client/models/release_entity.py
index d74b9823..c28d03f7 100644
--- a/python/fatcat_client/models/release_entity.py
+++ b/python/fatcat_client/models/release_entity.py
@@ -133,7 +133,8 @@ class ReleaseEntity(object):
self.release_type = release_type
if container_id is not None:
self.container_id = container_id
- self.work_id = work_id
+ if work_id is not None:
+ self.work_id = work_id
self.title = title
if state is not None:
self.state = state
@@ -441,8 +442,6 @@ class ReleaseEntity(object):
:param work_id: The work_id of this ReleaseEntity. # noqa: E501
:type: str
"""
- if work_id is None:
- raise ValueError("Invalid value for `work_id`, must not be `None`") # noqa: E501
self._work_id = work_id