From d304edc994bd1c0620c500a1cda8b948051f84f1 Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Thu, 28 Jun 2018 12:32:43 -0700 Subject: crossref_importer: auto-create work entities This now means that work_type isn't populated, but imports should run significantly faster. --- python/fatcat/crossref_importer.py | 30 ++++++++++++++++----------- python/fatcat_client/models/release_entity.py | 5 ++--- 2 files changed, 20 insertions(+), 15 deletions(-) diff --git a/python/fatcat/crossref_importer.py b/python/fatcat/crossref_importer.py index 18dd2498..85062c5d 100644 --- a/python/fatcat/crossref_importer.py +++ b/python/fatcat/crossref_importer.py @@ -84,11 +84,6 @@ class FatcatCrossrefImporter(FatcatImporter): # TODO: just dump JSON somewhere here? raw=rm.get('unstructured'))) - # work - we = fatcat_client.WorkEntity( - work_type=obj['type'], - ) - # release extra = dict(crossref={ 'links': obj.get('link', []), @@ -98,7 +93,7 @@ class FatcatCrossrefImporter(FatcatImporter): 'alternative-id': obj.get('alternative-id', [])}) re = fatcat_client.ReleaseEntity( - work_id='tbd', # gets set later, I promise! + work_id=None, title=obj['title'][0], contribs=contribs, refs=refs, @@ -110,7 +105,7 @@ class FatcatCrossrefImporter(FatcatImporter): volume=obj.get('volume'), pages=obj.get('page'), extra=extra) - return (we, re, ce) + return (re, ce) def create_row(self, row, editgroup_id=None): if row is None: @@ -118,20 +113,31 @@ class FatcatCrossrefImporter(FatcatImporter): obj = json.loads(row) entities = self.parse_crossref_dict(obj) if entities is not None: - (we, re, ce) = entities - we.editgroup_id = editgroup_id + (re, ce) = entities re.editgroup_id = editgroup_id if ce is not None: ce.editgroup_id = editgroup_id container = self.api.create_container(ce) re.container_id = container.ident self._issnl_id_map[ce.issnl] = container.ident - created = self.api.create_work(we) - re.work_id = created.ident self.api.create_release(re) def create_batch(self, batch, editgroup_id=None): """Current work/release pairing disallows batch creation of releases. Could do batch work creation and then match against releases, but meh.""" + release_batch = [] for row in batch: - self.create_row(row, editgroup_id) + if row is None: + continue + obj = json.loads(row) + entities = self.parse_crossref_dict(obj) + if entities is not None: + (re, ce) = entities + re.editgroup_id = editgroup_id + if ce is not None: + ce.editgroup_id = editgroup_id + container = self.api.create_container(ce) + re.container_id = container.ident + self._issnl_id_map[ce.issnl] = container.ident + release_batch.append(re) + self.api.create_release_batch(release_batch) diff --git a/python/fatcat_client/models/release_entity.py b/python/fatcat_client/models/release_entity.py index d74b9823..c28d03f7 100644 --- a/python/fatcat_client/models/release_entity.py +++ b/python/fatcat_client/models/release_entity.py @@ -133,7 +133,8 @@ class ReleaseEntity(object): self.release_type = release_type if container_id is not None: self.container_id = container_id - self.work_id = work_id + if work_id is not None: + self.work_id = work_id self.title = title if state is not None: self.state = state @@ -441,8 +442,6 @@ class ReleaseEntity(object): :param work_id: The work_id of this ReleaseEntity. # noqa: E501 :type: str """ - if work_id is None: - raise ValueError("Invalid value for `work_id`, must not be `None`") # noqa: E501 self._work_id = work_id -- cgit v1.2.3