summary | refs | log | tree | commit | diff | stats
path: root/python/fatcat_tools/importers
diff options
context:
space:
mode:
Diffstat (limited to 'python/fatcat_tools/importers')
-rw-r--r-- python/fatcat_tools/importers/common.py 12
-rw-r--r-- python/fatcat_tools/importers/crossref.py 14
-rw-r--r-- python/fatcat_tools/importers/grobid_metadata.py 10
-rw-r--r-- python/fatcat_tools/importers/issn.py 8
-rw-r--r-- python/fatcat_tools/importers/matched.py 20
-rw-r--r-- python/fatcat_tools/importers/orcid.py 8
6 files changed, 36 insertions, 36 deletions
diff --git a/python/fatcat_tools/importers/common.py b/python/fatcat_tools/importers/common.py
index 65976a21..e31cabf8 100644
--- a/python/fatcat_tools/importers/common.py
+++ b/python/fatcat_tools/importers/common.py
@@ -54,11 +54,11 @@ class FatcatImporter:
print("Processed {} lines, inserted {}, updated {}.".format(
self.counts['processed_lines'], self.counts['insert'], self.counts['update']))
- def create_row(self, row, editgroup=None):
+ def create_row(self, row, editgroup_id=None):
# sub-classes expected to implement this
raise NotImplementedError
- def create_batch(self, rows, editgroup=None):
+ def create_batch(self, rows, editgroup_id=None):
# sub-classes expected to implement this
raise NotImplementedError
@@ -68,14 +68,14 @@ class FatcatImporter:
fatcat_client.Editgroup(editor_id='aaaaaaaaaaaabkvkaaaaaaaaae'))
i = 0
for i, row in enumerate(source):
- self.create_row(row, editgroup=eg.id)
+ self.create_row(row, editgroup_id=eg.editgroup_id)
if i > 0 and (i % group_size) == 0:
- self.api.accept_editgroup(eg.id)
+ self.api.accept_editgroup(eg.editgroup_id)
eg = self.api.create_editgroup(
fatcat_client.Editgroup(editor_id='aaaaaaaaaaaabkvkaaaaaaaaae'))
self.counts['processed_lines'] += 1
if i == 0 or (i % group_size) != 0:
- self.api.accept_editgroup(eg.id)
+ self.api.accept_editgroup(eg.editgroup_id)
def process_batch(self, source, size=50, decode_kafka=False):
"""Reads and processes in batches (not API-call-per-)"""
@@ -85,7 +85,7 @@ class FatcatImporter:
self.counts['processed_lines'] += len(rows)
eg = self.api.create_editgroup(
fatcat_client.Editgroup(editor_id='aaaaaaaaaaaabkvkaaaaaaaaae'))
- self.create_batch(rows, editgroup=eg.id)
+ self.create_batch(rows, editgroup_id=eg.editgroup_id)
def process_csv_source(self, source, group_size=100, delimiter=','):
reader = csv.DictReader(source, delimiter=delimiter)
diff --git a/python/fatcat_tools/importers/crossref.py b/python/fatcat_tools/importers/crossref.py
index 13179207..d4d0de68 100644
--- a/python/fatcat_tools/importers/crossref.py
+++ b/python/fatcat_tools/importers/crossref.py
@@ -287,7 +287,7 @@ class CrossrefImporter(FatcatImporter):
extra=dict(crossref=extra))
return (re, ce)
- def create_row(self, row, editgroup=None):
+ def create_row(self, row, editgroup_id=None):
if row is None:
return
obj = json.loads(row)
@@ -295,13 +295,13 @@ class CrossrefImporter(FatcatImporter):
if entities is not None:
(re, ce) = entities
if ce is not None:
- container = self.api.create_container(ce, editgroup=editgroup)
+ container = self.api.create_container(ce, editgroup_id=editgroup_id)
re.container_id = container.ident
self._issnl_id_map[ce.issnl] = container.ident
- self.api.create_release(re, editgroup=editgroup)
+ self.api.create_release(re, editgroup_id=editgroup_id)
self.counts['insert'] += 1
- def create_batch(self, batch, editgroup=None):
+ def create_batch(self, batch, editgroup_id=None):
"""Current work/release pairing disallows batch creation of releases.
Could do batch work creation and then match against releases, but meh."""
release_batch = []
@@ -315,10 +315,10 @@ class CrossrefImporter(FatcatImporter):
if ce is not None:
ce_eg = self.api.create_editgroup(
fatcat_client.Editgroup(editor_id='aaaaaaaaaaaabkvkaaaaaaaaae'))
- container = self.api.create_container(ce, editgroup=ce_eg.id)
- self.api.accept_editgroup(ce_eg.id)
+ container = self.api.create_container(ce, editgroup_id=ce_eg.editgroup_id)
+ self.api.accept_editgroup(ce_eg.editgroup_id)
re.container_id = container.ident
self._issnl_id_map[ce.issnl] = container.ident
release_batch.append(re)
- self.api.create_release_batch(release_batch, autoaccept="true", editgroup=editgroup)
+ self.api.create_release_batch(release_batch, autoaccept="true", editgroup_id=editgroup_id)
self.counts['insert'] += len(release_batch)
diff --git a/python/fatcat_tools/importers/grobid_metadata.py b/python/fatcat_tools/importers/grobid_metadata.py
index 47a753a6..2cb97b01 100644
--- a/python/fatcat_tools/importers/grobid_metadata.py
+++ b/python/fatcat_tools/importers/grobid_metadata.py
@@ -130,7 +130,7 @@ class GrobidMetadataImporter(FatcatImporter):
sha1=sha1,
size=int(file_size),
mimetype=mimetype,
- releases=[],
+ release_ids=[],
urls=[],
)
@@ -147,7 +147,7 @@ class GrobidMetadataImporter(FatcatImporter):
return fe
- def create_row(self, row, editgroup=None):
+ def create_row(self, row, editgroup_id=None):
if not row:
return
fields = row.split('\t')
@@ -159,11 +159,11 @@ class GrobidMetadataImporter(FatcatImporter):
fe = self.parse_file_metadata(sha1_key, cdx, mimetype, file_size)
re = self.parse_grobid_json(grobid_meta)
if fe and re:
- release_entity = self.api.create_release(re, editgroup=editgroup)
+ release_entity = self.api.create_release(re, editgroup_id=editgroup_id)
# release ident can't already be in release list because we just
# created it
- fe.releases.append(release_entity.ident)
- file_entity = self.api.create_file(fe, editgroup=editgroup)
+ fe.release_ids.append(release_entity.ident)
+ file_entity = self.api.create_file(fe, editgroup_id=editgroup_id)
self.counts['insert'] += 1
# NB: batch mode not implemented
diff --git a/python/fatcat_tools/importers/issn.py b/python/fatcat_tools/importers/issn.py
index f702dc60..9b9ca63f 100644
--- a/python/fatcat_tools/importers/issn.py
+++ b/python/fatcat_tools/importers/issn.py
@@ -64,16 +64,16 @@ class IssnImporter(FatcatImporter):
extra=extra)
return ce
- def create_row(self, row, editgroup=None):
+ def create_row(self, row, editgroup_id=None):
ce = self.parse_issn_row(row)
if ce is not None:
- self.api.create_container(ce, editgroup=editgroup)
+ self.api.create_container(ce, editgroup_id=editgroup_id)
self.counts['insert'] += 1
- def create_batch(self, batch, editgroup=None):
+ def create_batch(self, batch, editgroup_id=None):
"""Reads and processes in batches (not API-call-per-line)"""
objects = [self.parse_issn_row(l)
for l in batch if (l is not None)]
objects = [o for o in objects if (o is not None)]
- self.api.create_container_batch(objects, autoaccept="true", editgroup=editgroup)
+ self.api.create_container_batch(objects, autoaccept="true", editgroup_id=editgroup_id)
self.counts['insert'] += len(objects)
diff --git a/python/fatcat_tools/importers/matched.py b/python/fatcat_tools/importers/matched.py
index 6f83dd23..5dbda27c 100644
--- a/python/fatcat_tools/importers/matched.py
+++ b/python/fatcat_tools/importers/matched.py
@@ -70,7 +70,7 @@ class MatchedImporter(FatcatImporter):
if fe is None:
fe = fatcat_client.FileEntity(
sha1=sha1,
- releases=[],
+ release_ids=[],
urls=[],
)
@@ -89,10 +89,10 @@ class MatchedImporter(FatcatImporter):
re_list.add(re.ident)
if len(re_list) == 0:
return None
- if fe.releases == set(re_list):
+ if fe.release_ids == set(re_list):
return None
- re_list.update(fe.releases)
- fe.releases = list(re_list)
+ re_list.update(fe.release_ids)
+ fe.release_ids = list(re_list)
# parse URLs and CDX
existing_urls = [feu.url for feu in fe.urls]
@@ -125,26 +125,26 @@ class MatchedImporter(FatcatImporter):
fe.mimetype = obj.get('mimetype')
return fe
- def create_row(self, row, editgroup=None):
+ def create_row(self, row, editgroup_id=None):
obj = json.loads(row)
fe = self.parse_matched_dict(obj)
if fe is not None:
if fe.ident is None:
- self.api.create_file(fe, editgroup=editgroup)
+ self.api.create_file(fe, editgroup_id=editgroup_id)
self.counts['insert'] += 1
else:
- self.api.update_file(fe.ident, fe, editgroup=editgroup)
+ self.api.update_file(fe.ident, fe, editgroup_id=editgroup_id)
self.counts['update'] += 1
- def create_batch(self, batch, editgroup=None):
+ def create_batch(self, batch, editgroup_id=None):
"""Reads and processes in batches (not API-call-per-line)"""
objects = [self.parse_matched_dict(json.loads(l))
for l in batch if l != None]
new_objects = [o for o in objects if o != None and o.ident == None]
update_objects = [o for o in objects if o != None and o.ident != None]
for obj in update_objects:
- self.api.update_file(obj.ident, obj, editgroup=editgroup)
+ self.api.update_file(obj.ident, obj, editgroup_id=editgroup_id)
if len(new_objects) > 0:
- self.api.create_file_batch(new_objects, autoaccept="true", editgroup=editgroup)
+ self.api.create_file_batch(new_objects, autoaccept="true", editgroup_id=editgroup_id)
self.counts['update'] += len(update_objects)
self.counts['insert'] += len(new_objects)
diff --git a/python/fatcat_tools/importers/orcid.py b/python/fatcat_tools/importers/orcid.py
index 9e4767f9..fc4562d0 100644
--- a/python/fatcat_tools/importers/orcid.py
+++ b/python/fatcat_tools/importers/orcid.py
@@ -57,17 +57,17 @@ class OrcidImporter(FatcatImporter):
extra=extra)
return ce
- def create_row(self, row, editgroup=None):
+ def create_row(self, row, editgroup_id=None):
obj = json.loads(row)
ce = self.parse_orcid_dict(obj)
if ce is not None:
- self.api.create_creator(ce, editgroup=editgroup)
+ self.api.create_creator(ce, editgroup_id=editgroup_id)
self.counts['insert'] += 1
- def create_batch(self, batch, editgroup=None):
+ def create_batch(self, batch, editgroup_id=None):
"""Reads and processes in batches (not API-call-per-line)"""
objects = [self.parse_orcid_dict(json.loads(l))
for l in batch if l != None]
objects = [o for o in objects if o != None]
- self.api.create_creator_batch(objects, autoaccept="true", editgroup=editgroup)
+ self.api.create_creator_batch(objects, autoaccept="true", editgroup_id=editgroup_id)
self.counts['insert'] += len(objects)