Diffstat (limited to 'python/fatcat_tools/importers/journal_metadata.py')
-rw-r--r--  python/fatcat_tools/importers/journal_metadata.py  111
1 file changed, 65 insertions(+), 46 deletions(-)
diff --git a/python/fatcat_tools/importers/journal_metadata.py b/python/fatcat_tools/importers/journal_metadata.py
index 25d7b3b5..6d1fefa3 100644
--- a/python/fatcat_tools/importers/journal_metadata.py
+++ b/python/fatcat_tools/importers/journal_metadata.py
@@ -1,4 +1,3 @@
-
import fatcat_openapi_client
from .common import EntityImporter, clean
@@ -11,18 +10,20 @@ def or_none(s):
return None
return s
+
def truthy(s):
if s is None:
return None
s = s.lower()
- if s in ('true', 't', 'yes', 'y', '1'):
+ if s in ("true", "t", "yes", "y", "1"):
return True
- elif s in ('false', 'f', 'no', 'n', '0'):
+ elif s in ("false", "f", "no", "n", "0"):
return False
else:
return None
+
class JournalMetadataImporter(EntityImporter):
"""
Imports journal metadata ("containers") by ISSN, currently from a custom
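For readers skimming the diff: truthy() normalizes loosely-typed boolean strings from the upstream metadata dump, mapping anything unrecognized to None rather than guessing. A minimal usage sketch, assuming the helper is importable from the module path in the diff header (i.e. with the fatcat_tools package on the Python path):

from fatcat_tools.importers.journal_metadata import truthy

# Recognized boolean-ish strings become real booleans; anything else becomes
# None, so downstream code can tell "unknown" apart from "false".
assert truthy("Yes") is True
assert truthy("0") is False
assert truthy("maybe") is None
assert truthy(None) is None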
@@ -33,17 +34,16 @@ class JournalMetadataImporter(EntityImporter):
def __init__(self, api, **kwargs):
- eg_desc = kwargs.get('editgroup_description',
- "Automated import of container-level metadata, by ISSN. Metadata from Internet Archive munging.")
- eg_extra = kwargs.get('editgroup_extra', dict())
- eg_extra['agent'] = eg_extra.get('agent', 'fatcat_tools.JournalMetadataImporter')
- super().__init__(api,
- editgroup_description=eg_desc,
- editgroup_extra=eg_extra,
- **kwargs)
+ eg_desc = kwargs.get(
+ "editgroup_description",
+ "Automated import of container-level metadata, by ISSN. Metadata from Internet Archive munging.",
+ )
+ eg_extra = kwargs.get("editgroup_extra", dict())
+ eg_extra["agent"] = eg_extra.get("agent", "fatcat_tools.JournalMetadataImporter")
+ super().__init__(api, editgroup_description=eg_desc, editgroup_extra=eg_extra, **kwargs)
def want(self, raw_record):
- if raw_record.get('issnl') and raw_record.get('name'):
+ if raw_record.get("issnl") and raw_record.get("name"):
return True
return False
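The constructor above fills in a default editgroup description and an "agent" tag before deferring to EntityImporter, and want() gates records so that only rows carrying both an ISSN-L and a name reach parsing. A hedged construction sketch; the client setup and host URL are illustrative, not prescribed by this diff:

import fatcat_openapi_client
from fatcat_tools.importers.journal_metadata import JournalMetadataImporter

# Illustrative API client setup; host and auth will differ per deployment.
conf = fatcat_openapi_client.Configuration()
conf.host = "http://localhost:9411/v0"
api = fatcat_openapi_client.DefaultApi(fatcat_openapi_client.ApiClient(conf))

importer = JournalMetadataImporter(api)

# want() only admits records with both 'issnl' and 'name' present.
assert importer.want({"issnl": "1234-5678", "name": "Example Journal"}) is True
assert importer.want({"issnl": "1234-5678"}) is False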
@@ -54,52 +54,68 @@ class JournalMetadataImporter(EntityImporter):
returns a ContainerEntity (or None if invalid or couldn't parse)
"""
- if not row.get('name'):
+ if not row.get("name"):
# Name is required (by schema)
return None
extra = dict()
- for key in ('issne', 'issnp', 'languages', 'country', 'urls', 'abbrev',
- 'coden', 'aliases', 'original_name', 'first_year', 'last_year',
- 'platform', 'default_license', 'road', 'mimetypes',
- 'sherpa_romeo', 'kbart'):
+ for key in (
+ "issne",
+ "issnp",
+ "languages",
+ "country",
+ "urls",
+ "abbrev",
+ "coden",
+ "aliases",
+ "original_name",
+ "first_year",
+ "last_year",
+ "platform",
+ "default_license",
+ "road",
+ "mimetypes",
+ "sherpa_romeo",
+ "kbart",
+ ):
if row.get(key):
extra[key] = row[key]
# TODO: not including for now: norwegian, dois/crossref, ia
extra_doaj = dict()
- if row.get('doaj'):
- if row['doaj'].get('as_of'):
- extra_doaj['as_of'] = row['doaj']['as_of']
- if row['doaj'].get('works'):
- extra_doaj['works'] = row['doaj']['works']
+ if row.get("doaj"):
+ if row["doaj"].get("as_of"):
+ extra_doaj["as_of"] = row["doaj"]["as_of"]
+ if row["doaj"].get("works"):
+ extra_doaj["works"] = row["doaj"]["works"]
if extra_doaj:
- extra['doaj'] = extra_doaj
+ extra["doaj"] = extra_doaj
extra_ia = dict()
# TODO: would like an ia.longtail_ia flag
- if row.get('sim'):
+ if row.get("sim"):
# NB: None case of the .get() here is blech, but otherwise
# extra['ia'].get('sim') would be false-y, breaking 'any_ia_sim' later on
- extra_ia['sim'] = {
- 'year_spans': row['sim'].get('year_spans'),
+ extra_ia["sim"] = {
+ "year_spans": row["sim"].get("year_spans"),
}
if extra_ia:
- extra['ia'] = extra_ia
+ extra["ia"] = extra_ia
- name = clean(row.get('name'))
+ name = clean(row.get("name"))
if not name:
return None
ce = fatcat_openapi_client.ContainerEntity(
- issnl=row['issnl'],
- issne=row.get('issne'),
- issnp=row.get('issnp'),
- container_type=None, # TODO
+ issnl=row["issnl"],
+ issne=row.get("issne"),
+ issnp=row.get("issnp"),
+ container_type=None, # TODO
name=name,
- publisher=clean(row.get('publisher')),
- wikidata_qid=None, # TODO
- extra=extra)
+ publisher=clean(row.get("publisher")),
+ wikidata_qid=None, # TODO
+ extra=extra,
+ )
return ce
def try_update(self, ce):
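The large hunk above is the record parser (parse_record() in this importer): it copies a fixed whitelist of optional fields into extra, nests DOAJ and IA/SIM details under their own keys, and builds a ContainerEntity keyed by ISSN-L. A sketch of the expected input/output shape, reusing the importer from the construction sketch above; the row values are made up for illustration:

row = {
    "issnl": "1234-5678",
    "name": "Journal of Examples",
    "publisher": "Example Press",
    "issne": "1234-5679",
    "kbart": {"lockss": {"year_spans": [[2001, 2010]]}},  # hypothetical KBART payload
    "doaj": {"as_of": "2019-01-01", "works": True},
}

ce = importer.parse_record(row)
assert ce.issnl == "1234-5678"
assert ce.name == "Journal of Examples"  # clean() passes plain ASCII through unchanged
assert ce.extra["kbart"] == row["kbart"]
assert ce.extra["doaj"] == {"as_of": "2019-01-01", "works": True}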
@@ -118,23 +134,26 @@ class JournalMetadataImporter(EntityImporter):
# for now, only update KBART, and only if there is new content
if not existing.extra:
existing.extra = dict()
- if ce.extra.get('kbart') and (existing.extra.get('kbart') != ce.extra['kbart']):
- if not existing.extra.get('kbart'):
- existing.extra['kbart'] = {}
- existing.extra['kbart'].update(ce.extra['kbart'])
+ if ce.extra.get("kbart") and (existing.extra.get("kbart") != ce.extra["kbart"]):
+ if not existing.extra.get("kbart"):
+ existing.extra["kbart"] = {}
+ existing.extra["kbart"].update(ce.extra["kbart"])
self.api.update_container(self.get_editgroup_id(), existing.ident, existing)
- self.counts['update'] += 1
+ self.counts["update"] += 1
return False
else:
- self.counts['exists'] += 1
+ self.counts["exists"] += 1
return False
# if we got this far, it's a bug
raise NotImplementedError
def insert_batch(self, batch):
- self.api.create_container_auto_batch(fatcat_openapi_client.ContainerAutoBatch(
- editgroup=fatcat_openapi_client.Editgroup(
- description=self.editgroup_description,
- extra=self.editgroup_extra),
- entity_list=batch))
+ self.api.create_container_auto_batch(
+ fatcat_openapi_client.ContainerAutoBatch(
+ editgroup=fatcat_openapi_client.Editgroup(
+ description=self.editgroup_description, extra=self.editgroup_extra
+ ),
+ entity_list=batch,
+ )
+ )
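Taken together: try_update() only pushes KBART changes onto containers that already exist (tallied under 'update' or 'exists'), while genuinely new entities accumulate until insert_batch() submits them as a single ContainerAutoBatch editgroup. A hedged end-to-end sketch of driving the importer, assuming the push_record()/finish() interface that the EntityImporter base class in .common provides to the other fatcat_tools importers; the input filename is made up:

import json

# Feed newline-delimited JSON records through the importer; the base class
# batches new entities and eventually calls insert_batch() / try_update().
with open("journal_metadata.json") as f:  # hypothetical dump file
    for line in f:
        importer.push_record(json.loads(line))
importer.finish()

print(importer.counts)  # counters such as 'insert', 'update', 'exists', 'skip'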