From 31d1a6a713d177990609767d508209ced19ca396 Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Tue, 2 Nov 2021 18:14:59 -0700 Subject: fmt (black): fatcat_tools/ --- python/fatcat_tools/importers/journal_metadata.py | 111 +++++++++++++--------- 1 file changed, 65 insertions(+), 46 deletions(-) (limited to 'python/fatcat_tools/importers/journal_metadata.py') diff --git a/python/fatcat_tools/importers/journal_metadata.py b/python/fatcat_tools/importers/journal_metadata.py index 25d7b3b5..6d1fefa3 100644 --- a/python/fatcat_tools/importers/journal_metadata.py +++ b/python/fatcat_tools/importers/journal_metadata.py @@ -1,4 +1,3 @@ - import fatcat_openapi_client from .common import EntityImporter, clean @@ -11,18 +10,20 @@ def or_none(s): return None return s + def truthy(s): if s is None: return None s = s.lower() - if s in ('true', 't', 'yes', 'y', '1'): + if s in ("true", "t", "yes", "y", "1"): return True - elif s in ('false', 'f', 'no', 'n', '0'): + elif s in ("false", "f", "no", "n", "0"): return False else: return None + class JournalMetadataImporter(EntityImporter): """ Imports journal metadata ("containers") by ISSN, currently from a custom @@ -33,17 +34,16 @@ class JournalMetadataImporter(EntityImporter): def __init__(self, api, **kwargs): - eg_desc = kwargs.get('editgroup_description', - "Automated import of container-level metadata, by ISSN. Metadata from Internet Archive munging.") - eg_extra = kwargs.get('editgroup_extra', dict()) - eg_extra['agent'] = eg_extra.get('agent', 'fatcat_tools.JournalMetadataImporter') - super().__init__(api, - editgroup_description=eg_desc, - editgroup_extra=eg_extra, - **kwargs) + eg_desc = kwargs.get( + "editgroup_description", + "Automated import of container-level metadata, by ISSN. Metadata from Internet Archive munging.", + ) + eg_extra = kwargs.get("editgroup_extra", dict()) + eg_extra["agent"] = eg_extra.get("agent", "fatcat_tools.JournalMetadataImporter") + super().__init__(api, editgroup_description=eg_desc, editgroup_extra=eg_extra, **kwargs) def want(self, raw_record): - if raw_record.get('issnl') and raw_record.get('name'): + if raw_record.get("issnl") and raw_record.get("name"): return True return False @@ -54,52 +54,68 @@ class JournalMetadataImporter(EntityImporter): returns a ContainerEntity (or None if invalid or couldn't parse) """ - if not row.get('name'): + if not row.get("name"): # Name is required (by schema) return None extra = dict() - for key in ('issne', 'issnp', 'languages', 'country', 'urls', 'abbrev', - 'coden', 'aliases', 'original_name', 'first_year', 'last_year', - 'platform', 'default_license', 'road', 'mimetypes', - 'sherpa_romeo', 'kbart'): + for key in ( + "issne", + "issnp", + "languages", + "country", + "urls", + "abbrev", + "coden", + "aliases", + "original_name", + "first_year", + "last_year", + "platform", + "default_license", + "road", + "mimetypes", + "sherpa_romeo", + "kbart", + ): if row.get(key): extra[key] = row[key] # TODO: not including for now: norwegian, dois/crossref, ia extra_doaj = dict() - if row.get('doaj'): - if row['doaj'].get('as_of'): - extra_doaj['as_of'] = row['doaj']['as_of'] - if row['doaj'].get('works'): - extra_doaj['works'] = row['doaj']['works'] + if row.get("doaj"): + if row["doaj"].get("as_of"): + extra_doaj["as_of"] = row["doaj"]["as_of"] + if row["doaj"].get("works"): + extra_doaj["works"] = row["doaj"]["works"] if extra_doaj: - extra['doaj'] = extra_doaj + extra["doaj"] = extra_doaj extra_ia = dict() # TODO: would like an ia.longtail_ia flag - if row.get('sim'): + if row.get("sim"): # NB: None case of the .get() here is blech, but othrwise # extra['ia'].get('sim') would be false-y, breaking 'any_ia_sim' later on - extra_ia['sim'] = { - 'year_spans': row['sim'].get('year_spans'), + extra_ia["sim"] = { + "year_spans": row["sim"].get("year_spans"), } if extra_ia: - extra['ia'] = extra_ia + extra["ia"] = extra_ia - name = clean(row.get('name')) + name = clean(row.get("name")) if not name: return None ce = fatcat_openapi_client.ContainerEntity( - issnl=row['issnl'], - issne=row.get('issne'), - issnp=row.get('issnp'), - container_type=None, # TODO + issnl=row["issnl"], + issne=row.get("issne"), + issnp=row.get("issnp"), + container_type=None, # TODO name=name, - publisher=clean(row.get('publisher')), - wikidata_qid=None, # TODO - extra=extra) + publisher=clean(row.get("publisher")), + wikidata_qid=None, # TODO + extra=extra, + ) return ce def try_update(self, ce): @@ -118,23 +134,26 @@ class JournalMetadataImporter(EntityImporter): # for now, only update KBART, and only if there is new content if not existing.extra: existing.extra = dict() - if ce.extra.get('kbart') and (existing.extra.get('kbart') != ce.extra['kbart']): - if not existing.extra.get('kbart'): - existing.extra['kbart'] = {} - existing.extra['kbart'].update(ce.extra['kbart']) + if ce.extra.get("kbart") and (existing.extra.get("kbart") != ce.extra["kbart"]): + if not existing.extra.get("kbart"): + existing.extra["kbart"] = {} + existing.extra["kbart"].update(ce.extra["kbart"]) self.api.update_container(self.get_editgroup_id(), existing.ident, existing) - self.counts['update'] += 1 + self.counts["update"] += 1 return False else: - self.counts['exists'] += 1 + self.counts["exists"] += 1 return False # if we got this far, it's a bug raise NotImplementedError def insert_batch(self, batch): - self.api.create_container_auto_batch(fatcat_openapi_client.ContainerAutoBatch( - editgroup=fatcat_openapi_client.Editgroup( - description=self.editgroup_description, - extra=self.editgroup_extra), - entity_list=batch)) + self.api.create_container_auto_batch( + fatcat_openapi_client.ContainerAutoBatch( + editgroup=fatcat_openapi_client.Editgroup( + description=self.editgroup_description, extra=self.editgroup_extra + ), + entity_list=batch, + ) + ) -- cgit v1.2.3