diff options
Diffstat (limited to 'python/fatcat_tools')
-rw-r--r-- | python/fatcat_tools/harvest/harvest_common.py | 4 | ||||
-rw-r--r-- | python/fatcat_tools/importers/common.py | 2 | ||||
-rw-r--r-- | python/fatcat_tools/importers/crossref.py | 4 | ||||
-rw-r--r-- | python/fatcat_tools/importers/datacite.py | 8 | ||||
-rw-r--r-- | python/fatcat_tools/transforms/csl.py | 18 | ||||
-rw-r--r-- | python/fatcat_tools/workers/changelog.py | 26 |
6 files changed, 47 insertions, 15 deletions
diff --git a/python/fatcat_tools/harvest/harvest_common.py b/python/fatcat_tools/harvest/harvest_common.py index 310366bd..5f7aa084 100644 --- a/python/fatcat_tools/harvest/harvest_common.py +++ b/python/fatcat_tools/harvest/harvest_common.py @@ -133,7 +133,7 @@ class HarvestState: def fail_fast(err, msg): if err: raise KafkaException(err) - print("Commiting status to Kafka: {}".format(kafka_topic), file=sys.stderr) + print("Committing status to Kafka: {}".format(kafka_topic), file=sys.stderr) producer_conf = kafka_config.copy() producer_conf.update({ 'delivery.report.only.error': True, @@ -164,7 +164,7 @@ class HarvestState: raise KafkaException(err) conf = kafka_config.copy() conf.update({ - 'group.id': 'dummy_init_group', # should never be commited + 'group.id': 'dummy_init_group', # should never be committed 'enable.auto.commit': False, 'auto.offset.reset': 'earliest', 'session.timeout.ms': 10000, diff --git a/python/fatcat_tools/importers/common.py b/python/fatcat_tools/importers/common.py index c000ad62..da611ecb 100644 --- a/python/fatcat_tools/importers/common.py +++ b/python/fatcat_tools/importers/common.py @@ -816,7 +816,7 @@ class KafkaJsonPusher(RecordPusher): while True: # Note: this is batch-oriented, because underlying importer is # often batch-oriented, but this doesn't confirm that entire batch - # has been pushed to fatcat before commiting offset. Eg, consider + # has been pushed to fatcat before committing offset. Eg, consider # case where there there is one update and thousands of creates; # update would be lingering in importer, and if importer crashed # never created. diff --git a/python/fatcat_tools/importers/crossref.py b/python/fatcat_tools/importers/crossref.py index bd070ef1..9617299c 100644 --- a/python/fatcat_tools/importers/crossref.py +++ b/python/fatcat_tools/importers/crossref.py @@ -9,7 +9,7 @@ import fatcat_openapi_client from .common import EntityImporter, clean -# The docs/guide should be the cannonical home for these mappings; update there +# The docs/guide should be the canonical home for these mappings; update there # first # Can get a list of Crossref types (with counts) via API: # https://api.crossref.org/works?rows=0&facet=type-name:* @@ -188,7 +188,7 @@ class CrossrefImporter(EntityImporter): self.counts['skip-release-type'] += 1 return None - # Do require the 'title' keys to exsit, as release entities do + # Do require the 'title' keys to exist, as release entities do if (not 'title' in obj) or (not obj['title']): self.counts['skip-blank-title'] += 1 return None diff --git a/python/fatcat_tools/importers/datacite.py b/python/fatcat_tools/importers/datacite.py index 5b736787..81f00876 100644 --- a/python/fatcat_tools/importers/datacite.py +++ b/python/fatcat_tools/importers/datacite.py @@ -3,7 +3,7 @@ Prototype importer for datacite.org data. Example input document: https://api.datacite.org/dois/10.7916/d8-f93n-rk51 -Datacite being an aggregator, the data is heterogenous and exposes a couple of +Datacite being an aggregator, the data is heterogeneous and exposes a couple of problems in content and structure. A few fields have their own parsing functions (parse_datacite_...), which may help testing. """ @@ -36,7 +36,7 @@ CONTAINER_TYPE_MAP = { 'Book Series': 'book-series', } -# The docs/guide should be the cannonical home for these mappings; update there +# The docs/guide should be the canonical home for these mappings; update there # first. Map various datacite type types to CSL-ish types. None means TODO or # remove. DATACITE_TYPE_MAP = { @@ -228,7 +228,7 @@ class DataciteImporter(EntityImporter): def lookup_ext_ids(self, doi): """ - Return dictionary of identifiers refering to the same things as the given DOI. + Return dictionary of identifiers referring to the same things as the given DOI. """ if self.extid_map_db is None: return dict(core_id=None, @@ -584,7 +584,7 @@ class DataciteImporter(EntityImporter): # Include certain relations from relatedIdentifiers. Keeping the # original structure of data here, which is a list of dicts, with - # relation type, identifer and identifier type (mostly). + # relation type, identifier and identifier type (mostly). relations = [] for rel in relIds: if rel.get('relationType') in ('IsPartOf', 'Reviews', 'Continues', diff --git a/python/fatcat_tools/transforms/csl.py b/python/fatcat_tools/transforms/csl.py index 7ab94cac..832ad6aa 100644 --- a/python/fatcat_tools/transforms/csl.py +++ b/python/fatcat_tools/transforms/csl.py @@ -37,8 +37,9 @@ def release_to_csl(entity): # Default to "local" (publication-specific) metadata; fall back to # creator-level family = contrib.surname or contrib.creator.surname or (contrib.raw_name and contrib.raw_name.split()[-1]) - if not contrib.raw_name: - raise ValueError("CSL requires some surname (family name)") + if not family: + # CSL requires some surname (family name) + continue c = dict( family=family, given=contrib.given_name or contrib.creator.given_name, @@ -49,22 +50,27 @@ def release_to_csl(entity): #static-ordering literal=contrib.raw_name or contrib.creator.display_name, #parse-names, - role=contrib.role, + # role must be defined; default to author + role=contrib.role or 'author', ) else: family = contrib.surname or (contrib.raw_name and contrib.raw_name.split()[-1]) - if not contrib.raw_name: - raise ValueError("CSL requires some surname (family name)") + if not family: + # CSL requires some surname (family name) + continue c = dict( family=family, given=contrib.given_name, literal=contrib.raw_name, - role=contrib.role, + # role must be defined; default to author + role=contrib.role or 'author', ) for k in list(c.keys()): if not c[k]: c.pop(k) contribs.append(c) + if not contribs: + raise ValueError("citeproc requires at least one author with a surname") abstract = None if entity.abstracts: abstract = entity.abstracts[0].content diff --git a/python/fatcat_tools/workers/changelog.py b/python/fatcat_tools/workers/changelog.py index 5783bbfc..d1e7c2db 100644 --- a/python/fatcat_tools/workers/changelog.py +++ b/python/fatcat_tools/workers/changelog.py @@ -110,6 +110,32 @@ class EntityUpdatesWorker(FatcatWorker): # the lancet (often hybrid OA) "10.1016/s0140-6736", "10.1016/s2213-2600", + # journal of virology + "10.1128/jvi.", + # FEBS letters + "10.1002/1873-3468.", + # Journal of Neuroscience + "10.1523/jneurosci.", + # Chemical and pharmaceutical bulletin + "10.1248/cpb.", + # Japanese Journal of Radiological Technology + "10.6009/jjrt.", + # Seibutsu Butsuri + "10.2142/biophys.", + # Chemical Communications + "10.1039/d0cc", + # Yakugaku zasshi + "10.1248/yakushi.", + # bulletin AMS + "10.1090/s0002-9904", + # Current Biology + "10.1016/j.cub.", + # Antarctica A Keystone in a Changing World + "10.3133/ofr", + # Clinical Cancer Research + "10.1158/1078-0432.", + # Transactions of the Japan Society of Mechanical Engineers + "10.1299/kikai", ] def want_live_ingest(self, release, ingest_request): |