about | summary | refs | log | tree | commit | diff | stats
path: root/python/fatcat_tools/importers
diff options
context:
space:
mode:
author: Martin Czygan <martin.czygan@gmail.com> 2019-12-26 17:36:18 +0100
committer: Martin Czygan <martin.czygan@gmail.com> 2019-12-28 23:07:32 +0100
commit: 1f7bbc5a582db45fcd6034800959e158d35a2297 (patch)
tree: a0b584353ad6b638f641f35638d87d70cd28463a /python/fatcat_tools/importers
parent: a4cd65ed4897987e70520d81c7caa27cd28ed5a3 (diff)
download: fatcat-1f7bbc5a582db45fcd6034800959e158d35a2297.tar.gz
download: fatcat-1f7bbc5a582db45fcd6034800959e158d35a2297.zip
datacite: include doi in error messages
Diffstat (limited to 'python/fatcat_tools/importers')
-rw-r--r-- python/fatcat_tools/importers/datacite.py 16
1 files changed, 8 insertions, 8 deletions
diff --git a/python/fatcat_tools/importers/datacite.py b/python/fatcat_tools/importers/datacite.py
index 66f812e2..a4a3ef8b 100644
--- a/python/fatcat_tools/importers/datacite.py
+++ b/python/fatcat_tools/importers/datacite.py
@@ -264,6 +264,7 @@ class DataciteImporter(EntityImporter):
return None
attributes = obj['attributes']
+ doi = attributes.get('doi', '').lower()
# Contributors. Many nameIdentifierSchemes, we do not use (yet):
# "attributes.creators[].nameIdentifiers[].nameIdentifierScheme":
@@ -313,7 +314,7 @@ class DataciteImporter(EntityImporter):
contribs.append(fatcat_openapi_client.ReleaseContrib(
index=i, extra=extra))
else:
- print('unknown name type: {}'.format(nameType), file=sys.stderr)
+ print('[{}] unknown name type: {}'.format(doi, nameType), file=sys.stderr)
# Title, may come with "attributes.titles[].titleType", like
# "AlternativeTitle", "Other", "Subtitle", "TranslatedTitle"
@@ -322,7 +323,7 @@ class DataciteImporter(EntityImporter):
titles)
if not title:
- print('skipping record w/o title: {}'.format(obj), file=sys.stderr)
+ print('[{}] skipping record w/o title: {}'.format(doi, obj), file=sys.stderr)
return False
if not subtitle:
@@ -370,7 +371,7 @@ class DataciteImporter(EntityImporter):
container_title = container.get('title')
if isinstance(container_title, list):
if len(container_title) > 0:
- print('too many container titles: {}'.format(
+ print('[{}] too many container titles: {}'.format(doi,
len(container_title)))
container_title = container_title[0]
assert isinstance(container_title, str)
@@ -398,7 +399,7 @@ class DataciteImporter(EntityImporter):
int(first_page) < int(last_page)
pages = '{}-{}'.format(first_page, last_page)
except ValueError as err:
- print(err, file=sys.stderr)
+ print('[{}] {}'.format(doi, err), file=sys.stderr)
pass
if not pages and first_page:
@@ -426,7 +427,7 @@ class DataciteImporter(EntityImporter):
break
if release_type is None:
- print("no mapped type: {}".format(value), file=sys.stderr)
+ print("[{}] no mapped type: {}".format(doi, value), file=sys.stderr)
# Language values are varied ("ger", "es", "English", "ENG", "en-us",
# "other", ...). Try to crush it with langcodes: "It may sound to you
@@ -439,7 +440,7 @@ class DataciteImporter(EntityImporter):
try:
language = pycountry.languages.lookup(value).alpha_2
except (LookupError, AttributeError) as err:
- print('language lookup miss for {}: {}'.format(value, err), file=sys.stderr)
+ print('[{}] language lookup miss for {}: {}'.format(doi, value, err), file=sys.stderr)
# Abstracts appear in "attributes.descriptions[].descriptionType", some
# of the observed values: "Methods", "TechnicalInfo",
@@ -461,7 +462,7 @@ class DataciteImporter(EntityImporter):
try:
lang = langdetect.detect(text)
except langdetect.lang_detect_exception.LangDetectException as err:
- print('language detection failed: {}'.format(err),
+ print('[{}] language detection failed: {}'.format(doi, err),
file=sys.stderr)
abstracts.append(
fatcat_openapi_client.ReleaseAbstract(
@@ -534,7 +535,6 @@ class DataciteImporter(EntityImporter):
if extra_datacite:
extra['datacite'] = extra_datacite
- doi = attributes.get('doi', '').lower()
extids = self.lookup_ext_ids(doi=doi)
# Assemble release.