summary refs log tree commit diff stats
path: root/python/fatcat_tools
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@robocracy.org> 2021-10-12 20:05:57 -0700
committer Bryan Newbold <bnewbold@robocracy.org> 2021-10-13 16:21:31 -0700
commit 3052b094f2b3c1183abc17c9ca158eb6a8808a42 (patch)
tree 815923722b54333d8aa7192c5923d0d90f334374 /python/fatcat_tools
parent 85f4124d0c70b9fb55e6f549cfd906fcf8783c6f (diff)
download fatcat-3052b094f2b3c1183abc17c9ca158eb6a8808a42.tar.gz
fatcat-3052b094f2b3c1183abc17c9ca158eb6a8808a42.zip
python: partial importer utilization of new schema changes
Diffstat (limited to 'python/fatcat_tools')
-rw-r--r-- python/fatcat_tools/importers/chocula.py 8
-rw-r--r-- python/fatcat_tools/importers/journal_metadata.py 4
-rw-r--r-- python/fatcat_tools/importers/pubmed.py 12
3 files changed, 18 insertions, 6 deletions
diff --git a/python/fatcat_tools/importers/chocula.py b/python/fatcat_tools/importers/chocula.py
index 63290453..5c9efe94 100644
--- a/python/fatcat_tools/importers/chocula.py
+++ b/python/fatcat_tools/importers/chocula.py
@@ -52,7 +52,7 @@ class ChoculaImporter(EntityImporter):
name = name[:-1]
extra = dict()
- for k in ('urls', 'webarchive_urls', 'issne', 'issnp', 'country',
+ for k in ('urls', 'webarchive_urls', 'country',
'sherpa_romeo', 'ezb', 'szczepanski', 'doaj', 'languages',
'ia', 'scielo', 'kbart', 'publisher_type', 'platform'):
if row['extra'].get(k):
@@ -66,6 +66,8 @@ class ChoculaImporter(EntityImporter):
ce = fatcat_openapi_client.ContainerEntity(
issnl=row['issnl'],
+ issnp=row['extra'].get('issnp'),
+ issne=row['extra'].get('issne'),
ident=row['ident'],
name=name,
container_type=container_type,
@@ -132,6 +134,8 @@ class ChoculaImporter(EntityImporter):
existing.wikidata_qid = existing.wikidata_qid or ce.wikidata_qid
existing.publisher = existing.publisher or ce.publisher
existing.container_type = existing.container_type or ce.container_type
+ existing.issne = existing.issne or ce.issne
+ existing.issnp = existing.issnp or ce.issnp
for k in ('urls', 'webarchive_urls'):
# be conservative about URL updates; don't clobber existing URL lists
# may want to make this behavior more sophisticated in the
@@ -143,7 +147,7 @@ class ChoculaImporter(EntityImporter):
# always update (chocula over-rides)
if ce.extra.get(k):
existing.extra[k] = ce.extra[k]
- for k in ('issne', 'issnp', 'country'):
+ for k in ('country',):
# only include if not set (don't clobber human edits)
if ce.extra.get(k) and not existing.extra.get(k):
existing.extra[k] = ce.extra[k]
diff --git a/python/fatcat_tools/importers/journal_metadata.py b/python/fatcat_tools/importers/journal_metadata.py
index 32782eac..9f3b429f 100644
--- a/python/fatcat_tools/importers/journal_metadata.py
+++ b/python/fatcat_tools/importers/journal_metadata.py
@@ -92,6 +92,8 @@ class JournalMetadataImporter(EntityImporter):
ce = fatcat_openapi_client.ContainerEntity(
issnl=row['issnl'],
+ issne=row.get('issne'),
+ issnp=row.get('issnp'),
container_type=None, # TODO
name=name,
publisher=clean(row.get('publisher')),
@@ -103,7 +105,7 @@ class JournalMetadataImporter(EntityImporter):
existing = None
try:
- existing = self.api.lookup_container(issnl=ce.issnl)
+ existing = self.api.lookup_container(issn=ce.issnl)
except fatcat_openapi_client.rest.ApiException as err:
if err.status != 404:
raise err
diff --git a/python/fatcat_tools/importers/pubmed.py b/python/fatcat_tools/importers/pubmed.py
index 0ff55c05..c34fd7d6 100644
--- a/python/fatcat_tools/importers/pubmed.py
+++ b/python/fatcat_tools/importers/pubmed.py
@@ -448,9 +448,14 @@ class PubmedImporter(EntityImporter):
journal = medline.Article.Journal
issnp = journal.find("ISSN", IssnType="Print")
if issnp:
- container_extra['issnp'] = issnp.string
- if not issnl:
+ issnp = clean_issn(issnp.string)
+ else:
+ issnp = None
+
+ if not issnl and issnp:
issnl = self.issn2issnl(issnp)
+ else:
+ issnl = None
if issnl:
container_id = self.lookup_issnl(issnl)
@@ -490,12 +495,13 @@ class PubmedImporter(EntityImporter):
if (container_id is None and self.create_containers and (issnl is not None)
and container_name):
# name, type, publisher, issnl
- # extra: issnp, issne, original_name, languages, country
+ # extra: original_name, languages, country
ce = fatcat_openapi_client.ContainerEntity(
name=container_name,
container_type='journal',
#NOTE: publisher not included
issnl=issnl,
+ issnp=issnp,
extra=(container_extra or None))
ce_edit = self.create_container(ce)
container_id = ce_edit.ident