about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@robocracy.org> 2020-08-04 10:38:10 -0700
committer Bryan Newbold <bnewbold@robocracy.org> 2020-08-04 10:39:19 -0700
commit 990af8b9b8dab3bcfde3f93e21d89b3a2f41dcd8 (patch)
tree af02f29af8b43a8eecea333d92a63f745e07ff61
parent f81069f4cb126af65f2e2fe08cde44077eea75e7 (diff)
download fatcat-990af8b9b8dab3bcfde3f93e21d89b3a2f41dcd8.tar.gz
fatcat-990af8b9b8dab3bcfde3f93e21d89b3a2f41dcd8.zip
more update keys and cases for chocula importer
-rw-r--r-- python/fatcat_tools/importers/chocula.py 16
1 files changed, 11 insertions, 5 deletions
diff --git a/python/fatcat_tools/importers/chocula.py b/python/fatcat_tools/importers/chocula.py
index 1683c500..356ffe42 100644
--- a/python/fatcat_tools/importers/chocula.py
+++ b/python/fatcat_tools/importers/chocula.py
@@ -53,7 +53,8 @@ class ChoculaImporter(EntityImporter):
extra = dict()
for k in ('urls', 'webarchive_urls', 'issne', 'issnp', 'country',
- 'sherpa_romeo', 'ezb', 'szczepanski', 'doaj', 'languages'):
+ 'sherpa_romeo', 'ezb', 'szczepanski', 'doaj', 'languages',
+ 'ia', 'scielo', 'kbart', 'publisher_type', 'platform'):
if row['extra'].get(k):
extra[k] = row['extra'][k]
@@ -115,17 +116,21 @@ class ChoculaImporter(EntityImporter):
do_update = True
if set(ce.extra.get('webarchive_urls', [])) != set(existing.extra.get('webarchive_urls', [])):
do_update = True
- for k in ('ezb', 'szczepanski', 'doaj'):
+ for k in ('ezb', 'szczepanski', 'doaj', 'publisher_type', 'platform'):
if ce.extra.get(k) and not existing.extra.get(k):
do_update = True
+ for k in ('kbart', 'ia', 'doaj'):
+ # always update with these fields
+ if ce.extra.get(k) and ce.extra[k] != existing.extra.get(k):
+ do_update = True
if ce.publisher and not existing.publisher:
do_update = True
if ce.wikidata_qid and not existing.wikidata_qid:
do_update = True
if do_update:
- existing.wikidata_qid = ce.wikidata_qid
- existing.publisher = ce.publisher
+ existing.wikidata_qid = existing.wikidata_qid or ce.wikidata_qid
+ existing.publisher = existing.publisher or ce.publisher
existing.container_type = existing.container_type or ce.container_type
for k in ('urls', 'webarchive_urls'):
# update, which might clobber, but won't remove
@@ -135,7 +140,8 @@ class ChoculaImporter(EntityImporter):
# all URLs found to be bad), but being conservative for now so
# we don't clobber human edits
for k in ('issne', 'issnp', 'country', 'sherpa_romeo', 'ezb',
- 'szczepanski', 'doaj'):
+ 'szczepanski', 'doaj', 'ia', 'scielo', 'kbart',
+ 'publisher_type', 'platform'):
# update/overwrite, but don't remove any existing value
if ce.extra.get(k):
existing.extra[k] = ce.extra[k]