diff options
author | Bryan Newbold <bnewbold@robocracy.org> | 2019-05-16 13:53:15 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@robocracy.org> | 2019-05-21 11:41:29 -0700 |
commit | 6ff79f47c7c7ae27b28685674672e58b7dd4d271 (patch) | |
tree | 2470f89de864207da8ccc92151cb35d5e20ba21b /python/fatcat_tools/importers/jstor.py | |
parent | 300665927f578151321b0d91b28f8aadffcf227d (diff) | |
download | fatcat-6ff79f47c7c7ae27b28685674672e58b7dd4d271.tar.gz fatcat-6ff79f47c7c7ae27b28685674672e58b7dd4d271.zip |
tweaks to new imports/tests
Diffstat (limited to 'python/fatcat_tools/importers/jstor.py')
-rw-r--r-- | python/fatcat_tools/importers/jstor.py | 20 |
1 files changed, 7 insertions, 13 deletions
```diff
diff --git a/python/fatcat_tools/importers/jstor.py b/python/fatcat_tools/importers/jstor.py
index 9bf4a043..fd1decf7 100644
--- a/python/fatcat_tools/importers/jstor.py
+++ b/python/fatcat_tools/importers/jstor.py
@@ -1,20 +1,12 @@
 
 import sys
 import json
-import sqlite3
 import datetime
-import itertools
-import subprocess
+import warnings
 from bs4 import BeautifulSoup
 
 import fatcat_client
-from .common import EntityImporter, clean
-
-# is this just ISO 3-char to ISO 2-char?
-# XXX: more entries
-JSTOR_LANG_MAP = {
-    'eng': 'en',
-}
+from .common import EntityImporter, clean, LANG_MAP_MARC
 
 # XXX: more entries
 JSTOR_CONTRIB_MAP = {
@@ -136,7 +128,9 @@ class JstorImporter(EntityImporter):
         cm = article_meta.find("custom-meta")
         if cm.find("meta-name").string == "lang":
             language = cm.find("meta-value").string
-            language = JSTOR_LANG_MAP.get(language)
+            language = LANG_MAP_MARC.get(language)
+            if not language:
+                warnings.warn("MISSING MARC LANG: {}".format(cm.find("meta-value").string))
 
         release_type = "article-journal"
         if "[Abstract]" in title:
@@ -238,7 +232,7 @@ class JstorImporter(EntityImporter):
             return False
         elif existing:
             # but do update if only DOI was set
-            existing.ext_ids.jstor = re.jstor_id
+            existing.ext_ids.jstor = re.ext_ids.jstor
             existing.extra['jstor'] = re.extra['jstor']
             self.api.update_release(self.get_editgroup_id(), existing.ident, existing)
             self.counts['update'] += 1
@@ -265,5 +259,5 @@ class JstorImporter(EntityImporter):
             #sys.exit(-1)
 
 if __name__=='__main__':
-    parser = JstorImporter()
+    parser = JstorImporter(None, None)
     parser.parse_file(open(sys.argv[1]))
```