diff options
author | Bryan Newbold <bnewbold@robocracy.org> | 2021-10-12 20:05:57 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@robocracy.org> | 2021-10-13 16:21:31 -0700 |
commit | 3052b094f2b3c1183abc17c9ca158eb6a8808a42 (patch) | |
tree | 815923722b54333d8aa7192c5923d0d90f334374 /python | |
parent | 85f4124d0c70b9fb55e6f549cfd906fcf8783c6f (diff) | |
download | fatcat-3052b094f2b3c1183abc17c9ca158eb6a8808a42.tar.gz fatcat-3052b094f2b3c1183abc17c9ca158eb6a8808a42.zip |
python: partial importer utilization of new schema changes
Diffstat (limited to 'python')
-rw-r--r-- | python/fatcat_tools/importers/chocula.py | 8 | ||||
-rw-r--r-- | python/fatcat_tools/importers/journal_metadata.py | 4 | ||||
-rw-r--r-- | python/fatcat_tools/importers/pubmed.py | 12 |
3 files changed, 18 insertions, 6 deletions
diff --git a/python/fatcat_tools/importers/chocula.py b/python/fatcat_tools/importers/chocula.py index 63290453..5c9efe94 100644 --- a/python/fatcat_tools/importers/chocula.py +++ b/python/fatcat_tools/importers/chocula.py @@ -52,7 +52,7 @@ class ChoculaImporter(EntityImporter): name = name[:-1] extra = dict() - for k in ('urls', 'webarchive_urls', 'issne', 'issnp', 'country', + for k in ('urls', 'webarchive_urls', 'country', 'sherpa_romeo', 'ezb', 'szczepanski', 'doaj', 'languages', 'ia', 'scielo', 'kbart', 'publisher_type', 'platform'): if row['extra'].get(k): @@ -66,6 +66,8 @@ class ChoculaImporter(EntityImporter): ce = fatcat_openapi_client.ContainerEntity( issnl=row['issnl'], + issnp=row['extra'].get('issnp'), + issne=row['extra'].get('issne'), ident=row['ident'], name=name, container_type=container_type, @@ -132,6 +134,8 @@ class ChoculaImporter(EntityImporter): existing.wikidata_qid = existing.wikidata_qid or ce.wikidata_qid existing.publisher = existing.publisher or ce.publisher existing.container_type = existing.container_type or ce.container_type + existing.issne = existing.issne or ce.issne + existing.issnp = existing.issnp or ce.issnp for k in ('urls', 'webarchive_urls'): # be conservative about URL updates; don't clobber existing URL lists # may want to make this behavior more sophisticated in the @@ -143,7 +147,7 @@ class ChoculaImporter(EntityImporter): # always update (chocula over-rides) if ce.extra.get(k): existing.extra[k] = ce.extra[k] - for k in ('issne', 'issnp', 'country'): + for k in ('country',): # only include if not set (don't clobber human edits) if ce.extra.get(k) and not existing.extra.get(k): existing.extra[k] = ce.extra[k] diff --git a/python/fatcat_tools/importers/journal_metadata.py b/python/fatcat_tools/importers/journal_metadata.py index 32782eac..9f3b429f 100644 --- a/python/fatcat_tools/importers/journal_metadata.py +++ b/python/fatcat_tools/importers/journal_metadata.py @@ -92,6 +92,8 @@ class JournalMetadataImporter(EntityImporter): ce = fatcat_openapi_client.ContainerEntity( issnl=row['issnl'], + issne=row.get('issne'), + issnp=row.get('issnp'), container_type=None, # TODO name=name, publisher=clean(row.get('publisher')), @@ -103,7 +105,7 @@ class JournalMetadataImporter(EntityImporter): existing = None try: - existing = self.api.lookup_container(issnl=ce.issnl) + existing = self.api.lookup_container(issn=ce.issnl) except fatcat_openapi_client.rest.ApiException as err: if err.status != 404: raise err diff --git a/python/fatcat_tools/importers/pubmed.py b/python/fatcat_tools/importers/pubmed.py index 0ff55c05..c34fd7d6 100644 --- a/python/fatcat_tools/importers/pubmed.py +++ b/python/fatcat_tools/importers/pubmed.py @@ -448,9 +448,14 @@ class PubmedImporter(EntityImporter): journal = medline.Article.Journal issnp = journal.find("ISSN", IssnType="Print") if issnp: - container_extra['issnp'] = issnp.string - if not issnl: + issnp = clean_issn(issnp.string) + else: + issnp = None + + if not issnl and issnp: issnl = self.issn2issnl(issnp) + else: + issnl = None if issnl: container_id = self.lookup_issnl(issnl) @@ -490,12 +495,13 @@ class PubmedImporter(EntityImporter): if (container_id is None and self.create_containers and (issnl is not None) and container_name): # name, type, publisher, issnl - # extra: issnp, issne, original_name, languages, country + # extra: original_name, languages, country ce = fatcat_openapi_client.ContainerEntity( name=container_name, container_type='journal', #NOTE: publisher not included issnl=issnl, + issnp=issnp, extra=(container_extra or None)) ce_edit = self.create_container(ce) container_id = ce_edit.ident |