diff options
author | Bryan Newbold <bnewbold@robocracy.org> | 2019-05-13 14:23:02 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@robocracy.org> | 2019-05-13 14:23:06 -0700 |
commit | 955d4942d5111cbbd8f4b99d4e3c6d79262dab6e (patch) | |
tree | 87757a7afdf00ab6de5ed7455267654bf49b5239 /python/fatcat_tools | |
parent | 287e892abb4adac8ea7f7211524a41c7cd06851c (diff) | |
download | fatcat-955d4942d5111cbbd8f4b99d4e3c6d79262dab6e.tar.gz fatcat-955d4942d5111cbbd8f4b99d4e3c6d79262dab6e.zip |
partial python impl of ext_id and release_stage refactors
Diffstat (limited to 'python/fatcat_tools')
-rwxr-xr-x | python/fatcat_tools/importers/cdl_dash_dat.py | 8 | ||||
-rw-r--r-- | python/fatcat_tools/importers/crossref.py | 26 | ||||
-rw-r--r-- | python/fatcat_tools/importers/grobid_metadata.py | 1 | ||||
-rw-r--r-- | python/fatcat_tools/transforms/csl.py | 8 | ||||
-rw-r--r-- | python/fatcat_tools/transforms/elasticsearch.py | 21 |
5 files changed, 35 insertions, 29 deletions
diff --git a/python/fatcat_tools/importers/cdl_dash_dat.py b/python/fatcat_tools/importers/cdl_dash_dat.py index 1a21194d..e1252b6d 100755 --- a/python/fatcat_tools/importers/cdl_dash_dat.py +++ b/python/fatcat_tools/importers/cdl_dash_dat.py @@ -96,7 +96,9 @@ def cdl_dash_release(meta, extra=None): )) r = ReleaseEntity( - doi=doi, + ext_ids=ReleaseEntityExtIds( + doi=doi, + ), title=clean(meta['title'], force_xml=True), publisher=clean(meta['publisher']), release_year=int(meta['publicationYear']), @@ -170,9 +172,9 @@ def auto_cdl_dash_dat(api, dat_path, release_id=None, editgroup_id=None): agent="fatcat_tools.auto_cdl_dash_dat"))) editgroup_id = eg.editgroup_id - if not release_id and release.doi: + if not release_id and release.ext_ids.doi: try: - r = api.lookup_release(doi=release.doi) + r = api.lookup_release(doi=release.ext_ids.doi) release_id = r.ident except fatcat_client.rest.ApiException: pass diff --git a/python/fatcat_tools/importers/crossref.py b/python/fatcat_tools/importers/crossref.py index 722f93aa..999ce13f 100644 --- a/python/fatcat_tools/importers/crossref.py +++ b/python/fatcat_tools/importers/crossref.py @@ -355,10 +355,10 @@ class CrossrefImporter(EntityImporter): # release status if obj['type'] in ('journal-article', 'conference-proceeding', 'book', 'dissertation', 'book-chapter'): - release_status = "published" + release_stage = "published" else: # unknown - release_status = None + release_stage = None # external identifiers extids = self.lookup_ext_ids(doi=obj['DOI'].lower()) @@ -408,18 +408,20 @@ class CrossrefImporter(EntityImporter): title=title, original_title=original_title, release_type=release_type, - release_status=release_status, + release_stage=release_stage, release_date=release_date, release_year=release_year, publisher=publisher, - doi=obj['DOI'].lower(), - pmid=extids['pmid'], - pmcid=extids['pmcid'], - wikidata_qid=extids['wikidata_qid'], - isbn13=isbn13, - core_id=extids['core_id'], - arxiv_id=extids['arxiv_id'], - jstor_id=extids['jstor_id'], + ext_ids=fatcat_client.ReleaseEntityExtIds( + doi=obj['DOI'].lower(), + pmid=extids['pmid'], + pmcid=extids['pmcid'], + wikidata_qid=extids['wikidata_qid'], + isbn13=isbn13, + core=extids['core_id'], + arxiv=extids['arxiv_id'], + jstor=extids['jstor_id'], + ), volume=clean(obj.get('volume')), issue=clean(obj.get('issue')), pages=clean(obj.get('page')), @@ -437,7 +439,7 @@ class CrossrefImporter(EntityImporter): # lookup existing DOI (don't need to try other ext idents for crossref) existing = None try: - existing = self.api.lookup_release(doi=re.doi) + existing = self.api.lookup_release(doi=re.ext_ids.doi) except fatcat_client.rest.ApiException as err: if err.status != 404: raise err diff --git a/python/fatcat_tools/importers/grobid_metadata.py b/python/fatcat_tools/importers/grobid_metadata.py index 7281a7a1..ba91d183 100644 --- a/python/fatcat_tools/importers/grobid_metadata.py +++ b/python/fatcat_tools/importers/grobid_metadata.py @@ -156,6 +156,7 @@ class GrobidMetadataImporter(EntityImporter): volume=clean(obj['journal'].get('volume')), issue=clean(obj['journal'].get('issue')), abstracts=abstracts, + ext_ids=fatcat_client.ReleaseEntityExtIds(), extra=extra) return re diff --git a/python/fatcat_tools/transforms/csl.py b/python/fatcat_tools/transforms/csl.py index 7bc026ed..079e0501 100644 --- a/python/fatcat_tools/transforms/csl.py +++ b/python/fatcat_tools/transforms/csl.py @@ -109,13 +109,13 @@ def release_to_csl(entity): container_title=entity.container and entity.container.name, #container-title-short #dimensions - DOI=entity.doi, + DOI=entity.ext_ids.doi, #edition #event #event-place #first-reference-note-number #genre - ISBN=entity.isbn13, + ISBN=entity.ext_ids.isbn13, ISSN=entity.container and entity.container.issnl, issue=entity.issue, #jurisdiction @@ -131,8 +131,8 @@ def release_to_csl(entity): #original-title # TODO: page=entity.pages, page_first=entity.pages and entity.pages.split('-')[0], - PMCID=entity.pmcid, - PMID=entity.pmid, + PMCID=entity.ext_ids.pmcid, + PMID=entity.ext_ids.pmid, publisher=(entity.container and entity.container.publisher) or entity.publisher, #publisher-place #references diff --git a/python/fatcat_tools/transforms/elasticsearch.py b/python/fatcat_tools/transforms/elasticsearch.py index 73ac046e..971d1c11 100644 --- a/python/fatcat_tools/transforms/elasticsearch.py +++ b/python/fatcat_tools/transforms/elasticsearch.py @@ -45,17 +45,18 @@ def release_to_elasticsearch(entity, force_bool=True): title = release.title, original_title = release.original_title, release_type = release.release_type, - release_status = release.release_status, + release_status = release.release_stage, language = release.language, license = release.license_slug, - doi = release.doi, - pmid = release.pmid, - pmcid = release.pmcid, - isbn13 = release.isbn13, - wikidata_qid = release.wikidata_qid, - core_id = release.core_id, - arxiv_id = release.core_id, - jstor_id = release.jstor_id, + doi = release.ext_ids.doi, + pmid = release.ext_ids.pmid, + pmcid = release.ext_ids.pmcid, + isbn13 = release.ext_ids.isbn13, + wikidata_qid = release.ext_ids.wikidata_qid, + core_id = release.ext_ids.core, + arxiv_id = release.ext_ids.arxiv, + jstor_id = release.ext_ids.jstor, + # TODO: mag, ark ) is_oa = None @@ -121,7 +122,7 @@ def release_to_elasticsearch(entity, force_bool=True): else: t['publisher'] = release.publisher - if release.jstor_id or (release.doi and release.doi.startswith('10.2307/')): + if release.ext_ids.jstor or (release.ext_ids.doi and release.ext_ids.doi.startswith('10.2307/')): in_jstor = True files = release.files or [] |