summary | refs | log | tree | commit | diff | stats
path: root/python/fatcat_tools
diff options
context:
space:
mode:
author: Bryan Newbold <bnewbold@robocracy.org> 2019-05-13 14:23:02 -0700
committer: Bryan Newbold <bnewbold@robocracy.org> 2019-05-13 14:23:06 -0700
commit: 955d4942d5111cbbd8f4b99d4e3c6d79262dab6e (patch)
tree: 87757a7afdf00ab6de5ed7455267654bf49b5239 /python/fatcat_tools
parent: 287e892abb4adac8ea7f7211524a41c7cd06851c (diff)
download: fatcat-955d4942d5111cbbd8f4b99d4e3c6d79262dab6e.tar.gz
fatcat-955d4942d5111cbbd8f4b99d4e3c6d79262dab6e.zip
partial python impl of ext_id and release_stage refactors
Diffstat (limited to 'python/fatcat_tools')
-rwxr-xr-x python/fatcat_tools/importers/cdl_dash_dat.py 8
-rw-r--r-- python/fatcat_tools/importers/crossref.py 26
-rw-r--r-- python/fatcat_tools/importers/grobid_metadata.py 1
-rw-r--r-- python/fatcat_tools/transforms/csl.py 8
-rw-r--r-- python/fatcat_tools/transforms/elasticsearch.py 21
5 files changed, 35 insertions, 29 deletions
diff --git a/python/fatcat_tools/importers/cdl_dash_dat.py b/python/fatcat_tools/importers/cdl_dash_dat.py
index 1a21194d..e1252b6d 100755
--- a/python/fatcat_tools/importers/cdl_dash_dat.py
+++ b/python/fatcat_tools/importers/cdl_dash_dat.py
@@ -96,7 +96,9 @@ def cdl_dash_release(meta, extra=None):
))
r = ReleaseEntity(
- doi=doi,
+ ext_ids=ReleaseEntityExtIds(
+ doi=doi,
+ ),
title=clean(meta['title'], force_xml=True),
publisher=clean(meta['publisher']),
release_year=int(meta['publicationYear']),
@@ -170,9 +172,9 @@ def auto_cdl_dash_dat(api, dat_path, release_id=None, editgroup_id=None):
agent="fatcat_tools.auto_cdl_dash_dat")))
editgroup_id = eg.editgroup_id
- if not release_id and release.doi:
+ if not release_id and release.ext_ids.doi:
try:
- r = api.lookup_release(doi=release.doi)
+ r = api.lookup_release(doi=release.ext_ids.doi)
release_id = r.ident
except fatcat_client.rest.ApiException:
pass
diff --git a/python/fatcat_tools/importers/crossref.py b/python/fatcat_tools/importers/crossref.py
index 722f93aa..999ce13f 100644
--- a/python/fatcat_tools/importers/crossref.py
+++ b/python/fatcat_tools/importers/crossref.py
@@ -355,10 +355,10 @@ class CrossrefImporter(EntityImporter):
# release status
if obj['type'] in ('journal-article', 'conference-proceeding', 'book',
'dissertation', 'book-chapter'):
- release_status = "published"
+ release_stage = "published"
else:
# unknown
- release_status = None
+ release_stage = None
# external identifiers
extids = self.lookup_ext_ids(doi=obj['DOI'].lower())
@@ -408,18 +408,20 @@ class CrossrefImporter(EntityImporter):
title=title,
original_title=original_title,
release_type=release_type,
- release_status=release_status,
+ release_stage=release_stage,
release_date=release_date,
release_year=release_year,
publisher=publisher,
- doi=obj['DOI'].lower(),
- pmid=extids['pmid'],
- pmcid=extids['pmcid'],
- wikidata_qid=extids['wikidata_qid'],
- isbn13=isbn13,
- core_id=extids['core_id'],
- arxiv_id=extids['arxiv_id'],
- jstor_id=extids['jstor_id'],
+ ext_ids=fatcat_client.ReleaseEntityExtIds(
+ doi=obj['DOI'].lower(),
+ pmid=extids['pmid'],
+ pmcid=extids['pmcid'],
+ wikidata_qid=extids['wikidata_qid'],
+ isbn13=isbn13,
+ core=extids['core_id'],
+ arxiv=extids['arxiv_id'],
+ jstor=extids['jstor_id'],
+ ),
volume=clean(obj.get('volume')),
issue=clean(obj.get('issue')),
pages=clean(obj.get('page')),
@@ -437,7 +439,7 @@ class CrossrefImporter(EntityImporter):
# lookup existing DOI (don't need to try other ext idents for crossref)
existing = None
try:
- existing = self.api.lookup_release(doi=re.doi)
+ existing = self.api.lookup_release(doi=re.ext_ids.doi)
except fatcat_client.rest.ApiException as err:
if err.status != 404:
raise err
diff --git a/python/fatcat_tools/importers/grobid_metadata.py b/python/fatcat_tools/importers/grobid_metadata.py
index 7281a7a1..ba91d183 100644
--- a/python/fatcat_tools/importers/grobid_metadata.py
+++ b/python/fatcat_tools/importers/grobid_metadata.py
@@ -156,6 +156,7 @@ class GrobidMetadataImporter(EntityImporter):
volume=clean(obj['journal'].get('volume')),
issue=clean(obj['journal'].get('issue')),
abstracts=abstracts,
+ ext_ids=fatcat_client.ReleaseEntityExtIds(),
extra=extra)
return re
diff --git a/python/fatcat_tools/transforms/csl.py b/python/fatcat_tools/transforms/csl.py
index 7bc026ed..079e0501 100644
--- a/python/fatcat_tools/transforms/csl.py
+++ b/python/fatcat_tools/transforms/csl.py
@@ -109,13 +109,13 @@ def release_to_csl(entity):
container_title=entity.container and entity.container.name,
#container-title-short
#dimensions
- DOI=entity.doi,
+ DOI=entity.ext_ids.doi,
#edition
#event
#event-place
#first-reference-note-number
#genre
- ISBN=entity.isbn13,
+ ISBN=entity.ext_ids.isbn13,
ISSN=entity.container and entity.container.issnl,
issue=entity.issue,
#jurisdiction
@@ -131,8 +131,8 @@ def release_to_csl(entity):
#original-title
# TODO: page=entity.pages,
page_first=entity.pages and entity.pages.split('-')[0],
- PMCID=entity.pmcid,
- PMID=entity.pmid,
+ PMCID=entity.ext_ids.pmcid,
+ PMID=entity.ext_ids.pmid,
publisher=(entity.container and entity.container.publisher) or entity.publisher,
#publisher-place
#references
diff --git a/python/fatcat_tools/transforms/elasticsearch.py b/python/fatcat_tools/transforms/elasticsearch.py
index 73ac046e..971d1c11 100644
--- a/python/fatcat_tools/transforms/elasticsearch.py
+++ b/python/fatcat_tools/transforms/elasticsearch.py
@@ -45,17 +45,18 @@ def release_to_elasticsearch(entity, force_bool=True):
title = release.title,
original_title = release.original_title,
release_type = release.release_type,
- release_status = release.release_status,
+ release_status = release.release_stage,
language = release.language,
license = release.license_slug,
- doi = release.doi,
- pmid = release.pmid,
- pmcid = release.pmcid,
- isbn13 = release.isbn13,
- wikidata_qid = release.wikidata_qid,
- core_id = release.core_id,
- arxiv_id = release.core_id,
- jstor_id = release.jstor_id,
+ doi = release.ext_ids.doi,
+ pmid = release.ext_ids.pmid,
+ pmcid = release.ext_ids.pmcid,
+ isbn13 = release.ext_ids.isbn13,
+ wikidata_qid = release.ext_ids.wikidata_qid,
+ core_id = release.ext_ids.core,
+ arxiv_id = release.ext_ids.arxiv,
+ jstor_id = release.ext_ids.jstor,
+ # TODO: mag, ark
)
is_oa = None
@@ -121,7 +122,7 @@ def release_to_elasticsearch(entity, force_bool=True):
else:
t['publisher'] = release.publisher
- if release.jstor_id or (release.doi and release.doi.startswith('10.2307/')):
+ if release.ext_ids.jstor or (release.ext_ids.doi and release.ext_ids.doi.startswith('10.2307/')):
in_jstor = True
files = release.files or []