diff options
-rw-r--r-- | python/fatcat/__init__.py | 1 | ||||
-rw-r--r-- | python/fatcat/crossref_importer.py | 5 | ||||
-rw-r--r-- | python/fatcat/release_model.py | 103 | ||||
-rw-r--r-- | python/tests/release_model.py | 15 |
4 files changed, 2 insertions, 122 deletions
diff --git a/python/fatcat/__init__.py b/python/fatcat/__init__.py index b0492684..aa12f972 100644 --- a/python/fatcat/__init__.py +++ b/python/fatcat/__init__.py @@ -4,7 +4,6 @@ from flask_uuid import FlaskUUID from flask_debugtoolbar import DebugToolbarExtension from config import Config import fatcat_client -from fatcat.release_model import FatcatRelease toolbar = DebugToolbarExtension() app = Flask(__name__) diff --git a/python/fatcat/crossref_importer.py b/python/fatcat/crossref_importer.py index fbf666a3..37005965 100644 --- a/python/fatcat/crossref_importer.py +++ b/python/fatcat/crossref_importer.py @@ -6,7 +6,6 @@ import datetime import itertools import fatcat_client from fatcat.importer_common import FatcatImporter -from fatcat import FatcatRelease class FatcatCrossrefImporter(FatcatImporter): @@ -39,7 +38,7 @@ class FatcatCrossrefImporter(FatcatImporter): def parse_crossref_dict(self, obj): """ obj is a python dict (parsed from json). - returns a FatcatRelease + returns a ReleaseEntity """ # This work is out of scope if it doesn't have authors and a title @@ -213,7 +212,7 @@ class FatcatCrossrefImporter(FatcatImporter): if release_date: release_date = release_date.isoformat() + "Z" - re = FatcatRelease( + re = fatcat_client.ReleaseEntity( work_id=None, title=obj['title'][0], contribs=contribs, diff --git a/python/fatcat/release_model.py b/python/fatcat/release_model.py deleted file mode 100644 index 403fc671..00000000 --- a/python/fatcat/release_model.py +++ /dev/null @@ -1,103 +0,0 @@ - -import collections -from fatcat_client.models import ReleaseEntity -from fatcat_client.api_client import ApiClient - -class FatcatRelease(ReleaseEntity): - """ - This is a wrapper class that extends the code-generated `ReleaseEntity` - class with extra methods. - """ - - def to_elastic_dict(self): - """ - Converts from an entity model/schema to elasticsearch oriented schema. - - Returns: dict - """ - - if self.state != 'active': - raise ValueError("Entity is not 'active'") - - # First, the easy ones (direct copy) - t = dict( - ident = self.ident, - revision = self.revision, - title = self.title, - release_type = self.release_type, - release_status = self.release_status, - language = self.language, - doi = self.doi, - pmid = self.pmid, - pmcid = self.pmcid, - isbn13 = self.isbn13, - core_id = self.core_id, - wikidata_qid = self.wikidata_qid - ) - - if self.release_date: - t['release_date'] = self.release_date.strftime('%F') - - container = self.container - container_is_kept = False - if container: - t['publisher'] = container.publisher - t['container_name'] = container.name - t['container_issnl'] = container.issnl - container_extra = container.extra - if container_extra: - t['container_is_oa'] = container_extra.get('is_oa') - container_is_kept = container_extra.get('is_kept', False) - t['container_is_longtail_oa'] = container_extra.get('is_longtail_oa') - else: - t['publisher'] = self.publisher - - files = self.files or [] - t['file_count'] = len(files) - in_wa = False - in_ia = False - t['file_pdf_url'] = None - for f in files: - is_pdf = 'pdf' in f.get('mimetype', '') - for url in f.get('urls', []): - if url.get('rel', '') == 'webarchive': - in_wa = True - if '//web.archive.org/' in url['url'] or '//archive.org/' in url['url']: - in_ia = True - if is_pdf: - t['file_pdf_url'] = url['url'] - if not t['file_pdf_url'] and is_pdf: - t['file_pdf_url'] = url['url'] - t['file_in_webarchive'] = in_wa - t['file_in_ia'] = in_ia - - extra = self.extra or dict() - if extra: - t['in_shadow'] = extra.get('in_shadow') - if extra.get('grobid') and extra['grobid'].get('is_longtail_oa'): - t['container_is_longtail_oa'] = True - t['any_abstract'] = bool(self.abstracts) - t['is_kept'] = container_is_kept or extra.get('is_kept', False) - - t['ref_count'] = len(self.refs or []) - t['contrib_count'] = len(self.contribs or []) - contrib_names = [] - for c in (self.contribs or []): - if c.raw_name: - contrib_names.append(c.raw_name) - t['contrib_names'] = contrib_names - return t - - def to_json(self): - ac = ApiClient() - return ac.sanitize_for_serialization(self) - - def from_json(json_str): - """ - Hack to take advantage of the code-generated deserialization code - """ - ac = ApiClient() - thing = collections.namedtuple('Thing', ['data']) - thing.data = json_str - return ac.deserialize(thing, FatcatRelease) - diff --git a/python/tests/release_model.py b/python/tests/release_model.py deleted file mode 100644 index 4b9dddba..00000000 --- a/python/tests/release_model.py +++ /dev/null @@ -1,15 +0,0 @@ - -import json -import pytest -from fatcat.crossref_importer import FatcatCrossrefImporter -from fatcat.release_model import FatcatRelease - -from crossref import crossref_importer - -def test_elastic_convert(crossref_importer): - with open('tests/files/crossref-works.single.json', 'r') as f: - # not a single line - raw = json.loads(f.read()) - (r, c) = crossref_importer.parse_crossref_dict(raw) - r.state = 'active' - r.to_elastic_dict() |