summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Bryan Newbold <bnewbold@robocracy.org> 2018-11-12 22:54:04 -0800
committer: Bryan Newbold <bnewbold@robocracy.org> 2018-11-12 22:54:04 -0800
commit: 0109c3a75e201e81036ad031d93602ba6c46ba08 (patch)
tree: db016e7696b3cbbee98508bd1a1c3c50fc7b523f
parent: 97468e7de2cae4de666f2443ca5ae1f8753952c1 (diff)
download: fatcat-0109c3a75e201e81036ad031d93602ba6c46ba08.tar.gz
download: fatcat-0109c3a75e201e81036ad031d93602ba6c46ba08.zip
Revert "FatcatRelease: start wrapping entities with extra methods"
This reverts commit d70abdd82955feba4eecdda24ff6d95f703e0598. Decided this wasn't the right approach.
-rw-r--r--  python/fatcat/__init__.py  1
-rw-r--r--  python/fatcat/crossref_importer.py  5
-rw-r--r--  python/fatcat/release_model.py  103
-rw-r--r--  python/tests/release_model.py  15
4 files changed, 2 insertions, 122 deletions
diff --git a/python/fatcat/__init__.py b/python/fatcat/__init__.py
index b0492684..aa12f972 100644
--- a/python/fatcat/__init__.py
+++ b/python/fatcat/__init__.py
@@ -4,7 +4,6 @@ from flask_uuid import FlaskUUID
from flask_debugtoolbar import DebugToolbarExtension
from config import Config
import fatcat_client
-from fatcat.release_model import FatcatRelease
toolbar = DebugToolbarExtension()
app = Flask(__name__)
diff --git a/python/fatcat/crossref_importer.py b/python/fatcat/crossref_importer.py
index fbf666a3..37005965 100644
--- a/python/fatcat/crossref_importer.py
+++ b/python/fatcat/crossref_importer.py
@@ -6,7 +6,6 @@ import datetime
import itertools
import fatcat_client
from fatcat.importer_common import FatcatImporter
-from fatcat import FatcatRelease
class FatcatCrossrefImporter(FatcatImporter):
@@ -39,7 +38,7 @@ class FatcatCrossrefImporter(FatcatImporter):
def parse_crossref_dict(self, obj):
"""
obj is a python dict (parsed from json).
- returns a FatcatRelease
+ returns a ReleaseEntity
"""
# This work is out of scope if it doesn't have authors and a title
@@ -213,7 +212,7 @@ class FatcatCrossrefImporter(FatcatImporter):
if release_date:
release_date = release_date.isoformat() + "Z"
- re = FatcatRelease(
+ re = fatcat_client.ReleaseEntity(
work_id=None,
title=obj['title'][0],
contribs=contribs,
diff --git a/python/fatcat/release_model.py b/python/fatcat/release_model.py
deleted file mode 100644
index 403fc671..00000000
--- a/python/fatcat/release_model.py
+++ /dev/null
@@ -1,103 +0,0 @@
-
-import collections
-from fatcat_client.models import ReleaseEntity
-from fatcat_client.api_client import ApiClient
-
-class FatcatRelease(ReleaseEntity):
- """
- This is a wrapper class that extends the code-generated `ReleaseEntity`
- class with extra methods.
- """
-
- def to_elastic_dict(self):
- """
- Converts from an entity model/schema to elasticsearch oriented schema.
-
- Returns: dict
- """
-
- if self.state != 'active':
- raise ValueError("Entity is not 'active'")
-
- # First, the easy ones (direct copy)
- t = dict(
- ident = self.ident,
- revision = self.revision,
- title = self.title,
- release_type = self.release_type,
- release_status = self.release_status,
- language = self.language,
- doi = self.doi,
- pmid = self.pmid,
- pmcid = self.pmcid,
- isbn13 = self.isbn13,
- core_id = self.core_id,
- wikidata_qid = self.wikidata_qid
- )
-
- if self.release_date:
- t['release_date'] = self.release_date.strftime('%F')
-
- container = self.container
- container_is_kept = False
- if container:
- t['publisher'] = container.publisher
- t['container_name'] = container.name
- t['container_issnl'] = container.issnl
- container_extra = container.extra
- if container_extra:
- t['container_is_oa'] = container_extra.get('is_oa')
- container_is_kept = container_extra.get('is_kept', False)
- t['container_is_longtail_oa'] = container_extra.get('is_longtail_oa')
- else:
- t['publisher'] = self.publisher
-
- files = self.files or []
- t['file_count'] = len(files)
- in_wa = False
- in_ia = False
- t['file_pdf_url'] = None
- for f in files:
- is_pdf = 'pdf' in f.get('mimetype', '')
- for url in f.get('urls', []):
- if url.get('rel', '') == 'webarchive':
- in_wa = True
- if '//web.archive.org/' in url['url'] or '//archive.org/' in url['url']:
- in_ia = True
- if is_pdf:
- t['file_pdf_url'] = url['url']
- if not t['file_pdf_url'] and is_pdf:
- t['file_pdf_url'] = url['url']
- t['file_in_webarchive'] = in_wa
- t['file_in_ia'] = in_ia
-
- extra = self.extra or dict()
- if extra:
- t['in_shadow'] = extra.get('in_shadow')
- if extra.get('grobid') and extra['grobid'].get('is_longtail_oa'):
- t['container_is_longtail_oa'] = True
- t['any_abstract'] = bool(self.abstracts)
- t['is_kept'] = container_is_kept or extra.get('is_kept', False)
-
- t['ref_count'] = len(self.refs or [])
- t['contrib_count'] = len(self.contribs or [])
- contrib_names = []
- for c in (self.contribs or []):
- if c.raw_name:
- contrib_names.append(c.raw_name)
- t['contrib_names'] = contrib_names
- return t
-
- def to_json(self):
- ac = ApiClient()
- return ac.sanitize_for_serialization(self)
-
- def from_json(json_str):
- """
- Hack to take advantage of the code-generated deserialization code
- """
- ac = ApiClient()
- thing = collections.namedtuple('Thing', ['data'])
- thing.data = json_str
- return ac.deserialize(thing, FatcatRelease)
-
diff --git a/python/tests/release_model.py b/python/tests/release_model.py
deleted file mode 100644
index 4b9dddba..00000000
--- a/python/tests/release_model.py
+++ /dev/null
@@ -1,15 +0,0 @@
-
-import json
-import pytest
-from fatcat.crossref_importer import FatcatCrossrefImporter
-from fatcat.release_model import FatcatRelease
-
-from crossref import crossref_importer
-
-def test_elastic_convert(crossref_importer):
- with open('tests/files/crossref-works.single.json', 'r') as f:
- # not a single line
- raw = json.loads(f.read())
- (r, c) = crossref_importer.parse_crossref_dict(raw)
- r.state = 'active'
- r.to_elastic_dict()