1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
|
from fatcat_client.models import ReleaseEntity
class FatcatRelease(ReleaseEntity):
    """
    This is a wrapper class that extends the code-generated `ReleaseEntity`
    class with extra methods.
    """

    def to_elastic_dict(self):
        """
        Converts from an entity model/schema to elasticsearch oriented schema.

        The result is a flat dict made of:

        - identifier and bibliographic fields copied straight from the release
        - 'publisher', taken from the container when one is attached and from
          the release itself otherwise
        - 'container_name' / 'container_issnl' (only when a container is
          attached) and 'container_is_oa' / 'container_is_longtail_oa' (only
          when that container has a non-empty `extra`)
        - a summary of the attached files: 'file_count', 'file_pdf_url',
          'file_in_webarchive', 'file_in_ia'
        - 'in_shadow' (only when the release has a non-empty `extra`),
          'any_abstract', 'is_kept', 'ref_count', 'contrib_count' and
          'contrib_names'

        Entries of `self.files` and the items of their 'urls' lists are read
        with `.get()`, i.e. they are expected to be dict-like here.

        Returns: dict
        Raises: ValueError if the entity's state is not 'active'
        """
        if self.state != 'active':
            raise ValueError("Entity is not 'active'")

        # First, the easy ones (direct copy)
        t = {
            'ident': self.ident,
            'revision': self.revision,
            'title': self.title,
            'release_date': self.release_date,
            'release_type': self.release_type,
            'release_status': self.release_status,
            'language': self.language,
            'doi': self.doi,
            'pmid': self.pmid,
            'pmcid': self.pmcid,
            'isbn13': self.isbn13,
            'core_id': self.core_id,
            'wikidata_qid': self.wikidata_qid,
        }

        # Container-derived fields; the container's publisher takes
        # precedence over the release's own publisher.
        container = self.container
        container_is_kept = False
        if container:
            t['publisher'] = container.publisher
            t['container_name'] = container.name
            t['container_issnl'] = container.issnl
            container_extra = container.extra
            if container_extra:
                t['container_is_oa'] = container_extra.get('is_oa')
                container_is_kept = container_extra.get('is_kept', False)
                t['container_is_longtail_oa'] = container_extra.get('is_longtail_oa')
        else:
            t['publisher'] = self.publisher

        # File summary
        files = self.files or []
        t['file_count'] = len(files)
        in_wa = False
        in_ia = False
        t['file_pdf_url'] = None
        for f in files:
            # `or` (instead of a .get() default) also covers keys that are
            # present but set to None, which would otherwise raise TypeError.
            is_pdf = 'pdf' in (f.get('mimetype') or '')
            for url in (f.get('urls') or []):
                if url.get('rel') == 'webarchive':
                    in_wa = True
                href = url.get('url')
                if href is None:
                    # No location to inspect or record for this entry.
                    continue
                if '//web.archive.org/' in href or '//archive.org/' in href:
                    in_ia = True
                    # archive.org copies of a PDF always win, even if a PDF
                    # URL was already recorded.
                    if is_pdf:
                        t['file_pdf_url'] = href
                # Otherwise keep the first PDF URL seen as a fallback.
                if not t['file_pdf_url'] and is_pdf:
                    t['file_pdf_url'] = href
        t['file_in_webarchive'] = in_wa
        t['file_in_ia'] = in_ia

        # Flags derived from the release's free-form `extra` metadata
        extra = self.extra or {}
        if extra:
            t['in_shadow'] = extra.get('in_shadow')
            # A per-release grobid flag can mark the release as longtail OA
            # regardless of what the container said.
            if extra.get('grobid') and extra['grobid'].get('is_longtail_oa'):
                t['container_is_longtail_oa'] = True
        t['any_abstract'] = bool(self.abstracts)
        t['is_kept'] = container_is_kept or extra.get('is_kept', False)

        # Counts and contributor names
        contribs = self.contribs or []
        t['ref_count'] = len(self.refs or [])
        t['contrib_count'] = len(contribs)
        # Contributors without a raw_name are counted but not listed.
        t['contrib_names'] = [c.raw_name for c in contribs if c.raw_name]
        return t
|