summaryrefslogtreecommitdiffstats
path: root/python/fatcat_tools/transforms.py
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@robocracy.org> 2019-01-25 18:41:33 -0800
committer Bryan Newbold <bnewbold@robocracy.org> 2019-01-25 18:41:33 -0800
commit f6f7450903bdbe36bd5fff146b942e34ad221557 (patch)
tree c50332c832f414b5c0070e58a42ceb4751ed4d81 /python/fatcat_tools/transforms.py
parent 16256f8ed119c072c09b13b0b1a6d4a56bed5113 (diff)
download fatcat-f6f7450903bdbe36bd5fff146b942e34ad221557.tar.gz
fatcat-f6f7450903bdbe36bd5fff146b942e34ad221557.zip
transform and import fixes/tweaks
Diffstat (limited to 'python/fatcat_tools/transforms.py')
-rw-r--r-- python/fatcat_tools/transforms.py 51
1 files changed, 38 insertions, 13 deletions
diff --git a/python/fatcat_tools/transforms.py b/python/fatcat_tools/transforms.py
index a85c877c..7bb75c3e 100644
--- a/python/fatcat_tools/transforms.py
+++ b/python/fatcat_tools/transforms.py
@@ -231,20 +231,12 @@ def container_to_elasticsearch(entity):
container_type = entity.container_type,
issnl = entity.issnl,
wikidata_qid = entity.wikidata_qid,
-
- entity_status = entity.entity_status,
- language = entity.language,
- license = entity.license_slug,
- doi = entity.doi,
- pmid = entity.pmid,
- isbn13 = entity.isbn13,
- core_id = entity.core_id,
- arxiv_id = entity.core_id,
- jstor_id = entity.jstor_id,
)
# TODO: region, discipline
# TODO: single primary language?
+ if not entity.extra:
+ entity.extra = dict()
for key in ('country', 'languages', 'mimetypes', 'first_year', 'last_year'):
if entity.extra.get(key):
t[key] = entity.extra[key]
@@ -285,13 +277,46 @@ def container_to_elasticsearch(entity):
if extra['ia'].get('sim'):
any_ia_sim = True
- t['in_doaj'] = is_doaj
- t['in_road'] = is_road
+ t['in_doaj'] = in_doaj
+ t['in_road'] = in_road
t['in_doi'] = in_doi
t['in_sherpa_romeo'] = in_sherpa_romeo
- t['is_oa'] = in_doaj or in_road or is_longtail_oa or ia_oa
+ t['is_oa'] = in_doaj or in_road or is_longtail_oa or is_oa
t['is_longtail_oa'] = is_longtail_oa
t['any_kbart'] = any_ia_sim
t['any_jstor'] = any_ia_sim
t['any_ia_sim'] = bool(any_ia_sim)
return t
+
+
+def changelog_to_elasticsearch(entity):
+    """Flatten a changelog entry and its editgroup into a dict for Elasticsearch."""
+    editgroup = entity.editgroup
+    t = dict(
+        index=entity.index,
+        editgroup_id=entity.editgroup_id,
+        timestamp=entity.timestamp,
+        editor_id=editgroup.editor_id,
+    )
+
+    extra = editgroup.extra or dict()  # tolerate an unset/empty extra
+    if extra.get('agent'):
+        t['agent'] = extra['agent']
+
+    t['containers'] = len(editgroup.edits.containers)  # one count per edit list
+    t['creators'] = len(editgroup.edits.creators)
+    t['files'] = len(editgroup.edits.files)
+    t['filesets'] = len(editgroup.edits.filesets)
+    t['webcaptures'] = len(editgroup.edits.webcaptures)
+    t['releases'] = len(editgroup.edits.releases)
+    t['works'] = len(editgroup.edits.works)
+
+    # TODO: parse and pull out counts
+    #created = 0
+    #updated = 0
+    #deleted = 0
+    #t['created'] = created
+    #t['updated'] = updated
+    #t['deleted'] = deleted
+    #t['total'] = created + updated + deleted
+    return t