summaryrefslogtreecommitdiffstats
path: root/python/fatcat_tools/transforms
diff options
context:
space:
mode:
authorBryan Newbold <bnewbold@robocracy.org>2020-01-29 21:52:58 -0800
committerBryan Newbold <bnewbold@robocracy.org>2020-01-29 21:59:05 -0800
commitd58c3891ac2122dac53ced606568108f543f2d80 (patch)
treecf3780f3df09507a0073136a0e1f63a9a04b174c /python/fatcat_tools/transforms
parent5d458a3df7e58e6551d8ec72979e376c62fdd2f7 (diff)
downloadfatcat-d58c3891ac2122dac53ced606568108f543f2d80.tar.gz
fatcat-d58c3891ac2122dac53ced606568108f543f2d80.zip
actually implement changelog transform
Diffstat (limited to 'python/fatcat_tools/transforms')
-rw-r--r--python/fatcat_tools/transforms/elasticsearch.py62
1 files changed, 45 insertions, 17 deletions
diff --git a/python/fatcat_tools/transforms/elasticsearch.py b/python/fatcat_tools/transforms/elasticsearch.py
index 812cd1fd..c8547b27 100644
--- a/python/fatcat_tools/transforms/elasticsearch.py
+++ b/python/fatcat_tools/transforms/elasticsearch.py
@@ -401,36 +401,64 @@ def container_to_elasticsearch(entity, force_bool=True):
return t
+def _type_of_edit(edit):
+ if edit.revision == None and edit.redirect_ident == None:
+ return 'delete'
+ elif edit.redirect_ident:
+ # redirect
+ return 'update'
+ elif edit.prev_revision == None and edit.redirect_ident == None and edit.revision:
+ return 'create'
+ else:
+ return 'update'
+
+
def changelog_to_elasticsearch(entity):
editgroup = entity.editgroup
t = dict(
index=entity.index,
editgroup_id=entity.editgroup_id,
- timestamp=entity.timestamp,
+ timestamp=entity.timestamp.isoformat(),
editor_id=editgroup.editor_id,
+ username=editgroup.editor.username,
+ is_bot=editgroup.editor.is_bot,
+ is_admin=editgroup.editor.is_admin,
)
extra = editgroup.extra or dict()
if extra.get('agent'):
t['agent'] = extra['agent']
- t['containers'] = len(editgroup.edits.containers)
- t['creators'] = len(editgroup.edits.containers)
- t['files'] = len(editgroup.edits.containers)
- t['filesets'] = len(editgroup.edits.containers)
- t['webcaptures'] = len(editgroup.edits.containers)
- t['releases'] = len(editgroup.edits.containers)
- t['works'] = len(editgroup.edits.containers)
-
- # TODO: parse and pull out counts
- #created = 0
- #updated = 0
- #deleted = 0
- #t['created'] = created
- #t['updated'] = updated
- #t['deleted'] = deleted
- #t['total'] = created + updated + deleted
+ containers = [_type_of_edit(e) for e in editgroup.edits.containers]
+ creators = [_type_of_edit(e) for e in editgroup.edits.creators]
+ files = [_type_of_edit(e) for e in editgroup.edits.files]
+ filesets = [_type_of_edit(e) for e in editgroup.edits.filesets]
+ webcaptures = [_type_of_edit(e) for e in editgroup.edits.webcaptures]
+ releases = [_type_of_edit(e) for e in editgroup.edits.releases]
+ works = [_type_of_edit(e) for e in editgroup.edits.works]
+
+ t['containers'] = len(containers)
+ t['new_containers'] = len([e for e in containers if e == 'create'])
+ t['creators'] = len(creators)
+ t['new_creators'] = len([e for e in creators if e == 'create'])
+ t['files'] = len(files)
+ t['new_files'] = len([e for e in files if e == 'create'])
+ t['filesets'] = len(filesets)
+ t['new_filesets'] = len([e for e in filesets if e == 'create'])
+ t['webcaptures'] = len(webcaptures)
+ t['new_webcaptures'] = len([e for e in webcaptures if e == 'create'])
+ t['releases'] = len(releases)
+ t['new_releases'] = len([e for e in releases if e == 'create'])
+ t['works'] = len(works)
+ t['new_works'] = len([e for e in works if e == 'create'])
+
+ all_edits = containers + creators + files + filesets + webcaptures + releases + works
+
+ t['created'] = len([e for e in all_edits if e == 'create'])
+ t['updated'] = len([e for e in all_edits if e == 'update'])
+ t['deleted'] = len([e for e in all_edits if e == 'delete'])
+ t['total'] = len(all_edits)
return t