diff options
author | Bryan Newbold <bnewbold@robocracy.org> | 2020-01-29 21:52:58 -0800 |
---|---|---|
committer | Bryan Newbold <bnewbold@robocracy.org> | 2020-01-29 21:59:05 -0800 |
commit | d58c3891ac2122dac53ced606568108f543f2d80 (patch) | |
tree | cf3780f3df09507a0073136a0e1f63a9a04b174c /python/tests | |
parent | 5d458a3df7e58e6551d8ec72979e376c62fdd2f7 (diff) | |
download | fatcat-d58c3891ac2122dac53ced606568108f543f2d80.tar.gz fatcat-d58c3891ac2122dac53ced606568108f543f2d80.zip |
actually implement changelog transform
Diffstat (limited to 'python/tests')
-rw-r--r-- | python/tests/transform_elasticsearch.py | 24 |
1 files changed, 23 insertions, 1 deletions
diff --git a/python/tests/transform_elasticsearch.py b/python/tests/transform_elasticsearch.py
index ab613a0a..89a4eef8 100644
--- a/python/tests/transform_elasticsearch.py
+++ b/python/tests/transform_elasticsearch.py
@@ -106,9 +106,31 @@ def test_elasticsearch_file_transform(matched_importer):
     assert es['size_bytes'] == f.size
     assert es['mimetype'] == f.mimetype
     assert es['in_ia'] == True
-    assert 'publisher' in es['rel']
+    assert 'web' in es['rel']
 
     # XXX: implement hosts and domain parsing with urlcanon
     #assert 'journals.plos.org' in es['host']
     #assert 'plos.org' in es['domain']
 
+def test_elasticsearch_changelog_transform(matched_importer):
+    ce = entity_from_json(open('./tests/files/changelog_3469683.json', 'r').read(), ChangelogEntry)
+
+    es = changelog_to_elasticsearch(ce)
+    assert es['index'] == 3469683
+    # len("2020-01-30T05:04:39") => 19
+    assert es['timestamp'][:19] == "2020-01-30T05:04:39.738601Z"[:19]
+    assert es['editor_id'] == "scmbogxw25evtcesfcab5qaboa"
+    assert es['username'] == "crawl-bot"
+    assert es['is_bot'] == True
+    assert es['is_admin'] == True
+    assert es['agent'] == "fatcat_tools.IngestFileResultImporter"
+
+    assert es['total'] == 50
+    assert es['files'] == 50
+    assert es['new_files'] == 50
+    assert es['created'] == 50
+
+    assert es['releases'] == 0
+    assert es['new_releases'] == 0
+    assert es['updated'] == 0
+    assert es['deleted'] == 0