diff options
author | Martin Czygan <martin@archive.org> | 2020-06-05 11:23:29 +0000 |
---|---|---|
committer | Martin Czygan <martin@archive.org> | 2020-06-05 11:23:29 +0000 |
commit | 9dca5a8348f2ab34a013b25f3dae07c1f8db2c19 (patch) | |
tree | 41b1f6094714f8dd1b1b8e54028e1a32202205f4 /python/fatcat_tools | |
parent | 394433ff5e0aba301f2f26e03095079e0ca974ca (diff) | |
parent | 2f233a3b8c00385d4b215361a0fa09f93a05f8d9 (diff) | |
download | fatcat-9dca5a8348f2ab34a013b25f3dae07c1f8db2c19.tar.gz fatcat-9dca5a8348f2ab34a013b25f3dae07c1f8db2c19.zip |
Merge branch 'bnewbold-better-button-links' into 'master'
better download button links
See merge request webgroup/fatcat!57
Diffstat (limited to 'python/fatcat_tools')
-rw-r--r-- | python/fatcat_tools/transforms/elasticsearch.py | 12 |
1 file changed, 12 insertions, 0 deletions
```diff
diff --git a/python/fatcat_tools/transforms/elasticsearch.py b/python/fatcat_tools/transforms/elasticsearch.py
index 87e054ec..1d35141b 100644
--- a/python/fatcat_tools/transforms/elasticsearch.py
+++ b/python/fatcat_tools/transforms/elasticsearch.py
@@ -508,4 +508,16 @@ def file_to_elasticsearch(entity):
     t['in_ia'] = bool('archive.org' in t['domains'])
     t['in_ia_petabox'] = bool('archive.org' in t['hosts'])
 
+    any_url = None
+    good_url = None
+    best_url = None
+    for release_url in (entity.urls or []):
+        any_url = release_url.url
+        if release_url.rel in ('webarchive', 'repository'):
+            good_url = release_url.url
+        if '//web.archive.org/' in release_url.url or '//archive.org/' in release_url.url:
+            best_url = release_url.url
+    # here is where we bake-in priority; IA-specific
+    t['best_url'] = best_url or good_url or any_url
+
     return t
```