From c0c9d4da83b027b081eab364bfc7b807dbe9a2e5 Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Mon, 11 Oct 2021 16:50:29 -0700 Subject: ingest: handle datasets, components, other ingest types --- python/fatcat_tools/transforms/ingest.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) (limited to 'python/fatcat_tools/transforms') diff --git a/python/fatcat_tools/transforms/ingest.py b/python/fatcat_tools/transforms/ingest.py index 42927b2a..9101a4ec 100644 --- a/python/fatcat_tools/transforms/ingest.py +++ b/python/fatcat_tools/transforms/ingest.py @@ -32,8 +32,22 @@ def release_ingest_request(release, ingest_request_source='fatcat', ingest_type= if (not ingest_type) and release.container_id: ingest_type = INGEST_TYPE_CONTAINER_MAP.get(release.container_id) + if not ingest_type: - ingest_type = 'pdf' + if release.release_type == 'stub': + return None + elif release.release_type in ['component', 'graphic']: + ingest_type = 'component' + elif release.release_type == 'dataset': + ingest_type = 'dataset' + elif release.release_type == 'software': + ingest_type = 'software' + elif release.release_type == 'post-weblog': + ingest_type = 'html' + elif release.release_type in ['article-journal', 'article', 'chapter', 'paper-conference', 'book', 'report', 'thesis']: + ingest_type = 'pdf' + else: + ingest_type = 'pdf' # generate a URL where we expect to find fulltext url = None -- cgit v1.2.3