diff options
author | Bryan Newbold <bnewbold@robocracy.org> | 2021-10-11 16:50:29 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@robocracy.org> | 2021-10-14 18:11:12 -0700 |
commit | c0c9d4da83b027b081eab364bfc7b807dbe9a2e5 (patch) | |
tree | 0558841555cabaa25205a33b88ff0e0d8c5b18d7 | |
parent | 70bdcb1ba7ce4aeb423fd6c5ff0ac002302fa1e9 (diff) | |
download | fatcat-c0c9d4da83b027b081eab364bfc7b807dbe9a2e5.tar.gz fatcat-c0c9d4da83b027b081eab364bfc7b807dbe9a2e5.zip |
ingest: handle datasets, components, other ingest types
-rw-r--r-- | python/fatcat_tools/transforms/ingest.py | 16 |
1 files changed, 15 insertions, 1 deletions
```diff
diff --git a/python/fatcat_tools/transforms/ingest.py b/python/fatcat_tools/transforms/ingest.py
index 42927b2a..9101a4ec 100644
--- a/python/fatcat_tools/transforms/ingest.py
+++ b/python/fatcat_tools/transforms/ingest.py
@@ -32,8 +32,22 @@ def release_ingest_request(release, ingest_request_source='fatcat', ingest_type=
 
     if (not ingest_type) and release.container_id:
         ingest_type = INGEST_TYPE_CONTAINER_MAP.get(release.container_id)
+
     if not ingest_type:
-        ingest_type = 'pdf'
+        if release.release_type == 'stub':
+            return None
+        elif release.release_type in ['component', 'graphic']:
+            ingest_type = 'component'
+        elif release.release_type == 'dataset':
+            ingest_type = 'dataset'
+        elif release.release_type == 'software':
+            ingest_type = 'software'
+        elif release.release_type == 'post-weblog':
+            ingest_type = 'html'
+        elif release.release_type in ['article-journal', 'article', 'chapter', 'paper-conference', 'book', 'report', 'thesis']:
+            ingest_type = 'pdf'
+        else:
+            ingest_type = 'pdf'
 
     # generate a URL where we expect to find fulltext
     url = None
```