about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Bryan Newbold <bnewbold@robocracy.org> 2021-10-11 16:50:29 -0700
committer: Bryan Newbold <bnewbold@robocracy.org> 2021-10-14 18:11:12 -0700
commit: c0c9d4da83b027b081eab364bfc7b807dbe9a2e5 (patch)
tree: 0558841555cabaa25205a33b88ff0e0d8c5b18d7
parent: 70bdcb1ba7ce4aeb423fd6c5ff0ac002302fa1e9 (diff)
download: fatcat-c0c9d4da83b027b081eab364bfc7b807dbe9a2e5.tar.gz
fatcat-c0c9d4da83b027b081eab364bfc7b807dbe9a2e5.zip
ingest: handle datasets, components, other ingest types
-rw-r--r-- python/fatcat_tools/transforms/ingest.py 16
1 files changed, 15 insertions, 1 deletions
diff --git a/python/fatcat_tools/transforms/ingest.py b/python/fatcat_tools/transforms/ingest.py
index 42927b2a..9101a4ec 100644
--- a/python/fatcat_tools/transforms/ingest.py
+++ b/python/fatcat_tools/transforms/ingest.py
@@ -32,8 +32,22 @@ def release_ingest_request(release, ingest_request_source='fatcat', ingest_type=
if (not ingest_type) and release.container_id:
ingest_type = INGEST_TYPE_CONTAINER_MAP.get(release.container_id)
+
if not ingest_type:
- ingest_type = 'pdf'
+ if release.release_type == 'stub':
+ return None
+ elif release.release_type in ['component', 'graphic']:
+ ingest_type = 'component'
+ elif release.release_type == 'dataset':
+ ingest_type = 'dataset'
+ elif release.release_type == 'software':
+ ingest_type = 'software'
+ elif release.release_type == 'post-weblog':
+ ingest_type = 'html'
+ elif release.release_type in ['article-journal', 'article', 'chapter', 'paper-conference', 'book', 'report', 'thesis']:
+ ingest_type = 'pdf'
+ else:
+ ingest_type = 'pdf'
# generate a URL where we expect to find fulltext
url = None