diff options
author | Bryan Newbold <bnewbold@robocracy.org> | 2021-05-21 17:46:37 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@robocracy.org> | 2021-05-21 17:46:37 -0700 |
commit | be28fd5d4a600bc29507418fddfdc95802abc98f (patch) | |
tree | c32bae2797ff065b8e3668f529ee92c2a33ac6a0 /python/fatcat_tools | |
parent | 1e7c692d00d51875b62686c99b3a782b0c3c8fca (diff) | |
download | fatcat-be28fd5d4a600bc29507418fddfdc95802abc98f.tar.gz fatcat-be28fd5d4a600bc29507418fddfdc95802abc98f.zip |
ingest: add per-container ingest type overrides
Diffstat (limited to 'python/fatcat_tools')
-rw-r--r-- | python/fatcat_tools/transforms/ingest.py | 18 |
1 file changed, 17 insertions, 1 deletion
```diff
diff --git a/python/fatcat_tools/transforms/ingest.py b/python/fatcat_tools/transforms/ingest.py
index 59831017..9aaeaa84 100644
--- a/python/fatcat_tools/transforms/ingest.py
+++ b/python/fatcat_tools/transforms/ingest.py
@@ -1,4 +1,19 @@
 
+INGEST_TYPE_CONTAINER_MAP = {
+    # Optica
+    "twtpsm6ytje3nhuqfu3pa7ca7u": "html",
+    # Optics Express
+    "cg4vcsfty5dfvgmat5wm62wgie": "html",
+    # First Monday
+    "svz5ul6qozdjhjhk7d627avuja": "html",
+    # D-Lib Magazine
+    "ugbiirfvufgcjkx33r3cmemcuu": "html",
+    # Distill (distill.pub)
+    "lx7svdzmc5dl3ay4zncjjrql7i": "html",
+    # NLM technical bulletin
+    "lovwr7ladjagzkhmoaszg7efqu": "html",
+}
+
 def release_ingest_request(release, ingest_request_source='fatcat', ingest_type=None):
     """
     Takes a full release entity object and returns an ingest request (as dict),
@@ -15,7 +30,8 @@ def release_ingest_request(release, ingest_request_source='fatcat', ingest_type=
     if release.state != 'active':
         return None
 
-    # TODO: infer ingest type based on release_type or container metadata?
+    if (not ingest_type) and release.container_id:
+        ingest_type = INGEST_TYPE_CONTAINER_MAP.get(release.container_id)
     if not ingest_type:
         ingest_type = 'pdf'
 
```