diff options
-rw-r--r-- | fatcat_covid19/transform.py | 11 |
1 files changed, 11 insertions, 0 deletions
diff --git a/fatcat_covid19/transform.py b/fatcat_covid19/transform.py index 0eb1b0a..9616c57 100644 --- a/fatcat_covid19/transform.py +++ b/fatcat_covid19/transform.py @@ -168,6 +168,17 @@ def fulltext_to_elasticsearch(row, force_bool=True): if t['doi']: t['doi_prefix'] = t['doi'].split('/')[0] + # special-case medrxiv/biorxiv content + if not t.get('release_stage') and not t.get('container_name') and t.get('doi', '').startswith('10.1101/20'): + t['container_name'] = 'biorXiv / medrXiv' + t['release_stage'] = 'draft' + if t.get('release_type') in ['post', None]: + t['release_type'] = 'article-journal' + + # special-case arxiv + if not t.get('container_name') and t.get('arxiv_id'): + t['container_name'] = 'arXiv' + # then the fulltext stuff t['fulltext']['status'] = row.get('fulltext_status', 'none') if 'fulltext_file' in row: |