diff options
author | Martin Czygan <martin.czygan@gmail.com> | 2020-04-16 01:17:45 +0200 |
---|---|---|
committer | Martin Czygan <martin.czygan@gmail.com> | 2020-04-16 01:22:57 +0200 |
commit | 0071b77eb7fc20be4af1bbf9b6c0bfcb4e26816a (patch) | |
tree | 5979dcd431c8bfc01772662d270c5488764d6c60 /python/fatcat_tools | |
parent | 6e7f02dd3c3c1a87aa614adf9bdace8d8368fe4d (diff) | |
download | fatcat-0071b77eb7fc20be4af1bbf9b6c0bfcb4e26816a.tar.gz fatcat-0071b77eb7fc20be4af1bbf9b6c0bfcb4e26816a.zip |
changelog: extend the release_types considered to be documents
According to release_rev.release_type, there are 29 distinct values (28 named types plus one empty/NULL group, which count(release_type) reports as 0):
fatcat_prod=# select release_type, count(release_type) from release_rev group by release_type;
release_type | count
-------------------+-----------
abstract | 2264
article | 6371076
article-journal | 101083841
article-newspaper | 17062
book | 1676941
chapter | 13914854
component | 58990
dataset | 6860325
editorial | 133573
entry | 1628487
graphic | 1809471
interview | 19898
legal_case | 3581
legislation | 1626
letter | 275119
paper-conference | 6074669
peer_review | 30581
post | 245807
post-weblog | 135
report | 1010699
retraction | 1292
review-book | 96219
software | 316
song | 24027
speech | 4263
standard | 312364
stub | 1036813
thesis | 414397
| 0
(29 rows)
Diffstat (limited to 'python/fatcat_tools')
-rw-r--r-- | python/fatcat_tools/workers/changelog.py | 29 |
1 files changed, 19 insertions, 10 deletions
diff --git a/python/fatcat_tools/workers/changelog.py b/python/fatcat_tools/workers/changelog.py
index d1e7c2db..fcf3f031 100644
--- a/python/fatcat_tools/workers/changelog.py
+++ b/python/fatcat_tools/workers/changelog.py
@@ -153,28 +153,37 @@ class EntityUpdatesWorker(FatcatWorker):
 doi = ingest_request.get('ext_ids', {}).get('doi')
 is_document = release.release_type in (
- 'article-journal',
- 'paper-conference',
+ 'abstract',
 'article',
- 'report',
+ 'article-journal',
+ 'article-newspaper',
+ 'book',
 'chapter',
- 'manuscript',
- 'review',
- 'thesis',
- 'letter',
+ 'component',
 'editorial',
- 'abstract',
 'entry',
+ 'interview',
+ 'legal_case',
+ 'legislation',
+ 'letter',
+ 'manuscript',
+ 'paper-conference',
 'patent',
+ 'peer_review',
 'post',
+ 'post-weblog',
+ 'report',
+ 'retraction',
+ 'review',
 'review-book',
+ 'thesis',
 )
 is_not_pdf = release.release_type in (
 'dataset',
- 'stub',
- 'software',
 'figure',
 'graphic',
+ 'software',
+ 'stub',
 )
 # accept list sets a default "crawl it" despite OA metadata for |