aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Martin Czygan <martin.czygan@gmail.com> 2020-04-16 01:17:45 +0200
committer Martin Czygan <martin.czygan@gmail.com> 2020-04-16 01:22:57 +0200
commit 0071b77eb7fc20be4af1bbf9b6c0bfcb4e26816a (patch)
tree 5979dcd431c8bfc01772662d270c5488764d6c60
parent 6e7f02dd3c3c1a87aa614adf9bdace8d8368fe4d (diff)
download fatcat-0071b77eb7fc20be4af1bbf9b6c0bfcb4e26816a.tar.gz
fatcat-0071b77eb7fc20be4af1bbf9b6c0bfcb4e26816a.zip
changelog: extend release_types considered documents
according to release_rev.release_type, we have 29 values:

fatcat_prod=# select release_type, count(release_type) from release_rev group by release_type;
   release_type    |   count
-------------------+-----------
 abstract          |      2264
 article           |   6371076
 article-journal   | 101083841
 article-newspaper |     17062
 book              |   1676941
 chapter           |  13914854
 component         |     58990
 dataset           |   6860325
 editorial         |    133573
 entry             |   1628487
 graphic           |   1809471
 interview         |     19898
 legal_case        |      3581
 legislation       |      1626
 letter            |    275119
 paper-conference  |   6074669
 peer_review       |     30581
 post              |    245807
 post-weblog       |       135
 report            |   1010699
 retraction        |      1292
 review-book       |     96219
 software          |       316
 song              |     24027
 speech            |      4263
 standard          |    312364
 stub              |   1036813
 thesis            |    414397
                   |         0
(29 rows)
-rw-r--r-- python/fatcat_tools/workers/changelog.py | 29
1 files changed, 19 insertions, 10 deletions
diff --git a/python/fatcat_tools/workers/changelog.py b/python/fatcat_tools/workers/changelog.py
index d1e7c2db..fcf3f031 100644
--- a/python/fatcat_tools/workers/changelog.py
+++ b/python/fatcat_tools/workers/changelog.py
@@ -153,28 +153,37 @@ class EntityUpdatesWorker(FatcatWorker):
doi = ingest_request.get('ext_ids', {}).get('doi')
is_document = release.release_type in (
- 'article-journal',
- 'paper-conference',
+ 'abstract',
'article',
- 'report',
+ 'article-journal',
+ 'article-newspaper',
+ 'book',
'chapter',
- 'manuscript',
- 'review',
- 'thesis',
- 'letter',
+ 'component',
'editorial',
- 'abstract',
'entry',
+ 'interview',
+ 'legal_case',
+ 'legislation',
+ 'letter',
+ 'manuscript',
+ 'paper-conference',
'patent',
+ 'peer_review',
'post',
+ 'post-weblog',
+ 'report',
+ 'retraction',
+ 'review',
'review-book',
+ 'thesis',
)
is_not_pdf = release.release_type in (
'dataset',
- 'stub',
- 'software',
'figure',
'graphic',
+ 'software',
+ 'stub',
)
# accept list sets a default "crawl it" despite OA metadata for