about | summary | refs | log | tree | commit | diff | stats
path: root/pig/filter-cdx-paper-pdfs.pig
diff options
context:
space:
mode:
Diffstat (limited to 'pig/filter-cdx-paper-pdfs.pig')
-rw-r--r-- pig/filter-cdx-paper-pdfs.pig 2
1 file changed, 1 insertion, 1 deletion
diff --git a/pig/filter-cdx-paper-pdfs.pig b/pig/filter-cdx-paper-pdfs.pig
index 7e10720..402d340 100644
--- a/pig/filter-cdx-paper-pdfs.pig
+++ b/pig/filter-cdx-paper-pdfs.pig
@@ -30,7 +30,7 @@ cdx = FILTER cdx
OR surt matches '(?i).+\\).*/(pubs|research|publications?|articles?|proceedings?|papers?|fulltext)/.*'
-- words in domains
- OR surt matches '.*(,hal|,eprint|scielo|redalyc|revues|revistas|research|journal).*\\).*'
+ OR surt matches '.*(,hal|,eprint|,ojs|,dspace|scielo|redalyc|revues|revistas|research|journal).*\\).*'
-- DOI-like pattern in URL
OR surt matches '.*\\).*/10\\.\\d{3,5}/.*';