aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@archive.org> 2020-06-03 18:14:44 -0700
committer Bryan Newbold <bnewbold@archive.org> 2020-06-03 18:14:44 -0700
commit 48ee0ea51425d6050bb165632fa8bbf4a5b84a47 (patch)
tree 4b4ef4f9e25eeaefe8bfb80d46426ffb76a8c0ac
parent b66342bab2b58f94053066d9785fade037837b45 (diff)
download fatcat-scholar-48ee0ea51425d6050bb165632fa8bbf4a5b84a47.tar.gz
fatcat-scholar-48ee0ea51425d6050bb165632fa8bbf4a5b84a47.zip
compute and use tags
-rw-r--r-- fatcat_scholar/search.py 3
-rw-r--r-- fatcat_scholar/transform.py 41
2 files changed, 42 insertions, 2 deletions
diff --git a/fatcat_scholar/search.py b/fatcat_scholar/search.py
index 76a2f70..d29e03b 100644
--- a/fatcat_scholar/search.py
+++ b/fatcat_scholar/search.py
@@ -123,8 +123,7 @@ def do_fulltext_search(query: FulltextQuery, deep_page_limit: int = 2000) -> Ful
# availability filters
if query.filter_availability == "oa":
- # TODO: real OA filter/flag
- search = search.filter("term", tag="OA")
+ search = search.filter("term", tag="oa")
elif query.filter_availability == "everything":
pass
elif query.filter_availability == "fulltext" or query.filter_availability == None:
diff --git a/fatcat_scholar/transform.py b/fatcat_scholar/transform.py
index ab63aa6..4538d70 100644
--- a/fatcat_scholar/transform.py
+++ b/fatcat_scholar/transform.py
@@ -243,6 +243,47 @@ def transform_heavy(heavy: IntermediateBundle) -> Optional[ScholarDoc]:
# TODO: additional abstracts
+ # tags
+ if biblio.license_slug and biblio.license_slug.lower().startswith("cc-"):
+ tags.append('oa')
+ if primary_release and primary_release.container:
+ container = primary_release.container
+ if container.extra:
+ if container.extra.get('doaj'):
+ tags.append('doaj')
+ tags.append('oa')
+ if container.extra.get('road'):
+ tags.append('road')
+ tags.append('oa')
+ if container.extra.get('szczepanski'):
+ tags.append('szczepanski')
+ tags.append('oa')
+ if container.extra.get('ia', {}).get('longtail_oa'):
+ tags.append('longtail')
+ tags.append('oa')
+ if container.extra.get('sherpa_romeo', {}).get('color') == 'white':
+ tags.append('oa')
+ if container.extra.get('default_license', '').lower().startswith('cc-'):
+ tags.append('oa')
+ if container.extra.get('platform'):
+ # scielo, ojs, wordpress, etc
+ tags.append(container.extra['platform'].lower())
+ if biblio.doi_prefix == '10.2307':
+ tags.append('jstor')
+
+ # biorxiv/medrxiv hacks
+ if not biblio.container_name and biblio.release_stage != "published":
+ for _, acc in access_dict.items():
+ if "://www.medrxiv.org/" in acc.access_url:
+ biblio.container_name = 'medRxiv'
+ if biblio.release_stage == None:
+ biblio.release_stage = "submitted"
+ elif "://www.biorxiv.org/" in acc.access_url:
+ biblio.container_name = 'bioRxiv'
+ if biblio.release_stage == None:
+ biblio.release_stage = "submitted"
+ tags = list(set(tags))
+
return ScholarDoc(
key=key,
doc_type=heavy.doc_type.value,