summary | refs | log | tree | commit | diff | stats
path: root/python/fatcat_tools/transforms
diff options
context:
space:
mode:
author: Bryan Newbold <bnewbold@robocracy.org> 2020-02-26 22:04:35 -0800
committer: Bryan Newbold <bnewbold@robocracy.org> 2020-02-26 22:05:33 -0800
commit: 81e0784813500a39955c20278140e25d7940d9c6 (patch)
tree: a48b51d85bfa27441cf0de2e8689c43cd4e3d048 /python/fatcat_tools/transforms
parent: 0ab3f66664fd4cc63cf9040e351d725c6a5c22b9 (diff)
download: fatcat-81e0784813500a39955c20278140e25d7940d9c6.tar.gz
          fatcat-81e0784813500a39955c20278140e25d7940d9c6.zip
improve is_oa flag accuracy
In particular, the ezb=green match seems to be mostly incorrect. Note that a release with a pmcid assigned could still be within an embargo window (unconfirmed).
Diffstat (limited to 'python/fatcat_tools/transforms')
-rw-r--r-- python/fatcat_tools/transforms/elasticsearch.py 12
1 file changed, 4 insertions, 8 deletions
diff --git a/python/fatcat_tools/transforms/elasticsearch.py b/python/fatcat_tools/transforms/elasticsearch.py
index 8581febd..87e054ec 100644
--- a/python/fatcat_tools/transforms/elasticsearch.py
+++ b/python/fatcat_tools/transforms/elasticsearch.py
@@ -149,9 +149,6 @@ def release_to_elasticsearch(entity, force_bool=True):
if c_extra.get('road'):
if c_extra['road'].get('as_of'):
is_oa = True
- if c_extra.get('ezb'):
- if c_extra['ezb'].get('color') == 'green':
- is_oa = True
if c_extra.get('szczepanski'):
if c_extra['szczepanski'].get('as_of'):
is_oa = True
@@ -210,6 +207,8 @@ def release_to_elasticsearch(entity, force_bool=True):
# TODO: more/better checks here, particularly strict *not* OA licenses
if release.license_slug.startswith("CC-"):
is_oa = True
+ if release.license_slug.startswith("ARXIV-"):
+ is_oa = True
extra = release.extra or dict()
if extra:
@@ -293,10 +292,10 @@ def release_to_elasticsearch(entity, force_bool=True):
t['in_ia'] = bool(in_ia)
t['is_preserved'] = bool(is_preserved or in_ia or in_kbart or in_jstor)
- if in_ia:
+ if in_ia or t.get('pmcid') or t.get('arxiv_id'):
t['preservation'] = 'bright'
elif in_kbart or in_jstor:
- t['preservation'] = 'dark_only'
+ t['preservation'] = 'dark'
elif in_shadows:
t['preservation'] = 'shadows_only'
else:
@@ -367,9 +366,6 @@ def container_to_elasticsearch(entity, force_bool=True):
if extra.get('road'):
if extra['road'].get('as_of'):
in_road = True
- if extra.get('ezb'):
- if extra['ezb'].get('color') == 'green':
- is_oa = True
if extra.get('szczepanski'):
if extra['szczepanski'].get('as_of'):
is_oa = True