diff options
| -rw-r--r-- | proposals/2020_elasticsearch_schemas.md | 4 | ||||
| -rw-r--r-- | python/fatcat_tools/transforms/elasticsearch.py | 12 | 
2 files changed, 6 insertions, 10 deletions
diff --git a/proposals/2020_elasticsearch_schemas.md b/proposals/2020_elasticsearch_schemas.md
index 5fb28d19..c3e79073 100644
--- a/proposals/2020_elasticsearch_schemas.md
+++ b/proposals/2020_elasticsearch_schemas.md
@@ -33,8 +33,8 @@ status (from `in_kbart`, `in_ia`, etc) to a `preservation_status` flag which
 is:
 
 - `bright`
-- `dark_only`
-- `shadow_only`
+- `dark`
+- `shadows_only`
 - `none`
 
 Note that these don't align with OA color or work-level preservation (aka, no
diff --git a/python/fatcat_tools/transforms/elasticsearch.py b/python/fatcat_tools/transforms/elasticsearch.py
index 8581febd..87e054ec 100644
--- a/python/fatcat_tools/transforms/elasticsearch.py
+++ b/python/fatcat_tools/transforms/elasticsearch.py
@@ -149,9 +149,6 @@ def release_to_elasticsearch(entity, force_bool=True):
             if c_extra.get('road'):
                 if c_extra['road'].get('as_of'):
                     is_oa = True
-            if c_extra.get('ezb'):
-                if c_extra['ezb'].get('color') == 'green':
-                    is_oa = True
             if c_extra.get('szczepanski'):
                 if c_extra['szczepanski'].get('as_of'):
                     is_oa = True
@@ -210,6 +207,8 @@ def release_to_elasticsearch(entity, force_bool=True):
         # TODO: more/better checks here, particularly strict *not* OA licenses
         if release.license_slug.startswith("CC-"):
             is_oa = True
+        if release.license_slug.startswith("ARXIV-"):
+            is_oa = True
 
     extra = release.extra or dict()
     if extra:
@@ -293,10 +292,10 @@ def release_to_elasticsearch(entity, force_bool=True):
     t['in_ia'] = bool(in_ia)
     t['is_preserved'] = bool(is_preserved or in_ia or in_kbart or in_jstor)
 
-    if in_ia:
+    if in_ia or t.get('pmcid') or t.get('arxiv_id'):
         t['preservation'] = 'bright'
     elif in_kbart or in_jstor:
-        t['preservation'] = 'dark_only'
+        t['preservation'] = 'dark'
     elif in_shadows:
         t['preservation'] = 'shadows_only'
     else:
@@ -367,9 +366,6 @@ def container_to_elasticsearch(entity, force_bool=True):
     if extra.get('road'):
         if extra['road'].get('as_of'):
             in_road = True
-    if extra.get('ezb'):
-        if extra['ezb'].get('color') == 'green':
-            is_oa = True
     if extra.get('szczepanski'):
         if extra['szczepanski'].get('as_of'):
             is_oa = True
