summaryrefslogtreecommitdiffstats
path: root/python
diff options
context:
space:
mode:
authorBryan Newbold <bnewbold@robocracy.org>2021-08-06 11:58:16 -0700
committerBryan Newbold <bnewbold@robocracy.org>2021-08-06 11:58:16 -0700
commit99885b458ad505ebb63b3e7cf5b1bae3dd2a459e (patch)
treede3fbb3e42b0bb7f6e447d2e13ac3f92a8bb90b2 /python
parent950d3f08bd439aed92d01dbc3cca9747570aa82c (diff)
parent56e4ce2d8347cdfedd492d54fde080772f3d8725 (diff)
downloadfatcat-99885b458ad505ebb63b3e7cf5b1bae3dd2a459e.tar.gz
fatcat-99885b458ad505ebb63b3e7cf5b1bae3dd2a459e.zip
Merge branch 'bnewbold-refs-apis'
Diffstat (limited to 'python')
-rw-r--r--python/.pylintrc2
-rw-r--r--python/fatcat_tools/references.py429
-rw-r--r--python/fatcat_tools/transforms/access.py60
-rw-r--r--python/fatcat_web/__init__.py2
-rw-r--r--python/fatcat_web/forms.py41
-rw-r--r--python/fatcat_web/ref_routes.py168
-rw-r--r--python/fatcat_web/routes.py2
-rw-r--r--python/fatcat_web/templates/entity_base.html5
-rw-r--r--python/fatcat_web/templates/entity_macros.html109
-rw-r--r--python/fatcat_web/templates/openlibrary_view_fuzzy_refs.html25
-rw-r--r--python/fatcat_web/templates/reference_match.html93
-rw-r--r--python/fatcat_web/templates/refs_macros.html132
-rw-r--r--python/fatcat_web/templates/release_view.html27
-rw-r--r--python/fatcat_web/templates/release_view_fuzzy_refs.html27
-rw-r--r--python/fatcat_web/templates/wikipedia_view_fuzzy_refs.html23
-rw-r--r--python/tests/files/elastic_empty.json1
-rw-r--r--python/tests/files/elastic_refs_in_release.json360
-rw-r--r--python/tests/files/elastic_refs_out_release.json683
-rw-r--r--python/tests/web_refs.py54
19 files changed, 2228 insertions, 15 deletions
diff --git a/python/.pylintrc b/python/.pylintrc
index d3003620..5fc310a8 100644
--- a/python/.pylintrc
+++ b/python/.pylintrc
@@ -14,3 +14,5 @@ notes=FIXME,XXX,DELETEME
ignored-modules=responses
# FileEntityForm' has no 'data' member (no-member) (etc)
generated-members=data,errors
+# No name 'BaseModel' in module 'pydantic' (no-name-in-module) (etc)
+extension-pkg-allow-list=pydantic
diff --git a/python/fatcat_tools/references.py b/python/fatcat_tools/references.py
new file mode 100644
index 00000000..496a46e1
--- /dev/null
+++ b/python/fatcat_tools/references.py
@@ -0,0 +1,429 @@
+"""
+Helper routines for working with the fatcat citation graph, which is a separate
+index of reference links between works in the main catalog.
+
+See bulk citation and citation API proposals for design documentation.
+"""
+
+import sys
+import datetime
+import argparse
+from typing import Optional, List, Any, Dict, Union
+
+from pydantic import BaseModel, validator
+import elasticsearch
+from elasticsearch_dsl import Search
+from fatcat_openapi_client import ReleaseEntity
+
+from fatcat_tools import public_api
+from fatcat_tools.transforms.access import release_access_options, AccessOption
+from fatcat_tools.transforms.entities import entity_to_dict
+
+
class BiblioRef(BaseModel):
    """
    A single bibliographic reference (citation link), as stored in the
    reference index.

    All fields are optional; which ones are populated depends on whether the
    source is a release or a wikipedia article, and on whether the target was
    matched to a catalog entity.
    """
    # ("release", source_release_ident, ref_index)
    # ("wikipedia", source_wikipedia_article, ref_index)
    _key: Optional[str]
    update_ts: Optional[datetime.datetime]

    # metadata about source of reference
    source_release_ident: Optional[str]
    source_work_ident: Optional[str]
    # with lang prefix like "en:Superglue"
    source_wikipedia_article: Optional[str]
    source_release_stage: Optional[str]
    source_year: Optional[int]

    # context of the reference itself
    # 1-indexed, not 0-indexed
    ref_index: Optional[int]  # TODO: actually optional?
    # eg, "Lee86", "BIB23"
    ref_key: Optional[str]
    # eg, page number
    ref_locator: Optional[str]

    # target of reference (identifiers)
    target_release_ident: Optional[str]
    target_work_ident: Optional[str]
    target_openlibrary_work: Optional[str]
    # TODO: target_url_surt: Optional[str]
    # would not be stored in elasticsearch, but would be auto-generated by all
    # "get" methods from the SURT, so calling code does not need to do SURT
    # transform
    target_url: Optional[str]

    # crossref, pubmed, grobid, etc
    match_provenance: Optional[str]
    # strong, weak, etc
    match_status: Optional[str]
    # TODO: "match_strength"?
    # "doi", "isbn", "fuzzy title, author", etc
    # maybe "fuzzy-title-author"?
    match_reason: Optional[str]

    # only if no release_ident link/match
    target_unstructured: Optional[str]
    target_csl: Optional[Dict[str, Any]]

    def hacks(self) -> "BiblioRef":
        """
        Temporary (?) hacks to work around schema/data issues.

        Mutates this object in place (strips the "/works/" prefix from the
        openlibrary identifier, and shortens noisy ``ref_key`` values), then
        returns ``self`` so the call can be chained.
        """
        if self.target_openlibrary_work and self.target_openlibrary_work.startswith("/works/"):
            self.target_openlibrary_work = self.target_openlibrary_work[7:]

        # work-arounds for bad/weird ref_key
        if self.ref_key:
            ref_key = self.ref_key.strip()
            # the key may be whitespace-only, in which case it is empty after
            # strip() and must not be indexed
            if ref_key and ref_key[0] in ('/', '_'):
                ref_key = ref_key[1:]
            if ref_key.startswith("10.") and 'SICI' in ref_key and '-' in ref_key:
                ref_key = ref_key.split('-')[-1]
            if ref_key.startswith("10.") and '_' in ref_key:
                ref_key = ref_key.split('_')[-1]
            if len(ref_key) > 10 and "#" in ref_key:
                ref_key = ref_key.split('#')[-1]
            if len(ref_key) > 10 and "_" in ref_key:
                ref_key = ref_key.split('_')[-1]
            self.ref_key = ref_key

        # fall back to the numeric index when there is no usable key
        if not self.ref_key and self.ref_index is not None:
            self.ref_key = str(self.ref_index)
        return self
+
+
class EnrichedBiblioRef(BaseModel):
    """
    A BiblioRef bundled with the complete ReleaseEntity fetched from the
    fatcat API, plus a list of access options.

    CSL-JSON metadata would be derived from the full release entity.
    """
    ref: BiblioRef
    release: Optional[ReleaseEntity]
    # TODO: openlibrary work?
    access: List[AccessOption]

    @validator('release')
    @classmethod
    def check_release(cls, v):
        # ReleaseEntity is not a pydantic model, so check the type by hand
        if v is None or isinstance(v, ReleaseEntity):
            return v
        raise ValueError("expected a ReleaseEntity")

    class Config:
        # required for the non-pydantic ReleaseEntity field
        arbitrary_types_allowed = True
        json_encoders = {
            ReleaseEntity: entity_to_dict,
        }
+
+
class RefHits(BaseModel):
    """
    One page of results from a reference index query, with paging and timing
    metadata.
    """
    # number of refs in `result_refs`
    count_returned: int
    # total number of hits reported by the search backend
    count_total: int
    offset: int
    limit: int
    # time reported by the search backend
    query_time_ms: int
    # time measured around the query call
    query_wall_time_ms: int
    result_refs: List[Union[BiblioRef, EnrichedBiblioRef]]

    class Config:
        json_encoders = {
            ReleaseEntity: entity_to_dict,
        }
+
+
def _execute_ref_query(search: Any, limit: int, offset: Optional[int] = None) -> RefHits:
    """
    Internal helper for querying elasticsearch refs index and transforming hits.

    `limit` falls back to 15 when it is missing, zero, or negative, and is
    capped at 200. A missing or negative `offset` is treated as 0.

    Raises ValueError for elasticsearch request ("user") errors, and IOError
    for any other elasticsearch transport error.
    """

    limit = int(limit or 0)
    if limit <= 0:
        # a negative limit would otherwise produce a negative slice bound
        limit = 15
    limit = min(limit, 200)
    if not offset or offset < 0:
        offset = 0

    search = search.params(track_total_hits=True)
    search = search[offset : (offset + limit)]

    query_start = datetime.datetime.now()
    try:
        resp = search.execute()
    except elasticsearch.exceptions.RequestError as e_raw:
        # this is a "user" error
        e: Any = e_raw
        if e.info.get("error", {}).get("root_cause", {}):
            raise ValueError(str(e.info["error"]["root_cause"][0].get("reason"))) from e
        else:
            raise ValueError(str(e.info)) from e
    except elasticsearch.exceptions.TransportError as e:
        # all other errors
        raise IOError(str(e.info)) from e
    query_delta = datetime.datetime.now() - query_start

    result_refs = []
    for h in resp.hits:
        # might be a list because of consolidation
        if isinstance(h._d_.get('source_work_ident'), list):
            h._d_['source_work_ident'] = h._d_['source_work_ident'][0]
        result_refs.append(BiblioRef.parse_obj(h._d_).hacks())

    return RefHits(
        count_returned=len(result_refs),
        # ES 7.x style "total"
        count_total=resp.hits.total.value,
        offset=offset,
        limit=limit,
        query_time_ms=int(resp.took),
        query_wall_time_ms=int(query_delta.total_seconds() * 1000),
        result_refs=result_refs,
    )
+
+
def get_outbound_refs(
    es_client: Any,
    release_ident: Optional[str] = None,
    work_ident: Optional[str] = None,
    wikipedia_article: Optional[str] = None,
    limit: int = 100,
    offset: Optional[int] = None,
    es_index: str = "fatcat_ref",
) -> RefHits:
    """
    Fetch references *from* a release, work, or wikipedia article.

    Exactly one lookup key is used, with precedence release, then work, then
    wikipedia article. Raises ValueError if none is given.
    """

    search = Search(using=es_client, index=es_index)

    if release_ident:
        lookup = {"source_release_ident": release_ident}
    elif work_ident:
        lookup = {"source_work_ident": work_ident}
    elif wikipedia_article:
        lookup = {"source_wikipedia_article": wikipedia_article}
    else:
        raise ValueError("require a lookup key")

    search = search.filter("term", **lookup).sort("ref_index")

    hits = _execute_ref_query(search, limit=limit, offset=offset)

    # re-sort by index; refs without an index sort first
    hits.result_refs = sorted(hits.result_refs, key=lambda r: r.ref_index or 0)
    return hits
+
+
def get_inbound_refs(
    es_client: Any,
    release_ident: Optional[str] = None,
    work_ident: Optional[str] = None,
    openlibrary_work: Optional[str] = None,
    url: Optional[str] = None,
    consolidate_works: bool = True,
    filter_stage: Optional[List[str]] = None,
    sort: Optional[str] = None,
    limit: int = 25,
    offset: Optional[int] = None,
    es_index: str = "fatcat_ref",
) -> RefHits:
    """
    Fetch references *to* a release, work, or openlibrary work.

    Exactly one lookup key is used, with precedence release, then work, then
    openlibrary work. Raises ValueError if none is given.

    `consolidate_works` collapses hits on `source_work_ident`, so each citing
    work appears once. `filter_stage` restricts results to the given source
    release stages. `sort` is "newest" (the default) or "oldest", by source
    year.

    NOTE: `url` is accepted but not yet used as a lookup key.
    """

    search = Search(using=es_client, index=es_index)

    if consolidate_works:
        search = search.extra(
            collapse={
                "field": "source_work_ident",
                "inner_hits": {"name": "source_more", "size": 0,},
            }
        )

    if release_ident:
        search = search.filter("term", target_release_ident=release_ident)
    elif work_ident:
        search = search.filter("term", target_work_ident=work_ident)
    elif openlibrary_work:
        search = search.filter("term", target_openlibrary_work=openlibrary_work)
    else:
        raise ValueError("require a lookup key")

    if filter_stage:
        # "term" takes a single value; a list of values needs "terms". The
        # field name matches BiblioRef.source_release_stage, which index
        # documents are parsed into.
        stages = [filter_stage] if isinstance(filter_stage, str) else list(filter_stage)
        search = search.filter("terms", source_release_stage=stages)

    # anything other than "oldest" (including None) means newest first
    search = search.sort("source_year" if sort == "oldest" else "-source_year")

    return _execute_ref_query(search, limit=limit, offset=offset)
+
+
def count_inbound_refs(
    es_client: Any,
    release_ident: Optional[str] = None,
    work_ident: Optional[str] = None,
    openlibrary_work: Optional[str] = None,
    url: Optional[str] = None,
    filter_stage: Optional[List[str]] = None,
    es_index: str = "fatcat_ref",
) -> int:
    """
    Same lookup parameters as get_inbound_refs(), but returns just a count.

    Raises ValueError if no lookup key is given.

    NOTE: `url` is accepted but not yet used as a lookup key.
    """

    search = Search(using=es_client, index=es_index)

    if release_ident:
        search = search.filter("term", target_release_ident=release_ident)
    elif work_ident:
        search = search.filter("term", target_work_ident=work_ident)
    elif openlibrary_work:
        search = search.filter("term", target_openlibrary_work=openlibrary_work)
    else:
        raise ValueError("require a lookup key")

    if filter_stage:
        # "term" takes a single value; a list of values needs "terms". The
        # field name matches BiblioRef.source_release_stage, which index
        # documents are parsed into.
        stages = [filter_stage] if isinstance(filter_stage, str) else list(filter_stage)
        search = search.filter("terms", source_release_stage=stages)

    return search.count()
+
+
def enrich_inbound_refs(refs: List[BiblioRef], fatcat_api_client: Any, hide: Optional[str] = "refs", expand: Optional[str] = "container,files,webcaptures,filesets") -> List[EnrichedBiblioRef]:
    """
    Run a fatcat API fetch for the *source* of each ref and return "enriched"
    refs.

    Refs with a source release get the release entity and its access options;
    refs from a wikipedia article get a "wikipedia" access option linking to
    the article.
    """
    results = []
    for ref in refs:
        release = None
        access = []
        if ref.source_release_ident:
            release = fatcat_api_client.get_release(ref.source_release_ident, hide=hide, expand=expand)
            access = release_access_options(release)
        if ref.source_wikipedia_article:
            # article keys look like "en:Superglue": language code, then title
            wiki_lang, _, title = ref.source_wikipedia_article.partition(':')
            wiki_article = title.replace(' ', '_')
            wiki_option = AccessOption(
                access_type="wikipedia",
                access_url=f"https://{wiki_lang}.wikipedia.org/wiki/{wiki_article}",
                mimetype=None,
                size_bytes=None,
                thumbnail_url=None
            )
            access.append(wiki_option)
        results.append(EnrichedBiblioRef(
            ref=ref,
            access=access,
            release=release,
        ))
    return results
+
+
def enrich_outbound_refs(refs: List[BiblioRef], fatcat_api_client: Any, hide: Optional[str] = "refs", expand: Optional[str] = "container,files,webcaptures,filesets") -> List[EnrichedBiblioRef]:
    """
    Run a fatcat API fetch for the *target* of each ref and return "enriched"
    refs.

    Besides release access options, an "openlibrary" option is added for
    openlibrary targets, and a "wayback" option when the target URL is
    already a web.archive.org URL.
    """

    def _link_option(access_type: str, access_url: str) -> AccessOption:
        # access option with only a type and URL (no file metadata)
        return AccessOption(
            access_type=access_type,
            access_url=access_url,
            mimetype=None,
            size_bytes=None,
            thumbnail_url=None
        )

    results = []
    for ref in refs:
        release = None
        access = []
        if ref.target_release_ident:
            release = fatcat_api_client.get_release(ref.target_release_ident, hide=hide, expand=expand)
            access = release_access_options(release)
        if ref.target_openlibrary_work:
            access.append(_link_option(
                "openlibrary",
                f"https://openlibrary.org/works/{ref.target_openlibrary_work}",
            ))
        if ref.target_url and '://web.archive.org/' in ref.target_url:
            access.append(_link_option("wayback", ref.target_url))
        results.append(EnrichedBiblioRef(
            ref=ref,
            access=access,
            release=release,
        ))
    return results
+
+
def run_ref_query(args) -> None:
    """
    CLI helper/debug tool (prints to stdout)

    `args.ident` may be prefixed with "release_" or "work_"; a bare ident is
    treated as a release ident. Prints outbound, then inbound, references.
    When `args.enrich` is "fatcat", each reference is expanded via the fatcat
    API before printing.
    """
    release_ident = None
    work_ident = None
    if args.ident.startswith("release_"):
        release_ident = args.ident.split('_', 1)[1]
    elif args.ident.startswith("work_"):
        work_ident = args.ident.split('_', 1)[1]
    else:
        release_ident = args.ident

    # honor the CLI index option when present; fall back to the default index
    es_index = getattr(args, "elasticsearch_ref_index", None) or "fatcat_ref"

    print("## Outbound References")
    hits = get_outbound_refs(release_ident=release_ident, work_ident=work_ident, es_client=args.es_client, es_index=es_index)
    print(f"Total: {hits.count_total} Time: {hits.query_wall_time_ms}ms; {hits.query_time_ms}ms")

    if args.enrich == "fatcat":
        enriched = enrich_outbound_refs(hits.result_refs, hide='refs,abstracts', fatcat_api_client=args.fatcat_api_client)
        for ref in enriched:
            if ref.release:
                print(f"{ref.ref.ref_index or '-'}\trelease_{ref.release.ident}\t{ref.ref.match_provenance}/{ref.ref.match_status}\t{ref.release.release_year or '-'}\t{ref.release.title}\t{ref.release.ext_ids.pmid or ref.release.ext_ids.doi or '-'}")
            else:
                # enriched refs wrap the BiblioRef; identifiers live on `.ref`
                print(f"{ref.ref.ref_index or '-'}\trelease_{ref.ref.target_release_ident}")
    else:
        for ref in hits.result_refs:
            # plain BiblioRef objects here: fields are direct attributes
            print(f"{ref.ref_index or '-'}\trelease_{ref.target_release_ident}")

    print()
    print("## Inbound References")
    hits = get_inbound_refs(release_ident=release_ident, work_ident=work_ident, es_client=args.es_client, es_index=es_index)

    print(f"Total: {hits.count_total} Time: {hits.query_wall_time_ms}ms; {hits.query_time_ms}ms")

    if args.enrich == "fatcat":
        enriched = enrich_inbound_refs(hits.result_refs, hide='refs,abstracts', fatcat_api_client=args.fatcat_api_client)
        for ref in enriched:
            if ref.release:
                print(f"release_{ref.release.ident}\t{ref.ref.match_provenance}/{ref.ref.match_status}\t{ref.release.release_year or '-'}\t{ref.release.title}\t{ref.release.ext_ids.pmid or ref.release.ext_ids.doi or '-'}")
            else:
                # inbound refs are listed by their source (the citing side)
                print(f"release_{ref.ref.source_release_ident}")
    else:
        for ref in hits.result_refs:
            print(f"work_{ref.source_work_ident}\trelease_{ref.source_release_ident}")
+
def main() -> None:
    """
    Command-line entry point. Run this utility like:

        python -m fatcat_tools.references

    Examples:

        python -m fatcat_tools.references query release_pfrind3kh5hqhgqkueulk2tply
    """

    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )
    subparsers = parser.add_subparsers()

    parser.add_argument("--fatcat-api-base", default="https://api.fatcat.wiki/v0")
    parser.add_argument("--elasticsearch-base", default="https://search.fatcat.wiki")
    parser.add_argument("--elasticsearch-ref-index", default="fatcat_ref")

    query_parser = subparsers.add_parser(
        "query",
        help="takes a fatcat ident argument, prints both inbound and outbound references",
    )
    query_parser.set_defaults(func="run_ref_query")
    query_parser.add_argument("ident", type=str)
    query_parser.add_argument("--enrich", type=str)

    args = parser.parse_args()

    # no sub-command selected: show usage and bail out
    if not getattr(args, "func", None):
        parser.print_help(file=sys.stderr)
        sys.exit(-1)

    # clients are attached to the namespace so sub-commands can reach them
    args.es_client = elasticsearch.Elasticsearch(args.elasticsearch_base)
    args.fatcat_api_client = public_api(args.fatcat_api_base)

    if args.func != "run_ref_query":
        raise NotImplementedError(args.func)
    run_ref_query(args)


if __name__ == "__main__":
    main()
diff --git a/python/fatcat_tools/transforms/access.py b/python/fatcat_tools/transforms/access.py
new file mode 100644
index 00000000..5ed64c7c
--- /dev/null
+++ b/python/fatcat_tools/transforms/access.py
@@ -0,0 +1,60 @@
+
+from enum import Enum
+from typing import Optional, List
+
+from pydantic import BaseModel
+from fatcat_openapi_client import ReleaseEntity
+
+
class AccessType(str, Enum):
    """
    Kind of access URL attached to an AccessOption.

    Members are also plain strings, so they compare equal to their values.
    """

    wayback = "wayback"
    ia_file = "ia_file"
    ia_microfilm = "ia_microfilm"
    repository = "repository"
    openlibrary = "openlibrary"
    wikipedia = "wikipedia"
+
class AccessOption(BaseModel):
    """One way of getting at the content of a reference or release."""

    access_type: AccessType

    # note: for `target_url` refs, would do a CDX lookup and this URL would be
    # a valid/HTTP-200 web.archive.org capture URL
    access_url: str

    # application/pdf, text/html, etc
    # blank for landing pages
    mimetype: Optional[str]

    size_bytes: Optional[int]
    thumbnail_url: Optional[str]
+
+
def release_access_options(release: ReleaseEntity) -> List[AccessOption]:
    """
    Extracts access options from a release.

    Returns a single-element list for the first file URL that points at
    web.archive.org ("wayback") or archive.org ("ia_file"), or an empty list
    when no such URL exists.

    TODO: proper implementation
    """
    for entity_file in (release.files or []):
        for file_url in (entity_file.urls or []):
            if '://web.archive.org/' in file_url.url:
                access_type = "wayback"
            elif '://archive.org/' in file_url.url:
                access_type = "ia_file"
            else:
                continue
            # first recognized URL wins
            return [AccessOption(
                access_type=access_type,
                access_url=file_url.url,
                mimetype=entity_file.mimetype,
                size_bytes=entity_file.size,
                thumbnail_url=None
            )]
    return []
diff --git a/python/fatcat_web/__init__.py b/python/fatcat_web/__init__.py
index 07b4e083..3207bc75 100644
--- a/python/fatcat_web/__init__.py
+++ b/python/fatcat_web/__init__.py
@@ -76,7 +76,7 @@ app.register_blueprint(mwoauth.bp, url_prefix='/auth/wikipedia')
app.es_client = elasticsearch.Elasticsearch(Config.ELASTICSEARCH_BACKEND)
-from fatcat_web import routes, editing_routes, auth, cors, forms
+from fatcat_web import routes, editing_routes, ref_routes, auth, cors, forms
# TODO: blocking on ORCID support in loginpass
if Config.ORCID_CLIENT_ID:
diff --git a/python/fatcat_web/forms.py b/python/fatcat_web/forms.py
index 1c9fb199..19176a59 100644
--- a/python/fatcat_web/forms.py
+++ b/python/fatcat_web/forms.py
@@ -482,3 +482,44 @@ class EntityTomlForm(EntityEditForm):
etf.toml.data = entity_to_toml(entity, pop_fields=pop_fields)
return etf
+
class ReferenceMatchForm(FlaskForm):
    """
    Form behind the reference match tool: either a raw citation string to be
    parsed, or structured bibliographic fields to be matched.
    """

    # which button was pressed
    submit_type = SelectField('submit_type',
        [validators.DataRequired()],
        choices=['parse', 'match'])

    raw_citation = TextAreaField("Citation String", render_kw={'rows':'3'})

    title = StringField("Title")
    journal = StringField("Journal or Conference")
    first_author = StringField("First Author")
    # kept as a free-form string field rather than a validated integer field
    year = StringField("Year Released")
    volume = StringField("Volume")
    issue = StringField("Issue")
    pages = StringField("Pages")

    @staticmethod
    def from_grobid_parse(parse_dict, raw_citation):
        """
        Initializes form from GROBID extraction
        """
        form = ReferenceMatchForm()
        form.raw_citation.data = raw_citation

        # fields copied over verbatim when present and non-empty
        for name in ('title', 'journal', 'volume', 'issue', 'pages'):
            value = parse_dict.get(name)
            if value:
                getattr(form, name).data = value

        # only the leading four-digit year of the date is used
        date = parse_dict.get('date')
        if date and len(date) >= 4 and date[0:4].isdigit():
            form.year.data = int(date[0:4])

        authors = parse_dict.get('authors')
        if authors:
            form.first_author.data = authors[0].get('name')

        return form
diff --git a/python/fatcat_web/ref_routes.py b/python/fatcat_web/ref_routes.py
new file mode 100644
index 00000000..d4219012
--- /dev/null
+++ b/python/fatcat_web/ref_routes.py
@@ -0,0 +1,168 @@
+"""
+Flask endpoints for reference (citation) views. Eg, listing references
+"inbound" to and "outbound" from a specific release or work.
+"""
+
+from flask import render_template, request, Response
+from fatcat_openapi_client import *
+from fuzzycat.grobid_unstructured import grobid_api_process_citation, transform_grobid_ref_xml, grobid_ref_to_release
+from fuzzycat.simple import close_fuzzy_biblio_matches, close_fuzzy_release_matches
+
+from fatcat_tools.references import enrich_inbound_refs, enrich_outbound_refs, get_inbound_refs, get_outbound_refs, RefHits
+from fatcat_tools.transforms.access import release_access_options
+from fatcat_web import app, api
+from fatcat_web.cors import crossdomain
+from fatcat_web.forms import *
+from fatcat_web.entity_helpers import *
+
def _refs_web(direction, release_ident=None, work_ident=None, openlibrary_id=None, wikipedia_article=None) -> RefHits:
    """
    Shared helper for the refs views: reads paging parameters from the
    current request, runs the inbound ("in") or outbound ("out") query, and
    enriches the results via the fatcat API.

    `offset` defaults to 0; `limit` defaults to 30 and is capped at 100.
    Values that are not plain decimal integers fall back to the defaults.

    Raises ValueError for an unknown `direction`.
    """
    # isdecimal(), not isnumeric(): the latter accepts characters such as
    # superscripts and fractions which int() cannot parse
    offset = request.args.get('offset', '0')
    offset = max(0, int(offset)) if offset.isdecimal() else 0
    limit = request.args.get('limit', '30')
    limit = min(max(0, int(limit)), 100) if limit.isdecimal() else 30
    if direction == "in":
        hits = get_inbound_refs(
            release_ident=release_ident,
            work_ident=work_ident,
            openlibrary_work=openlibrary_id,
            es_client=app.es_client,
            offset=offset,
            limit=limit,
        )
        hits.result_refs = enrich_inbound_refs(
            hits.result_refs,
            fatcat_api_client=api,
            expand="container,files,webcaptures",
        )
    elif direction == "out":
        hits = get_outbound_refs(
            release_ident=release_ident,
            wikipedia_article=wikipedia_article,
            work_ident=work_ident,
            es_client=app.es_client,
            offset=offset,
            limit=limit,
        )
        hits.result_refs = enrich_outbound_refs(
            hits.result_refs,
            fatcat_api_client=api,
            expand="container,files,webcaptures",
        )
    else:
        raise ValueError()
    return hits
+
+
@app.route('/release/<string(length=26):ident>/refs-in', methods=['GET'])
def release_view_refs_inbound(ident):
    """HTML view of references *to* a release ("cited by")."""
    # content negotiation: clients preferring JSON get the pseudo-API response
    wants_json = request.accept_mimetypes.best == "application/json"
    if wants_json:
        return release_view_refs_inbound_json(ident)

    entity = generic_get_entity("release", ident)
    ref_hits = _refs_web("in", release_ident=ident)
    page = render_template('release_view_fuzzy_refs.html', direction="in", entity=entity, hits=ref_hits)
    return page, 200
+
+
@app.route('/release/<string(length=26):ident>/refs-out', methods=['GET'])
def release_view_refs_outbound(ident):
    """HTML view of references *from* a release."""
    # content negotiation: clients preferring JSON get the pseudo-API response
    wants_json = request.accept_mimetypes.best == "application/json"
    if wants_json:
        return release_view_refs_outbound_json(ident)

    entity = generic_get_entity("release", ident)
    ref_hits = _refs_web("out", release_ident=ident)
    page = render_template('release_view_fuzzy_refs.html', direction="out", entity=entity, hits=ref_hits)
    return page, 200
+
@app.route('/openlibrary/OL<int:id_num>W/refs-in', methods=['GET'])
def openlibrary_view_refs_inbound(id_num):
    """HTML view of references *to* an Open Library work."""
    # content negotiation: clients preferring JSON get the pseudo-API response
    wants_json = request.accept_mimetypes.best == "application/json"
    if wants_json:
        return openlibrary_view_refs_inbound_json(id_num)

    # rebuild the full work identifier from the numeric URL component
    openlibrary_id = f"OL{id_num}W"
    ref_hits = _refs_web("in", openlibrary_id=openlibrary_id)
    page = render_template('openlibrary_view_fuzzy_refs.html', openlibrary_id=openlibrary_id, direction="in", hits=ref_hits)
    return page, 200
+
@app.route('/wikipedia/<string(length=2):wiki_lang>:<string:wiki_article>/refs-out', methods=['GET'])
def wikipedia_view_refs_outbound(wiki_lang: str, wiki_article: str):
    """HTML view of references *from* a wikipedia article."""
    # content negotiation: clients preferring JSON get the pseudo-API response
    wants_json = request.accept_mimetypes.best == "application/json"
    if wants_json:
        return wikipedia_view_refs_outbound_json(wiki_lang, wiki_article)

    # the link keeps the underscore form; the lookup key uses spaces
    wiki_url = f"https://{wiki_lang}.wikipedia.org/wiki/{wiki_article}"
    wiki_article = wiki_article.replace('_', ' ')
    lookup_key = wiki_lang + ":" + wiki_article
    ref_hits = _refs_web("out", wikipedia_article=lookup_key)
    page = render_template('wikipedia_view_fuzzy_refs.html', wiki_article=wiki_article, wiki_lang=wiki_lang, wiki_url=wiki_url, direction="out", hits=ref_hits)
    return page, 200
+
+
@app.route('/reference/match', methods=['GET', 'POST'])
def reference_match():
    """
    Reference fuzzy-match tool.

    A "parse" submission sends the raw citation string through GROBID and
    matches the parsed result against the catalog; a "match" submission
    matches the structured form fields directly.
    """

    form = ReferenceMatchForm()

    # plain page load: just show the empty form
    if not form.is_submitted():
        return render_template('reference_match.html', form=form), 200

    if not form.validate_on_submit():
        status_code = 400 if form.errors else 200
        return render_template('reference_match.html', form=form), status_code

    grobid_status = None
    grobid_dict = None
    submit_type = form.submit_type.data

    if submit_type == 'parse':
        resp_xml = grobid_api_process_citation(form.raw_citation.data)
        if not resp_xml:
            return render_template('reference_match.html', form=form, grobid_status="failed"), 400
        grobid_dict = transform_grobid_ref_xml(resp_xml)
        if not grobid_dict:
            return render_template('reference_match.html', form=form, grobid_status="empty"), 200
        release_stub = grobid_ref_to_release(grobid_dict)
        # remove empty values from GROBID parsed dict
        grobid_dict = {k: v for k, v in grobid_dict.items() if v is not None}
        # re-populate the form from the parse so the user can refine it
        form = ReferenceMatchForm.from_grobid_parse(grobid_dict, form.raw_citation.data)
        grobid_status = "success"
        matches = close_fuzzy_release_matches(es_client=app.es_client, release=release_stub, match_limit=10) or []
    elif submit_type == 'match':
        matches = close_fuzzy_biblio_matches(es_client=app.es_client, biblio=form.data, match_limit=10) or []
    else:
        raise NotImplementedError()

    for match in matches:
        # expand releases more completely
        # NOTE(review): the CLI helper in fatcat_tools.references passes
        # "abstracts" (plural) as the hide flag; confirm "abstract" is intended
        match.release = api.get_release(match.release.ident, expand="container,files,filesets,webcaptures", hide="abstract,refs")
        # hack in access options
        match.access_options = release_access_options(match.release)

    return render_template('reference_match.html', form=form, grobid_dict=grobid_dict, grobid_status=grobid_status, matches=matches), 200
+
+
+### Pseudo-APIs #############################################################
+
@app.route('/release/<string(length=26):ident>/refs-out.json', methods=['GET', 'OPTIONS'])
@crossdomain(origin='*',headers=['access-control-allow-origin','Content-Type'])
def release_view_refs_outbound_json(ident):
    """JSON pseudo-API: references *from* a release."""
    ref_hits = _refs_web("out", release_ident=ident)
    # unset optional fields are left out of the payload
    payload = ref_hits.json(exclude_unset=True)
    return Response(payload, mimetype="application/json")
+
+
@app.route('/release/<string(length=26):ident>/refs-in.json', methods=['GET', 'OPTIONS'])
@crossdomain(origin='*',headers=['access-control-allow-origin','Content-Type'])
def release_view_refs_inbound_json(ident):
    """JSON pseudo-API: references *to* a release."""
    ref_hits = _refs_web("in", release_ident=ident)
    # unset optional fields are left out of the payload
    payload = ref_hits.json(exclude_unset=True)
    return Response(payload, mimetype="application/json")
+
@app.route('/openlibrary/OL<int:id_num>W/refs-in.json', methods=['GET', 'OPTIONS'])
@crossdomain(origin='*',headers=['access-control-allow-origin','Content-Type'])
def openlibrary_view_refs_inbound_json(id_num):
    """JSON pseudo-API: references *to* an Open Library work."""
    # rebuild the full work identifier from the numeric URL component
    ref_hits = _refs_web("in", openlibrary_id=f"OL{id_num}W")
    # unset optional fields are left out of the payload
    payload = ref_hits.json(exclude_unset=True)
    return Response(payload, mimetype="application/json")
+
@app.route('/wikipedia/<string(length=2):wiki_lang>:<string:wiki_article>/refs-out.json', methods=['GET', 'OPTIONS'])
@crossdomain(origin='*',headers=['access-control-allow-origin','Content-Type'])
def wikipedia_view_refs_outbound_json(wiki_lang: str, wiki_article: str):
    """JSON pseudo-API: references *from* a wikipedia article."""
    # the lookup key uses spaces where the URL form has underscores
    lookup_key = wiki_lang + ":" + wiki_article.replace('_', ' ')
    ref_hits = _refs_web("out", wikipedia_article=lookup_key)
    # unset optional fields are left out of the payload
    payload = ref_hits.json(exclude_unset=True)
    return Response(payload, mimetype="application/json")
diff --git a/python/fatcat_web/routes.py b/python/fatcat_web/routes.py
index 144922a8..ab060c45 100644
--- a/python/fatcat_web/routes.py
+++ b/python/fatcat_web/routes.py
@@ -1128,12 +1128,14 @@ def page_edit_conflict(e):
@app.errorhandler(500)
def page_server_error(e):
+ app.log.error(e)
return render_template('500.html'), 500
@app.errorhandler(502)
@app.errorhandler(503)
@app.errorhandler(504)
def page_server_down(e):
+ app.log.error(e)
return render_template('503.html'), 503
@app.errorhandler(ApiException)
diff --git a/python/fatcat_web/templates/entity_base.html b/python/fatcat_web/templates/entity_base.html
index 36280f5d..52acd70a 100644
--- a/python/fatcat_web/templates/entity_base.html
+++ b/python/fatcat_web/templates/entity_base.html
@@ -85,7 +85,10 @@
{{ entity_tab("coverage", "Coverage", "/coverage") }}
{% elif entity_type == "release" and entity.state != 'deleted' %}
{{ entity_tab("contribs", "Authors", "/contribs", entity._authors|count ) }}
- {{ entity_tab("references", "References", "/references", entity.refs|count) }}
+ {% if entity.state == 'active' %}
+ {{ entity_tab("refs-out", "References", "/refs-out") }}
+ {{ entity_tab("refs-in", "Cited By", "/refs-in") }}
+ {% endif %}
{% endif %}
{{ entity_tab("metadata", "Metadata", "/metadata") }}
</div>
diff --git a/python/fatcat_web/templates/entity_macros.html b/python/fatcat_web/templates/entity_macros.html
index 50f45753..6b565f69 100644
--- a/python/fatcat_web/templates/entity_macros.html
+++ b/python/fatcat_web/templates/entity_macros.html
@@ -387,3 +387,112 @@ yellow
</table>
{%- endmacro %}
+
+{# this is useful for things like showing lists of releases in tables #}
+{% macro release_summary(release) %}
+ <b><a href="/release/{{ release.ident }}">{{ release.title }}</a></b>
+ {% if release.release_type not in ["article-journal", "paper-conference"] %}
+ <b>[{{ release.release_type or "unknown-type" }}]</b>
+ {% endif %}
+ {% if release.contribs %}<br>{% endif %}
+ {% for contrib in release.contribs[:8] %}
+ {% if contrib.creator %}
+ <a href="/creator/{{ contrib.creator.ident }}" style="color: black;">{{ contrib.creator.display_name }}</a>
+ {% else %}
+ {{ contrib.raw_name }}
+ {%- endif %}
+ {%- if not loop.last %}, {% endif %}
+ {% endfor %}
+ {% if release.contribs | length > 8 %} <i>(+ more)</i> {%endif %}
+ {% if release.release_year or release.container or (release.extra and release.extra.container_name) %}<br>{% endif %}
+ {% if release.release_year %}
+ {% if release.release_date %}
+ <span title="{{ release.release_date }}">{{ release.release_year }}</span>
+ {% else %}
+ {{ release.release_year }}
+ {% endif %}
+ &nbsp;
+ {% endif %}
+ {% if release.container %}
+ <a href="/container/{{ release.container.ident }}" style="color: black;"><i>{{ release.container.name }}</i></a>
+ {% elif release.extra and release.extra.container_name %}
+ <i>{{ release.extra.container_name }}</i>
+ {% endif %}
+
+ {% if release.release_stage == "submitted" %}
+ &nbsp;<b style="color: brown; text-transform: uppercase;">pre-print</b>
+ {% elif release.release_stage and release.release_stage != "published" %}
+ &nbsp;<b style="color: brown; text-transform: uppercase;">{{ release.release_stage }} version</b>
+ {% elif not release.release_stage %}
+ &nbsp;<b style="color: brown; text-transform: uppercase;">unpublished</b>
+ {% endif %}
+<br>
+ {% if release.version %}
+ <span style="color:green">version:{{ release.version }}</span>&nbsp;
+ {% endif %}
+ {% if release.number %}
+ <span style="color:green">number:{{ release.number }}</span>&nbsp;
+ {% endif %}
+ {% if release.ext_ids.doi %}
+ <a href="https://doi.org/{{ release.ext_ids.doi }}" style="color:green;">doi:{{ release.ext_ids.doi }}</a>&nbsp;
+ {% endif %}
+ {# TODO: links #}
+ {% if release.ext_ids.arxiv %}
+ <a href="#" style="color:green;">arXiv:{{ release.ext_ids.arxiv }}</a>&nbsp;
+ {% endif %}
+ {% if release.ext_ids.pmcid %}
+ <a href="#" style="color:green;">pmcid:{{ release.ext_ids.pmcid }}</a>&nbsp;
+ {% endif %}
+ {% if release.ext_ids.pmid %}
+ <a href="#" style="color:green;">pmid:{{ release.ext_ids.pmid }}</a>&nbsp;
+ {% endif %}
+ {% if release.ext_ids.dblp %}
+ <a href="#" style="color:green;">dblp:{{ release.ext_ids.dblp }}</a>&nbsp;
+ {% endif %}
+{% endmacro %}
+
+{# similar to the release_summary above, but for CSL-JSON #}
+{% macro csl_summary(csl) %}
+ <b>{{ csl.title }}</b>
+ {% if csl.title and csl.author %}<br>{% endif %}
+ {% if csl.author %}
+ {% for author in csl.author[:8] %}
+ {% if author.literal %}
+ {{ author.literal }}
+ {% elif author.raw_name %}
+ {{ author.raw_name }}
+ {% elif author.family and author.given %}
+ {{ author.given }} {{ author.family }}
+ {% elif author.family %}
+ {{ author.family }}
+ {% elif author.name %}
+ {# DEPRECATED: was used by refs code path for a while. Delete in, eg, year 2022 #}
+ {{ author.name }}
+ {% endif %}
+ {%- if not loop.last %}, {% endif %}
+ {% endfor %}
+ {% if csl.author | length > 8 %} <i>(+ more)</i> {%endif %}
+ {% endif %}
+
+ {% if csl.issued or csl["container-title"] %}<br>{% endif %}
+ {% if csl.issued and csl.issued is mapping %}
+ {% if csl.issued['date-parts'] %}
+ {{ csl.issued['date-parts'][0][0] }} &nbsp;
+ {% elif csl.issued.raw %}
+ {{ csl.issued.raw }} &nbsp;
+ {% endif %}
+ {% endif %}
+ {% if csl["container-title"] %}
+ <i>{{ csl["container-title"] }}</i>
+ {% endif %}
+ <br>
+ {% if csl.volume %}
+ <span style="color:green">volume:{{ csl.volume}}</span>&nbsp;
+ {% endif %}
+ {% if csl.DOI %}
+ <a href="https://doi.org/{{ csl.DOI }}" style="color:green;">doi:{{ csl.DOI }}</a>&nbsp;
+ {% endif %}
+ {% if csl.URL %}
+ <a href="{{ csl.URL }}" style="color:green;">url:{{ csl.URL }}</a>&nbsp;
+ {% endif %}
+{% endmacro %}
diff --git a/python/fatcat_web/templates/openlibrary_view_fuzzy_refs.html b/python/fatcat_web/templates/openlibrary_view_fuzzy_refs.html
new file mode 100644
index 00000000..21bf76f2
--- /dev/null
+++ b/python/fatcat_web/templates/openlibrary_view_fuzzy_refs.html
@@ -0,0 +1,25 @@
+{% extends "base.html" %}
+{% import "refs_macros.html" as refs_macros %}
+
+{% block title %}Open Library Refs{% endblock %}
+
+{% block fullbody %}
+<h1 class="ui header">
+ {% if hits.result_refs and hits.result_refs[0].ref.target_unstructured %}
+ <i>{{ hits.result_refs[0].ref.target_unstructured }}</i>
+ {% endif %}
+ <span class="sub header"><a href="https://openlibrary.org/works/{{ openlibrary_id }}"><code>https://openlibrary.org/works/{{ openlibrary_id }}</code></a></span>
+</h1>
+{# direction selects the intro text: "in" lists citing works, "out" lists this book's own references #}
+{% if direction == "in" %}
+ <h3>Cited By</h3>
+ <p>This page lists references to this book from other works (eg, journal articles).
+{% elif direction == "out" %}
+ <h3>References</h3>
+ <i>References from this book to other entities.</i>
+{% endif %}
+
+{{ refs_macros.refs_table(hits, direction) }}
+
+{% endblock %}
+
diff --git a/python/fatcat_web/templates/reference_match.html b/python/fatcat_web/templates/reference_match.html
new file mode 100644
index 00000000..f2335f52
--- /dev/null
+++ b/python/fatcat_web/templates/reference_match.html
@@ -0,0 +1,93 @@
+{% extends "base.html" %}
+{% import "entity_macros.html" as entity_macros %}
+{% import "edit_macros.html" as edit_macros %}
+
+{% block body %}
+{# One form, two submit buttons told apart by "submit_type": "parse" (raw citation string) or "match" (structured fields). Reads form, grobid_status, grobid_dict and matches from the template context. #}
+<h1>Reference Fuzzy Match Tool</h1>
+
+<form class="ui form" id="reference_match" method="POST" action="/reference/match">
+ <input type="hidden" name="csrf_token" value="{{ csrf_token() }}"/>
+
+ <div class="ui segment">
+ <h3>Parse Citation</h3>
+
+ <p>Enter a citation string here and we will try to parse it (using GROBID)
+ into a structured format, then match against the catalog.
+
+ {{ edit_macros.form_field_basic(form.raw_citation) }}
+
+ <button class="ui primary submit button right floated" type="submit" name="submit_type" value="parse">
+ Parse
+ </button>
+ <br clear="all">
+ </div>
+
+ {% if grobid_status == "success" and grobid_dict %}
+ <div class="ui positive message">
+ <div class="header">Parsed Citation String</div>
+ {{ entity_macros.extra_metadata(grobid_dict) }}
+ <p><i>See below for fuzzy match results</i>
+ </div>
+ {% endif %}
+
+ <div class="ui segment">
+ <h3>Fuzzy Match Metadata</h3>
+
+ <p>Enter whatever bibliographic metadata fields you know, and we will try to
+ match to catalog entries.
+
+ <p><b>NOTE:</b> if you already know a persistent identifier (like a DOI), you
+ should use the <a href="/release/lookup">lookup tool</a> instead.
+
+ <br>
+ <div class="ui equal width fields">
+ {{ edit_macros.form_field_basic(form.title) }}
+ </div>
+ <div class="ui equal width fields">
+ {{ edit_macros.form_field_basic(form.first_author) }}
+ </div>
+ <div class="ui equal width fields">
+ {{ edit_macros.form_field_basic(form.journal) }}
+ </div>
+ <div class="ui equal width fields">
+ {{ edit_macros.form_field_basic(form.year) }}
+ {{ edit_macros.form_field_basic(form.volume) }}
+ {{ edit_macros.form_field_basic(form.issue) }}
+ {{ edit_macros.form_field_basic(form.pages) }}
+ </div>
+
+ <button class="ui primary submit button right floated" type="submit" name="submit_type" value="match">
+ Match
+ </button>
+ <br clear="all">
+ </div>
+
+</form>
+{# Results are rendered only when "matches" is defined in the context; a falsy value shows the "No matches found" note. Only the first access option of each match is linked. #}
+{% if matches is defined %}
+ <h3>Matched Releases</h3>
+
+ {% if not matches %}
+ <p><i>No matches found</i>
+ {% endif %}
+
+ <table class="ui very basic celled table">
+ <tbody>
+ {% for match in matches %}
+ <tr><td class="collapsing center aligned">
+ <br><b>{{ match.status.name }}</b>
+ <br>{{ match.reason.name }}
+ <td class="">
+ {{ entity_macros.release_summary(match.release) }}
+ <td class="">
+ {% if match.access_options %}
+ <a href="{{ match.access_options[0].access_url}}" class="ui tiny green active button">{{ match.access_options[0].access_type.name }}</a>
+ {% endif %}
+ {% endfor %}
+ </tbody>
+ </table>
+
+{% endif %}
+
+{% endblock %}
diff --git a/python/fatcat_web/templates/refs_macros.html b/python/fatcat_web/templates/refs_macros.html
new file mode 100644
index 00000000..47ea2dcf
--- /dev/null
+++ b/python/fatcat_web/templates/refs_macros.html
@@ -0,0 +1,132 @@
+{% import "entity_macros.html" as entity_macros %}
+{# Pagination summary ("Showing X - Y of Z references"); with_links=True adds prev/next "?offset=" links. The prev offset is clamped at 0 so a starting offset smaller than the page size never yields a negative value. #}
+{% macro pagination_row(hits, with_links=False) %}
+ {% if with_links and hits.offset %}
+ <a href="?offset={{ hits.offset - hits.limit if hits.offset > hits.limit else 0 }}">&laquo; prev</a> &nbsp;
+ {% endif %}
+ {% if hits.count_returned == 0 %}
+ Showing 0 references
+ {% else %}
+ Showing {{ "{:,}".format(hits.offset + 1) }} - {{ "{:,}".format(hits.offset + hits.count_returned) }} of {{ "{:,}".format(hits.count_total) }} references
+ {% endif %}
+ {% if with_links and hits.count_total != hits.count_returned and hits.offset + hits.limit < hits.count_total %}
+ &nbsp;<a href="?offset={{ hits.offset + hits.limit }}">next &raquo;</a>
+ {% endif %}
+{% endmacro %}
+{# Table of reference rows (hits.result_refs): match status, target/source summary, and access links. direction is "in" (cited-by) or "out" (references). #}
+{% macro refs_table(hits, direction) %}
+<div class="ui warning message">
+ <div class="header">
+ Fuzzy reference matching is a work in progress!
+ </div>
+ Read more about quality, completeness, and caveats <a href="https://guide.fatcat.wiki/reference_graph.html">in the fatcat guide</a>.
+</div>
+
+<table class="ui table">
+<thead>
+ <tr><th colspan="3">
+ {{ pagination_row(hits, with_links=False) }}
+ (in {{ hits.query_wall_time_ms }}ms)
+</thead>
+<tbody>
+{% if hits.count_total == 0 %}
+ <tr><td class="ui placeholder segment">
+ <div class="ui icon header">
+ <i class="unlink icon"></i>
+ No References Found
+ </div>
+{% endif %}
+{% for row in hits.result_refs %}
+ {% set release = row.release %}
+ <tr>
+ <td class="collapsing left aligned top aligned">
+ {# TODO: ref_locator? #}
+ {% if direction == "out" %}
+ {% if row.ref.ref_key %}
+ <code title="index={{ row.ref.ref_index }}">[{{ row.ref.ref_key }}]</code><br>
+ {% endif %}
+ {% endif %}
+ {# icon reflects the match status; the tooltip carries status and reason #}
+ {% if row.ref.match_status == "exact" %}
+ {% set match_icon = "linkify" %}
+ {% elif row.ref.match_status == "unmatched" %}
+ {% set match_icon = "question circle outline" %}
+ {% else %}
+ {% set match_icon = "magic" %}
+ {% endif %}
+ <i class="{{ match_icon }} icon" title="{{ row.ref.match_status }} {{ row.ref.match_reason }}"></i><br>
+ {% if row.ref.match_provenance %}
+ via {{ row.ref.match_provenance }}<br>
+ {% endif %}
+ {# article titles are percent-encoded below so characters like "?" or "#" do not truncate the link #}
+ <td class="">
+ {% if release %}
+ {{ entity_macros.release_summary(release) }}
+ {% elif direction == "in" and row.ref.source_wikipedia_article %}
+ {% set wiki_lang = row.ref.source_wikipedia_article.split(':')[0] %}
+ {% set wiki_article = ':'.join(row.ref.source_wikipedia_article.split(':')[1:]) %}
+ <b>
+ <a href="https://{{ wiki_lang }}.wikipedia.org/wiki/{{ wiki_article.replace(' ', '_') | urlencode }}">
+ {{ wiki_article }}
+ </a>
+ [wikipedia]
+ </b>
+ <br>
+ <span style="color:green;">lang:{{ wiki_lang }}</span>&nbsp;
+ <a href="/wikipedia/{{ wiki_lang }}:{{ wiki_article.replace(' ', '_') | urlencode }}/refs-out" style="color:green;">[references]</a>&nbsp;
+ {% elif direction == "out" and row.ref.target_unstructured %}
+ <code>{{ row.ref.target_unstructured }}</code>
+ {% if row.ref.target_openlibrary_work %}
+ <br>
+ <a href="https://openlibrary.org/works/{{ row.ref.target_openlibrary_work }}" style="color:green;">openlibrary:{{ row.ref.target_openlibrary_work }}</a>&nbsp;
+ <a href="/openlibrary/{{ row.ref.target_openlibrary_work}}/refs-in" style="color:green;">[cited-by]</a>&nbsp;
+ {% endif %}
+ {% elif direction == "out" and row.ref.target_csl %}
+ {{ entity_macros.csl_summary(row.ref.target_csl) }}
+ {% else %}
+ <i>blank</i>
+ {% endif %}
+ <td class="center aligned">
+ {% if row.access %}
+ {% for access in row.access %}
+ <a href="{{ access.access_url}}" class="ui green label" style="background-color: #2ca048;">
+ {%- if access.access_type.name == "wayback" %}
+ web.archive.org
+ {%- elif access.access_type.name == "ia_file" -%}
+ archive.org
+ {%- else -%}
+ {{ access.access_type.name }}
+ {%- endif -%}
+ {%- if access.mimetype == "application/pdf" %}
+ [PDF]
+ {%- elif access.mimetype == "text/html" %}
+ [HTML]
+ {%- endif -%}
+ </a>
+ <br>
+ {% endfor %}
+ {% elif direction == "out" and row.ref.target_unstructured %}
+ <form class="ui form" id="reference_match_{{ loop.index }}" method="POST" action="/reference/match">{# id is unique per row: HTML ids must not repeat within a page #}
+ <input type="hidden" name="csrf_token" value="{{ csrf_token() }}"/>
+ <input type="hidden" name="raw_citation" value="{{ row.ref.target_unstructured }}">
+ <button class="ui tiny primary submit button" type="submit" name="submit_type" value="parse">
+ re-parse
+ </button>
+ </form>
+ {% endif %}
+{% endfor %}
+</tbody>
+<tfoot>
+ <tr><th colspan="3">
+ <div style="float: right;">
+ <a href="{{ request.path }}.json?{{ request.query_string.decode() }}">JSON</a>
+ </div>
+ {% if hits.count_returned != hits.count_total %}
+ <center>
+ {{ pagination_row(hits, with_links=True) }}
+ </center>
+ {% endif %}
+</tfoot>
+</table>
+{% endmacro %}
+
diff --git a/python/fatcat_web/templates/release_view.html b/python/fatcat_web/templates/release_view.html
index abf7ace0..4652f4a2 100644
--- a/python/fatcat_web/templates/release_view.html
+++ b/python/fatcat_web/templates/release_view.html
@@ -84,9 +84,9 @@
Published
{% if release.container.ident %}
in <a href="/container/{{ release.container.ident }}"><span itemprop="name">{{ release.container.name }}</span></a>
- {% elif release.extra and release.extra.container_name %}
+ {%- elif release.extra and release.extra.container_name %}
in <span itemprop="name">{{ release.extra.container_name }}</span>
- {% endif %}
+ {%- endif %}
{% else %}
Released
{% if release.release_type %}
@@ -95,26 +95,27 @@
{% if release.container %}
in <a href="/container/{{ release.container.ident }}"><span itemprop="name">{{ release.container.name }}</span></a>
{% endif %}
- {% endif %}
+ {% endif -%}
{% if release.publisher %}
by <span itemprop="publisher">{{ release.publisher }}</span>
{%- endif %}.
<p>
- {% if release.volume != None %}
- Volume {{ release.volume }}
- {%- if release.issue != None %}, {% endif %}
- {% endif %}
- {% if release.issue != None %}
- Issue {{ release.issue}}
+ {% set comma = joiner(", ") %}
+ {% if release.release_year != None %}
+ {{ release.release_year }} &nbsp;
{% endif %}
- {% if release.pages != None %}
+ {% if release.volume != None %}
+ {{- comma() }}Volume {{ release.volume -}}
+ {%- endif %}
+ {%- if release.issue != None %}
+ {{- comma() }}Issue {{ release.issue -}}
+ {%- endif %}
+ {%- if release.pages != None %}
+ {{- comma() }}
{% if release.pages[0].isdigit() %}p{% endif -%}
{{ release.pages }}
{% endif %}
- {% if release.release_year != None %}
- ({{ release.release_year }})
- {% endif %}
</div>
{% if release.abstracts != [] %}
diff --git a/python/fatcat_web/templates/release_view_fuzzy_refs.html b/python/fatcat_web/templates/release_view_fuzzy_refs.html
new file mode 100644
index 00000000..8cba4f4e
--- /dev/null
+++ b/python/fatcat_web/templates/release_view_fuzzy_refs.html
@@ -0,0 +1,27 @@
+{% set release = entity %}
+{% set entity_view = "refs-" + direction %}
+{% set entity_type = "release" %}
+{% import "refs_macros.html" as refs_macros %}
+{% extends "entity_base.html" %}
+
+
+{% block entity_main %}
+{# direction selects the intro text: "in" lists citing works, "out" lists this release's own references #}
+{% if direction == "in" %}
+ <h3>Cited By</h3>
+ <i>References to this release by other works.</i>
+{% elif direction == "out" %}
+ <h3>References</h3>
+ <i>NOTE: currently batch computed and may include additional reference sources, or be missing recent changes, compared to entity reference list.</i>
+{# point to the legacy per-entity reference list when there are no fuzzy hits but the release entity itself carries refs #}
+ {% if hits.count_total == 0 and release.refs %}
+ <div class="ui positive message">
+ <p>No <i>fuzzy</i> references found, but there are <a href="/release/{{ release.ident }}/references">{{ release.refs|count }} legacy references</a>
+ </div>
+ {% endif %}
+{% endif %}
+
+{{ refs_macros.refs_table(hits, direction) }}
+
+{% endblock %}
+
diff --git a/python/fatcat_web/templates/wikipedia_view_fuzzy_refs.html b/python/fatcat_web/templates/wikipedia_view_fuzzy_refs.html
new file mode 100644
index 00000000..3e1453c1
--- /dev/null
+++ b/python/fatcat_web/templates/wikipedia_view_fuzzy_refs.html
@@ -0,0 +1,23 @@
+{% extends "base.html" %}
+{% import "refs_macros.html" as refs_macros %}
+
+{% block title %}Wikipedia Refs{% endblock %}
+
+{% block fullbody %}
+<h1 class="ui header">
+ [{{ wiki_lang }}] {{ wiki_article }}
+ <span class="sub header"><a href="{{ wiki_url }}"><code>{{ wiki_url }}</code></a></span>
+</h1>
+{# direction selects the intro text: "in" lists citing works, "out" lists the article's own references #}
+{% if direction == "in" %}
+ <h3>Cited By</h3>
+ <p>This page lists references to a wikipedia article, from other works (eg, journal articles).
+{% elif direction == "out" %}
+ <h3>References</h3>
+ <i>References from wikipedia article to other entities.</i>
+{% endif %}
+
+{{ refs_macros.refs_table(hits, direction) }}
+
+{% endblock %}
+
diff --git a/python/tests/files/elastic_empty.json b/python/tests/files/elastic_empty.json
new file mode 100644
index 00000000..9b30d03b
--- /dev/null
+++ b/python/tests/files/elastic_empty.json
@@ -0,0 +1 @@
+{"took": 10, "timed_out": false, "_shards": {"total": 6, "successful": 6, "skipped": 0, "failed": 0}, "hits": {"total": {"value": 0, "relation": "eq"}, "max_score": null, "hits": []}} \ No newline at end of file
diff --git a/python/tests/files/elastic_refs_in_release.json b/python/tests/files/elastic_refs_in_release.json
new file mode 100644
index 00000000..5260ae3f
--- /dev/null
+++ b/python/tests/files/elastic_refs_in_release.json
@@ -0,0 +1,360 @@
+{
+ "took": 30,
+ "timed_out": false,
+ "_shards": {
+ "total": 6,
+ "successful": 6,
+ "skipped": 0,
+ "failed": 0
+ },
+ "hits": {
+ "total": {
+ "value": 69,
+ "relation": "eq"
+ },
+ "max_score": null,
+ "hits": [
+ {
+ "_index": "fatcat_ref_v02_20210716",
+ "_type": "_doc",
+ "_id": "rxy26aoognaytoeghum4ncmygq_30",
+ "_score": null,
+ "_source": {
+ "indexed_ts": "2021-07-10T11:47:48Z",
+ "match_provenance": "crossref",
+ "match_reason": "doi",
+ "match_status": "exact",
+ "ref_index": 30,
+ "ref_key": "BIB0030|jon779-cit-0030",
+ "source_release_ident": "aaaaaaaaaaaaarceaaaaaaaaam",
+ "source_work_ident": "3bggl3ls3fftvl3g6yg5qzy4yq",
+ "source_year": "2013",
+ "target_release_ident": "cx2xdrrgt5cydcinttixgj4nxu",
+ "target_work_ident": "s45xqgdp5bftrbymxtsl32hcna"
+ },
+ "fields": {
+ "source_work_ident": [
+ "3bggl3ls3fftvl3g6yg5qzy4yq"
+ ]
+ },
+ "sort": [
+ 2013
+ ],
+ "inner_hits": {
+ "source_more": {
+ "hits": {
+ "total": {
+ "value": 1,
+ "relation": "eq"
+ },
+ "max_score": null,
+ "hits": []
+ }
+ }
+ }
+ },
+ {
+ "_index": "fatcat_ref_v02_20210716",
+ "_type": "_doc",
+ "_id": "xew5r3gnxbaznhj3kevspu75yq_46",
+ "_score": null,
+ "_source": {
+ "indexed_ts": "2021-07-10T11:47:48Z",
+ "match_provenance": "crossref",
+ "match_reason": "doi",
+ "match_status": "exact",
+ "ref_index": 46,
+ "ref_key": "_bib46",
+ "source_release_ident": "aaaaaaaaaaaaarceaaaaaaaaam",
+ "source_work_ident": "dqfgm7p2urh3dd2ja2s5cleqr4",
+ "source_year": "2013",
+ "target_release_ident": "cx2xdrrgt5cydcinttixgj4nxu",
+ "target_work_ident": "s45xqgdp5bftrbymxtsl32hcna"
+ },
+ "fields": {
+ "source_work_ident": [
+ "dqfgm7p2urh3dd2ja2s5cleqr4"
+ ]
+ },
+ "sort": [
+ 2013
+ ],
+ "inner_hits": {
+ "source_more": {
+ "hits": {
+ "total": {
+ "value": 1,
+ "relation": "eq"
+ },
+ "max_score": null,
+ "hits": []
+ }
+ }
+ }
+ },
+ {
+ "_index": "fatcat_ref_v02_20210716",
+ "_type": "_doc",
+ "_id": "dyy6cr675zbivam4wldogvc7ue_23",
+ "_score": null,
+ "_source": {
+ "indexed_ts": "2021-07-10T11:47:48Z",
+ "match_provenance": "crossref",
+ "match_reason": "doi",
+ "match_status": "exact",
+ "ref_index": 23,
+ "ref_key": "BFmp2012104_CR23",
+ "source_release_ident": "aaaaaaaaaaaaarceaaaaaaaaam",
+ "source_work_ident": "7igrlb5tevgoxdeds2w2opwj7a",
+ "source_year": "2012",
+ "target_release_ident": "cx2xdrrgt5cydcinttixgj4nxu",
+ "target_work_ident": "s45xqgdp5bftrbymxtsl32hcna"
+ },
+ "fields": {
+ "source_work_ident": [
+ "7igrlb5tevgoxdeds2w2opwj7a"
+ ]
+ },
+ "sort": [
+ 2012
+ ],
+ "inner_hits": {
+ "source_more": {
+ "hits": {
+ "total": {
+ "value": 1,
+ "relation": "eq"
+ },
+ "max_score": null,
+ "hits": []
+ }
+ }
+ }
+ },
+ {
+ "_index": "fatcat_ref_v02_20210716",
+ "_type": "_doc",
+ "_id": "kbivvq4n5nhfpfl3dc7xq6bzbu_33",
+ "_score": null,
+ "_source": {
+ "indexed_ts": "2021-07-14T01:37:55Z",
+ "match_provenance": "fuzzy",
+ "match_reason": "jaccardauthors",
+ "match_status": "strong",
+ "ref_index": 33,
+ "ref_key": "b33",
+ "source_release_ident": "aaaaaaaaaaaaarceaaaaaaaaam",
+ "source_work_ident": "75yfavca2bbwbarcchkm7afhyy",
+ "source_year": "2012",
+ "target_release_ident": "cx2xdrrgt5cydcinttixgj4nxu",
+ "target_work_ident": "s45xqgdp5bftrbymxtsl32hcna"
+ },
+ "fields": {
+ "source_work_ident": [
+ "75yfavca2bbwbarcchkm7afhyy"
+ ]
+ },
+ "sort": [
+ 2012
+ ],
+ "inner_hits": {
+ "source_more": {
+ "hits": {
+ "total": {
+ "value": 1,
+ "relation": "eq"
+ },
+ "max_score": null,
+ "hits": []
+ }
+ }
+ }
+ },
+ {
+ "_index": "fatcat_ref_v02_20210716",
+ "_type": "_doc",
+ "_id": "2kemk2jdynacznanpixvqiytla_48",
+ "_score": null,
+ "_source": {
+ "indexed_ts": "2021-07-10T11:47:48Z",
+ "match_provenance": "crossref",
+ "match_reason": "doi",
+ "match_status": "exact",
+ "ref_index": 48,
+ "ref_key": "BFmp201237_CR48",
+ "source_release_ident": "aaaaaaaaaaaaarceaaaaaaaaam",
+ "source_work_ident": "7fu5n2crurex7isvfv5tnf3y2i",
+ "source_year": "2012",
+ "target_release_ident": "cx2xdrrgt5cydcinttixgj4nxu",
+ "target_work_ident": "s45xqgdp5bftrbymxtsl32hcna"
+ },
+ "fields": {
+ "source_work_ident": [
+ "7fu5n2crurex7isvfv5tnf3y2i"
+ ]
+ },
+ "sort": [
+ 2012
+ ],
+ "inner_hits": {
+ "source_more": {
+ "hits": {
+ "total": {
+ "value": 1,
+ "relation": "eq"
+ },
+ "max_score": null,
+ "hits": []
+ }
+ }
+ }
+ },
+ {
+ "_index": "fatcat_ref_v02_20210716",
+ "_type": "_doc",
+ "_id": "q3qy5z3htnd2likregg3dff23i_37",
+ "_score": null,
+ "_source": {
+ "indexed_ts": "2021-07-10T11:47:48Z",
+ "match_provenance": "crossref",
+ "match_reason": "doi",
+ "match_status": "exact",
+ "ref_index": 37,
+ "ref_key": "BFnpp2012215_CR37",
+ "source_release_ident": "aaaaaaaaaaaaarceaaaaaaaaam",
+ "source_work_ident": "hqk3tnlyvffoppkakajynn5x2u",
+ "source_year": "2012",
+ "target_release_ident": "cx2xdrrgt5cydcinttixgj4nxu",
+ "target_work_ident": "s45xqgdp5bftrbymxtsl32hcna"
+ },
+ "fields": {
+ "source_work_ident": [
+ "hqk3tnlyvffoppkakajynn5x2u"
+ ]
+ },
+ "sort": [
+ 2012
+ ],
+ "inner_hits": {
+ "source_more": {
+ "hits": {
+ "total": {
+ "value": 1,
+ "relation": "eq"
+ },
+ "max_score": null,
+ "hits": []
+ }
+ }
+ }
+ },
+ {
+ "_index": "fatcat_ref_v02_20210716",
+ "_type": "_doc",
+ "_id": "fzgecyr6cnggbinanrdyc44b4a_126",
+ "_score": null,
+ "_source": {
+ "indexed_ts": "2021-07-10T11:47:48Z",
+ "match_provenance": "crossref",
+ "match_reason": "doi",
+ "match_status": "exact",
+ "ref_index": 126,
+ "ref_key": "BFtp201234_CR126",
+ "source_release_ident": "aaaaaaaaaaaaarceaaaaaaaaam",
+ "source_work_ident": "hwtljgl3dvhojeusfwon6iba6q",
+ "source_year": "2012",
+ "target_release_ident": "cx2xdrrgt5cydcinttixgj4nxu",
+ "target_work_ident": "s45xqgdp5bftrbymxtsl32hcna"
+ },
+ "fields": {
+ "source_work_ident": [
+ "hwtljgl3dvhojeusfwon6iba6q"
+ ]
+ },
+ "sort": [
+ 2012
+ ],
+ "inner_hits": {
+ "source_more": {
+ "hits": {
+ "total": {
+ "value": 1,
+ "relation": "eq"
+ },
+ "max_score": null,
+ "hits": []
+ }
+ }
+ }
+ },
+ {
+ "_index": "fatcat_ref_v02_20210716",
+ "_type": "_doc",
+ "_id": "7lt5parryzcbhentd75sqgsuvu_15",
+ "_score": null,
+ "_source": {
+ "indexed_ts": "2021-07-10T11:47:48Z",
+ "match_provenance": "grobid",
+ "match_reason": "doi",
+ "match_status": "exact",
+ "ref_index": 15,
+ "ref_key": "b14",
+ "source_release_ident": "aaaaaaaaaaaaarceaaaaaaaaam",
+ "source_work_ident": "xenzkonwivbl3bzirbtqohsb4q",
+ "source_year": "0",
+ "target_release_ident": "cx2xdrrgt5cydcinttixgj4nxu",
+ "target_work_ident": "s45xqgdp5bftrbymxtsl32hcna"
+ },
+ "fields": {
+ "source_work_ident": [
+ "xenzkonwivbl3bzirbtqohsb4q"
+ ]
+ },
+ "sort": [
+ 0
+ ],
+ "inner_hits": {
+ "source_more": {
+ "hits": {
+ "total": {
+ "value": 1,
+ "relation": "eq"
+ },
+ "max_score": null,
+ "hits": []
+ }
+ }
+ }
+ },
+ {
+ "_index": "fatcat_ref_v02_20210716",
+ "_type": "_doc",
+ "_id": "wikipedia_knxxa2djmeqem4tbnztw65i_cx2xdrrgt5cydcinttixgj4nxu",
+ "_score": null,
+ "_source": {
+ "match_provenance": "wikipedia",
+ "match_reason": "doi",
+ "match_status": "exact",
+ "source_wikipedia_article": "en:Sophia Frangou",
+ "target_release_ident": "cx2xdrrgt5cydcinttixgj4nxu",
+ "target_work_ident": "s45xqgdp5bftrbymxtsl32hcna"
+ },
+ "sort": [
+ -9223372036854776000
+ ],
+ "inner_hits": {
+ "source_more": {
+ "hits": {
+ "total": {
+ "value": 1,
+ "relation": "eq"
+ },
+ "max_score": null,
+ "hits": []
+ }
+ }
+ }
+ }
+ ]
+ }
+}
diff --git a/python/tests/files/elastic_refs_out_release.json b/python/tests/files/elastic_refs_out_release.json
new file mode 100644
index 00000000..5a45acee
--- /dev/null
+++ b/python/tests/files/elastic_refs_out_release.json
@@ -0,0 +1,683 @@
+{
+ "took": 15,
+ "timed_out": false,
+ "_shards": {
+ "total": 6,
+ "successful": 6,
+ "skipped": 0,
+ "failed": 0
+ },
+ "hits": {
+ "total": {
+ "value": 34,
+ "relation": "eq"
+ },
+ "max_score": null,
+ "hits": [
+ {
+ "_index": "fatcat_ref_v02_20210716",
+ "_type": "_doc",
+ "_id": "yfr23sa5rbfizoqjku7uwhq6ye_1",
+ "_score": null,
+ "_source": {
+ "indexed_ts": "2021-07-14T03:08:24Z",
+ "match_reason": "unknown",
+ "match_status": "unmatched",
+ "ref_index": 1,
+ "ref_key": "e_1_3_2_1_2_1",
+ "source_release_ident": "yfr23sa5rbfizoqjku7uwhq6ye",
+ "source_work_ident": "mkmq6ju4abaexcezpb44cpx5xa",
+ "source_year": "2020",
+ "target_unstructured": "Accessed: 2020-01-01. GSL- GNU Scientific Library. https://www.gnu.org/software/gsl/. Accessed: 2020-01-01. GSL- GNU Scientific Library. https://www.gnu.org/software/gsl/."
+ },
+ "sort": [
+ 1
+ ]
+ },
+ {
+ "_index": "fatcat_ref_v02_20210716",
+ "_type": "_doc",
+ "_id": "yfr23sa5rbfizoqjku7uwhq6ye_2",
+ "_score": null,
+ "_source": {
+ "indexed_ts": "2021-07-14T03:08:24Z",
+ "match_reason": "unknown",
+ "match_status": "unmatched",
+ "ref_index": 2,
+ "ref_key": "e_1_3_2_1_3_1",
+ "source_release_ident": "yfr23sa5rbfizoqjku7uwhq6ye",
+ "source_work_ident": "mkmq6ju4abaexcezpb44cpx5xa",
+ "source_year": "2020",
+ "target_unstructured": "Accessed: 2020-01-01. KLEE with floating point support. https://github.com/srg-imperial/klee-float. Accessed: 2020-01-01. KLEE with floating point support. https://github.com/srg-imperial/klee-float."
+ },
+ "sort": [
+ 2
+ ]
+ },
+ {
+ "_index": "fatcat_ref_v02_20210716",
+ "_type": "_doc",
+ "_id": "yfr23sa5rbfizoqjku7uwhq6ye_3",
+ "_score": null,
+ "_source": {
+ "indexed_ts": "2021-07-14T03:08:24Z",
+ "match_reason": "unknown",
+ "match_status": "unmatched",
+ "ref_index": 3,
+ "ref_key": "e_1_3_2_1_4_1",
+ "source_release_ident": "yfr23sa5rbfizoqjku7uwhq6ye",
+ "source_work_ident": "mkmq6ju4abaexcezpb44cpx5xa",
+ "source_year": "2020",
+ "target_unstructured": "Accessed: 2020-01-01. LibTooling. https://clang.llvm.org/docs/LibTooling.html. Accessed: 2020-01-01. LibTooling. https://clang.llvm.org/docs/LibTooling.html."
+ },
+ "sort": [
+ 3
+ ]
+ },
+ {
+ "_index": "fatcat_ref_v02_20210716",
+ "_type": "_doc",
+ "_id": "yfr23sa5rbfizoqjku7uwhq6ye_4",
+ "_score": null,
+ "_source": {
+ "indexed_ts": "2021-07-14T03:08:24Z",
+ "match_reason": "unknown",
+ "match_status": "unmatched",
+ "ref_index": 4,
+ "ref_key": "e_1_3_2_1_5_1",
+ "source_release_ident": "yfr23sa5rbfizoqjku7uwhq6ye",
+ "source_work_ident": "mkmq6ju4abaexcezpb44cpx5xa",
+ "source_year": "2020",
+ "target_unstructured": "Accessed: 2020-01-01. Meschach Library. https://www.netlib.org/c/meschach/readme. Accessed: 2020-01-01. Meschach Library. https://www.netlib.org/c/meschach/readme."
+ },
+ "sort": [
+ 4
+ ]
+ },
+ {
+ "_index": "fatcat_ref_v02_20210716",
+ "_type": "_doc",
+ "_id": "yfr23sa5rbfizoqjku7uwhq6ye_5",
+ "_score": null,
+ "_source": {
+ "indexed_ts": "2021-07-14T03:08:24Z",
+ "match_reason": "unknown",
+ "match_status": "unmatched",
+ "ref_index": 5,
+ "ref_key": "e_1_3_2_1_6_1",
+ "source_release_ident": "yfr23sa5rbfizoqjku7uwhq6ye",
+ "source_work_ident": "mkmq6ju4abaexcezpb44cpx5xa",
+ "source_year": "2020",
+ "target_unstructured": "Accessed: 2020-01-01. Toyota: Software to blame for Prius brake problems. http://www.cnn.com/2010/WORLD/asiapcf/02/04/japan.prius.complaints/index.html. Accessed: 2020-01-01. Toyota: Software to blame for Prius brake problems. http://www.cnn.com/2010/WORLD/asiapcf/02/04/japan.prius.complaints/index.html."
+ },
+ "sort": [
+ 5
+ ]
+ },
+ {
+ "_index": "fatcat_ref_v02_20210716",
+ "_type": "_doc",
+ "_id": "yfr23sa5rbfizoqjku7uwhq6ye_6",
+ "_score": null,
+ "_source": {
+ "indexed_ts": "2021-07-14T03:08:24Z",
+ "match_reason": "unknown",
+ "match_status": "unmatched",
+ "ref_index": 6,
+ "ref_key": "e_1_3_2_1_7_1",
+ "source_release_ident": "yfr23sa5rbfizoqjku7uwhq6ye",
+ "source_work_ident": "mkmq6ju4abaexcezpb44cpx5xa",
+ "source_year": "2020",
+ "target_unstructured": "Accessed: 2020-01-01. Z3. https://github.com/Z3Prover/z3. Accessed: 2020-01-01. Z3. https://github.com/Z3Prover/z3."
+ },
+ "sort": [
+ 6
+ ]
+ },
+ {
+ "_index": "fatcat_ref_v02_20210716",
+ "_type": "_doc",
+ "_id": "yfr23sa5rbfizoqjku7uwhq6ye_7",
+ "_score": null,
+ "_source": {
+ "indexed_ts": "2021-07-14T03:08:24Z",
+ "match_reason": "unknown",
+ "match_status": "unmatched",
+ "ref_index": 7,
+ "ref_key": "e_1_3_2_1_8_1",
+ "source_release_ident": "yfr23sa5rbfizoqjku7uwhq6ye",
+ "source_work_ident": "mkmq6ju4abaexcezpb44cpx5xa",
+ "source_year": "2020",
+ "target_csl": {
+ "accessed": {},
+ "author": [
+ {
+ "raw_name": "Alatawi Eman"
+ }
+ ],
+ "container-title": "Symbolic Execution with Invariant Inlay: Evaluating the Potential. In 2018 25th Australasian Software Engineering Conference, ASWEC 2018.",
+ "issued": {
+ "date-parts": [[2019]]
+ }
+ }
+ },
+ "sort": [
+ 7
+ ]
+ },
+ {
+ "_index": "fatcat_ref_v02_20210716",
+ "_type": "_doc",
+ "_id": "yfr23sa5rbfizoqjku7uwhq6ye_8",
+ "_score": null,
+ "_source": {
+ "indexed_ts": "2021-07-14T03:08:24Z",
+ "match_reason": "unknown",
+ "match_status": "unmatched",
+ "ref_index": 8,
+ "ref_key": "e_1_3_2_1_9_1",
+ "source_release_ident": "yfr23sa5rbfizoqjku7uwhq6ye",
+ "source_work_ident": "mkmq6ju4abaexcezpb44cpx5xa",
+ "source_year": "2020",
+ "target_csl": {
+ "accessed": {},
+ "author": [
+ {
+ "name": "Bagnara Roberto"
+ }
+ ],
+ "container-title": "Symbolic Path-Oriented Test Data Generation for Floating-Point Programs. In Sixth IEEE International Conference on Software Testing, Verification and Validation, ICST",
+ "issued": {
+ "raw": "2000"
+ }
+ }
+ },
+ "sort": [
+ 8
+ ]
+ },
+ {
+ "_index": "fatcat_ref_v02_20210716",
+ "_type": "_doc",
+ "_id": "yfr23sa5rbfizoqjku7uwhq6ye_10",
+ "_score": null,
+ "_source": {
+ "indexed_ts": "2021-07-10T13:16:48Z",
+ "match_provenance": "crossref",
+ "match_reason": "doi",
+ "match_status": "exact",
+ "ref_index": 10,
+ "ref_key": "e_1_3_2_1_10_1",
+ "source_release_ident": "yfr23sa5rbfizoqjku7uwhq6ye",
+ "source_work_ident": "mkmq6ju4abaexcezpb44cpx5xa",
+ "source_year": "2020",
+ "target_release_ident": "aaaaaaaaaaaaarceaaaaaaaaam",
+ "target_work_ident": "kf6qgd3e6vc3nhkpf3m32qehj4"
+ },
+ "sort": [
+ 10
+ ]
+ },
+ {
+ "_index": "fatcat_ref_v02_20210716",
+ "_type": "_doc",
+ "_id": "yfr23sa5rbfizoqjku7uwhq6ye_11",
+ "_score": null,
+ "_source": {
+ "indexed_ts": "2021-07-10T13:16:48Z",
+ "match_provenance": "crossref",
+ "match_reason": "doi",
+ "match_status": "exact",
+ "ref_index": 11,
+ "ref_key": "e_1_3_2_1_11_1",
+ "source_release_ident": "yfr23sa5rbfizoqjku7uwhq6ye",
+ "source_work_ident": "mkmq6ju4abaexcezpb44cpx5xa",
+ "source_year": "2020",
+ "target_release_ident": "aaaaaaaaaaaaarceaaaaaaaaam",
+ "target_work_ident": "gynqpbv73jbdfcfpnzptsq4m64"
+ },
+ "sort": [
+ 11
+ ]
+ },
+ {
+ "_index": "fatcat_ref_v02_20210716",
+ "_type": "_doc",
+ "_id": "yfr23sa5rbfizoqjku7uwhq6ye_12",
+ "_score": null,
+ "_source": {
+ "indexed_ts": "2021-07-10T13:16:47Z",
+ "match_provenance": "crossref",
+ "match_reason": "doi",
+ "match_status": "exact",
+ "ref_index": 12,
+ "ref_key": "e_1_3_2_1_12_1",
+ "source_release_ident": "yfr23sa5rbfizoqjku7uwhq6ye",
+ "source_work_ident": "mkmq6ju4abaexcezpb44cpx5xa",
+ "source_year": "2020",
+ "target_release_ident": "aaaaaaaaaaaaarceaaaaaaaaam",
+ "target_work_ident": "osmyp7kaxzbd3duf6hohrtuzvm"
+ },
+ "sort": [
+ 12
+ ]
+ },
+ {
+ "_index": "fatcat_ref_v02_20210716",
+ "_type": "_doc",
+ "_id": "yfr23sa5rbfizoqjku7uwhq6ye_14",
+ "_score": null,
+ "_source": {
+ "indexed_ts": "2021-07-10T13:16:40Z",
+ "match_provenance": "crossref",
+ "match_reason": "doi",
+ "match_status": "exact",
+ "ref_index": 14,
+ "ref_key": "e_1_3_2_1_14_1",
+ "source_release_ident": "yfr23sa5rbfizoqjku7uwhq6ye",
+ "source_work_ident": "mkmq6ju4abaexcezpb44cpx5xa",
+ "source_year": "2020",
+ "target_release_ident": "aaaaaaaaaaaaarceaaaaaaaaam",
+ "target_work_ident": "sbxxyxrtxfe5fdukmcgjgjf6we"
+ },
+ "sort": [
+ 14
+ ]
+ },
+ {
+ "_index": "fatcat_ref_v02_20210716",
+ "_type": "_doc",
+ "_id": "yfr23sa5rbfizoqjku7uwhq6ye_15",
+ "_score": null,
+ "_source": {
+ "indexed_ts": "2021-07-10T13:16:46Z",
+ "match_provenance": "crossref",
+ "match_reason": "doi",
+ "match_status": "exact",
+ "ref_index": 15,
+ "ref_key": "e_1_3_2_1_15_1",
+ "source_release_ident": "yfr23sa5rbfizoqjku7uwhq6ye",
+ "source_work_ident": "mkmq6ju4abaexcezpb44cpx5xa",
+ "source_year": "2020",
+ "target_release_ident": "aaaaaaaaaaaaarceaaaaaaaaam",
+ "target_work_ident": "aw5o7bbhnjef7fy3cg3prpune4"
+ },
+ "sort": [
+ 15
+ ]
+ },
+ {
+ "_index": "fatcat_ref_v02_20210716",
+ "_type": "_doc",
+ "_id": "yfr23sa5rbfizoqjku7uwhq6ye_16",
+ "_score": null,
+ "_source": {
+ "indexed_ts": "2021-07-10T13:16:49Z",
+ "match_provenance": "crossref",
+ "match_reason": "doi",
+ "match_status": "exact",
+ "ref_index": 16,
+ "ref_key": "e_1_3_2_1_16_1",
+ "source_release_ident": "yfr23sa5rbfizoqjku7uwhq6ye",
+ "source_work_ident": "mkmq6ju4abaexcezpb44cpx5xa",
+ "source_year": "2020",
+ "target_release_ident": "aaaaaaaaaaaaarceaaaaaaaaam",
+ "target_work_ident": "sdmeosutrzgppilsibx5kbinba"
+ },
+ "sort": [
+ 16
+ ]
+ },
+ {
+ "_index": "fatcat_ref_v02_20210716",
+ "_type": "_doc",
+ "_id": "yfr23sa5rbfizoqjku7uwhq6ye_18",
+ "_score": null,
+ "_source": {
+ "indexed_ts": "2021-07-10T13:05:08Z",
+ "match_provenance": "crossref",
+ "match_reason": "doi",
+ "match_status": "exact",
+ "ref_index": 18,
+ "ref_key": "e_1_3_2_1_18_1",
+ "source_release_ident": "yfr23sa5rbfizoqjku7uwhq6ye",
+ "source_work_ident": "mkmq6ju4abaexcezpb44cpx5xa",
+ "source_year": "2020",
+ "target_release_ident": "aaaaaaaaaaaaarceaaaaaaaaam",
+ "target_work_ident": "ae26aeacuvdi3mlgut3g32f42i"
+ },
+ "sort": [
+ 18
+ ]
+ },
+ {
+ "_index": "fatcat_ref_v02_20210716",
+ "_type": "_doc",
+ "_id": "yfr23sa5rbfizoqjku7uwhq6ye_19",
+ "_score": null,
+ "_source": {
+ "indexed_ts": "2021-07-10T13:16:52Z",
+ "match_provenance": "crossref",
+ "match_reason": "doi",
+ "match_status": "exact",
+ "ref_index": 19,
+ "ref_key": "e_1_3_2_1_19_1",
+ "source_release_ident": "yfr23sa5rbfizoqjku7uwhq6ye",
+ "source_work_ident": "mkmq6ju4abaexcezpb44cpx5xa",
+ "source_year": "2020",
+ "target_release_ident": "aaaaaaaaaaaaarceaaaaaaaaam",
+ "target_work_ident": "bqsv52bbdnggxkls5cgdbubovm"
+ },
+ "sort": [
+ 19
+ ]
+ },
+ {
+ "_index": "fatcat_ref_v02_20210716",
+ "_type": "_doc",
+ "_id": "yfr23sa5rbfizoqjku7uwhq6ye_21",
+ "_score": null,
+ "_source": {
+ "indexed_ts": "2021-07-10T13:16:54Z",
+ "match_provenance": "crossref",
+ "match_reason": "doi",
+ "match_status": "exact",
+ "ref_index": 21,
+ "ref_key": "e_1_3_2_1_21_1",
+ "source_release_ident": "yfr23sa5rbfizoqjku7uwhq6ye",
+ "source_work_ident": "mkmq6ju4abaexcezpb44cpx5xa",
+ "source_year": "2020",
+ "target_release_ident": "aaaaaaaaaaaaarceaaaaaaaaam",
+ "target_work_ident": "edpnjczcr5ebrppg5g5adrg5ty"
+ },
+ "sort": [
+ 21
+ ]
+ },
+ {
+ "_index": "fatcat_ref_v02_20210716",
+ "_type": "_doc",
+ "_id": "yfr23sa5rbfizoqjku7uwhq6ye_22",
+ "_score": null,
+ "_source": {
+ "indexed_ts": "2021-07-10T13:15:52Z",
+ "match_provenance": "crossref",
+ "match_reason": "doi",
+ "match_status": "exact",
+ "ref_index": 22,
+ "ref_key": "e_1_3_2_1_22_1",
+ "source_release_ident": "yfr23sa5rbfizoqjku7uwhq6ye",
+ "source_work_ident": "mkmq6ju4abaexcezpb44cpx5xa",
+ "source_year": "2020",
+ "target_release_ident": "aaaaaaaaaaaaarceaaaaaaaaam",
+ "target_work_ident": "cropr6fte5dbtbnnlaau7fcp3a"
+ },
+ "sort": [
+ 22
+ ]
+ },
+ {
+ "_index": "fatcat_ref_v02_20210716",
+ "_type": "_doc",
+ "_id": "yfr23sa5rbfizoqjku7uwhq6ye_23",
+ "_score": null,
+ "_source": {
+ "indexed_ts": "2021-07-10T13:16:58Z",
+ "match_provenance": "crossref",
+ "match_reason": "doi",
+ "match_status": "exact",
+ "ref_index": 23,
+ "ref_key": "e_1_3_2_1_23_1",
+ "source_release_ident": "yfr23sa5rbfizoqjku7uwhq6ye",
+ "source_work_ident": "mkmq6ju4abaexcezpb44cpx5xa",
+ "source_year": "2020",
+ "target_release_ident": "aaaaaaaaaaaaarceaaaaaaaaam",
+ "target_work_ident": "e7kzr7fvmrg2repjxglg6ptzz4"
+ },
+ "sort": [
+ 23
+ ]
+ },
+ {
+ "_index": "fatcat_ref_v02_20210716",
+ "_type": "_doc",
+ "_id": "yfr23sa5rbfizoqjku7uwhq6ye_24",
+ "_score": null,
+ "_source": {
+ "indexed_ts": "2021-07-14T03:08:24Z",
+ "match_reason": "unknown",
+ "match_status": "unmatched",
+ "ref_index": 24,
+ "ref_key": "e_1_3_2_1_25_1",
+ "source_release_ident": "yfr23sa5rbfizoqjku7uwhq6ye",
+ "source_work_ident": "mkmq6ju4abaexcezpb44cpx5xa",
+ "source_year": "2020",
+ "target_unstructured": "Michael O. Lam Jeffrey K. Hollingsworth and G. W. Stewart. 2013. Dynamic floating-point cancellation detection. Parallel Comput. (2013). Michael O. Lam Jeffrey K. Hollingsworth and G. W. Stewart. 2013. Dynamic floating-point cancellation detection. Parallel Comput. (2013)."
+ },
+ "sort": [
+ 24
+ ]
+ },
+ {
+ "_index": "fatcat_ref_v02_20210716",
+ "_type": "_doc",
+ "_id": "yfr23sa5rbfizoqjku7uwhq6ye_26",
+ "_score": null,
+ "_source": {
+ "indexed_ts": "2021-07-10T13:05:10Z",
+ "match_provenance": "crossref",
+ "match_reason": "doi",
+ "match_status": "exact",
+ "ref_index": 26,
+ "ref_key": "e_1_3_2_1_26_1",
+ "source_release_ident": "yfr23sa5rbfizoqjku7uwhq6ye",
+ "source_work_ident": "mkmq6ju4abaexcezpb44cpx5xa",
+ "source_year": "2020",
+ "target_release_ident": "aaaaaaaaaaaaarceaaaaaaaaam",
+ "target_work_ident": "zwdzqlf4ivdlbg6f7hmccyig7u"
+ },
+ "sort": [
+ 26
+ ]
+ },
+ {
+ "_index": "fatcat_ref_v02_20210716",
+ "_type": "_doc",
+ "_id": "yfr23sa5rbfizoqjku7uwhq6ye_27",
+ "_score": null,
+ "_source": {
+ "indexed_ts": "2021-07-10T13:16:50Z",
+ "match_provenance": "crossref",
+ "match_reason": "doi",
+ "match_status": "exact",
+ "ref_index": 27,
+ "ref_key": "e_1_3_2_1_27_1",
+ "source_release_ident": "yfr23sa5rbfizoqjku7uwhq6ye",
+ "source_work_ident": "mkmq6ju4abaexcezpb44cpx5xa",
+ "source_year": "2020",
+ "target_release_ident": "aaaaaaaaaaaaarceaaaaaaaaam",
+ "target_work_ident": "parh4accgzgbtahyxmm2sdfnzy"
+ },
+ "sort": [
+ 27
+ ]
+ },
+ {
+ "_index": "fatcat_ref_v02_20210716",
+ "_type": "_doc",
+ "_id": "yfr23sa5rbfizoqjku7uwhq6ye_28",
+ "_score": null,
+ "_source": {
+ "indexed_ts": "2021-07-10T13:05:08Z",
+ "match_provenance": "crossref",
+ "match_reason": "doi",
+ "match_status": "exact",
+ "ref_index": 28,
+ "ref_key": "e_1_3_2_1_28_1",
+ "source_release_ident": "yfr23sa5rbfizoqjku7uwhq6ye",
+ "source_work_ident": "mkmq6ju4abaexcezpb44cpx5xa",
+ "source_year": "2020",
+ "target_release_ident": "aaaaaaaaaaaaarceaaaaaaaaam",
+ "target_work_ident": "br57toqbtnathfwbmkqyyva63i"
+ },
+ "sort": [
+ 28
+ ]
+ },
+ {
+ "_index": "fatcat_ref_v02_20210716",
+ "_type": "_doc",
+ "_id": "yfr23sa5rbfizoqjku7uwhq6ye_29",
+ "_score": null,
+ "_source": {
+ "indexed_ts": "2021-07-10T13:16:49Z",
+ "match_provenance": "crossref",
+ "match_reason": "doi",
+ "match_status": "exact",
+ "ref_index": 29,
+ "ref_key": "e_1_3_2_1_29_1",
+ "source_release_ident": "yfr23sa5rbfizoqjku7uwhq6ye",
+ "source_work_ident": "mkmq6ju4abaexcezpb44cpx5xa",
+ "source_year": "2020",
+ "target_release_ident": "aaaaaaaaaaaaarceaaaaaaaaam",
+ "target_work_ident": "tlc4c3mwjnh25nnfjgpoic5r44"
+ },
+ "sort": [
+ 29
+ ]
+ },
+ {
+ "_index": "fatcat_ref_v02_20210716",
+ "_type": "_doc",
+ "_id": "yfr23sa5rbfizoqjku7uwhq6ye_30",
+ "_score": null,
+ "_source": {
+ "indexed_ts": "2021-07-10T13:16:52Z",
+ "match_provenance": "crossref",
+ "match_reason": "doi",
+ "match_status": "exact",
+ "ref_index": 30,
+ "ref_key": "e_1_3_2_1_30_1",
+ "source_release_ident": "yfr23sa5rbfizoqjku7uwhq6ye",
+ "source_work_ident": "mkmq6ju4abaexcezpb44cpx5xa",
+ "source_year": "2020",
+ "target_release_ident": "aaaaaaaaaaaaarceaaaaaaaaam",
+ "target_work_ident": "z7rtkf2gljct5pvmlt6vitakri"
+ },
+ "sort": [
+ 30
+ ]
+ },
+ {
+ "_index": "fatcat_ref_v02_20210716",
+ "_type": "_doc",
+ "_id": "yfr23sa5rbfizoqjku7uwhq6ye_31",
+ "_score": null,
+ "_source": {
+ "indexed_ts": "2021-07-10T13:16:49Z",
+ "match_provenance": "crossref",
+ "match_reason": "doi",
+ "match_status": "exact",
+ "ref_index": 31,
+ "ref_key": "e_1_3_2_1_31_1",
+ "source_release_ident": "yfr23sa5rbfizoqjku7uwhq6ye",
+ "source_work_ident": "mkmq6ju4abaexcezpb44cpx5xa",
+ "source_year": "2020",
+ "target_release_ident": "aaaaaaaaaaaaarceaaaaaaaaam",
+ "target_work_ident": "qhxmybpi2fgt3hqnxx4admuwwq"
+ },
+ "sort": [
+ 31
+ ]
+ },
+ {
+ "_index": "fatcat_ref_v02_20210716",
+ "_type": "_doc",
+ "_id": "yfr23sa5rbfizoqjku7uwhq6ye_32",
+ "_score": null,
+ "_source": {
+ "indexed_ts": "2021-07-10T13:16:49Z",
+ "match_provenance": "crossref",
+ "match_reason": "doi",
+ "match_status": "exact",
+ "ref_index": 32,
+ "ref_key": "e_1_3_2_1_32_1",
+ "source_release_ident": "yfr23sa5rbfizoqjku7uwhq6ye",
+ "source_work_ident": "mkmq6ju4abaexcezpb44cpx5xa",
+ "source_year": "2020",
+ "target_release_ident": "aaaaaaaaaaaaarceaaaaaaaaam",
+ "target_work_ident": "xdbsyi37jvfidpfp36okylalzi"
+ },
+ "sort": [
+ 32
+ ]
+ },
+ {
+ "_index": "fatcat_ref_v02_20210716",
+ "_type": "_doc",
+ "_id": "yfr23sa5rbfizoqjku7uwhq6ye_34",
+ "_score": null,
+ "_source": {
+ "indexed_ts": "2021-07-10T13:16:43Z",
+ "match_provenance": "crossref",
+ "match_reason": "doi",
+ "match_status": "exact",
+ "ref_index": 34,
+ "ref_key": "e_1_3_2_1_34_1",
+ "source_release_ident": "yfr23sa5rbfizoqjku7uwhq6ye",
+ "source_work_ident": "mkmq6ju4abaexcezpb44cpx5xa",
+ "source_year": "2020",
+ "target_release_ident": "aaaaaaaaaaaaarceaaaaaaaaam",
+ "target_work_ident": "pc5ynoopsnclvbiyzedr3swk34"
+ },
+ "sort": [
+ 34
+ ]
+ },
+ {
+ "_index": "fatcat_ref_v02_20210716",
+ "_type": "_doc",
+ "_id": "yfr23sa5rbfizoqjku7uwhq6ye_35",
+ "_score": null,
+ "_source": {
+ "indexed_ts": "2021-07-10T13:16:56Z",
+ "match_provenance": "crossref",
+ "match_reason": "doi",
+ "match_status": "exact",
+ "ref_index": 35,
+ "ref_key": "e_1_3_2_1_35_1",
+ "source_release_ident": "yfr23sa5rbfizoqjku7uwhq6ye",
+ "source_work_ident": "mkmq6ju4abaexcezpb44cpx5xa",
+ "source_year": "2020",
+ "target_release_ident": "aaaaaaaaaaaaarceaaaaaaaaam",
+ "target_work_ident": "hnmreinbn5aehmz2hqqydfpeve"
+ },
+ "sort": [
+ 35
+ ]
+ },
+ {
+ "_index": "fatcat_ref_v02_20210716",
+ "_type": "_doc",
+ "_id": "yfr23sa5rbfizoqjku7uwhq6ye_36",
+ "_score": null,
+ "_source": {
+ "indexed_ts": "2021-07-10T13:16:52Z",
+ "match_provenance": "crossref",
+ "match_reason": "doi",
+ "match_status": "exact",
+ "ref_index": 36,
+ "ref_key": "e_1_3_2_1_36_1",
+ "source_release_ident": "yfr23sa5rbfizoqjku7uwhq6ye",
+ "source_work_ident": "mkmq6ju4abaexcezpb44cpx5xa",
+ "source_year": "2020",
+ "target_release_ident": "aaaaaaaaaaaaarceaaaaaaaaam",
+ "target_work_ident": "abumc5tgazaahbjh4hb7t6k7qm"
+ },
+ "sort": [
+ 36
+ ]
+ }
+ ]
+ }
+}
diff --git a/python/tests/web_refs.py b/python/tests/web_refs.py
new file mode 100644
index 00000000..bceb8557
--- /dev/null
+++ b/python/tests/web_refs.py
@@ -0,0 +1,54 @@
+
+import json
+import pytest
+
+from fatcat_web.search import get_elastic_container_random_releases
+from fatcat_openapi_client.rest import ApiException
+from fixtures import *
+
+
+def test_basic_refs(app, mocker):
+
+ with open('tests/files/elastic_refs_in_release.json') as f:
+ elastic_resp_in = json.loads(f.read())
+ with open('tests/files/elastic_refs_out_release.json') as f:
+ elastic_resp_out = json.loads(f.read())
+ with open('tests/files/elastic_empty.json') as f:
+ elastic_resp_empty = json.loads(f.read())
+
+ es_raw = mocker.patch('elasticsearch.connection.Urllib3HttpConnection.perform_request')
+ es_raw.side_effect = [
+ (200, {}, json.dumps(elastic_resp_in)),
+ (200, {}, json.dumps(elastic_resp_in)),
+ (200, {}, json.dumps(elastic_resp_empty)),
+ (200, {}, json.dumps(elastic_resp_out)),
+ (200, {}, json.dumps(elastic_resp_out)),
+ (200, {}, json.dumps(elastic_resp_empty)),
+ ]
+
+ # render refs-in
+ rv = app.get('/release/aaaaaaaaaaaaarceaaaaaaaaai/refs-in')
+ assert rv.status_code == 200
+ assert b"Why Most Published Research Findings Are False" in rv.data
+
+ rv = app.get('/release/aaaaaaaaaaaaarceaaaaaaaaai/refs-in.json')
+ assert rv.status_code == 200
+
+ # empty (in)
+ rv = app.get('/release/aaaaaaaaaaaaarceaaaaaaaaai/refs-in')
+ assert rv.status_code == 200
+ assert b"No References Found" in rv.data
+
+ # render refs-out
+ rv = app.get('/release/aaaaaaaaaaaaarceaaaaaaaaai/refs-out')
+ assert rv.status_code == 200
+ assert b"Why Most Published Research Findings Are False" in rv.data
+
+ rv = app.get('/release/aaaaaaaaaaaaarceaaaaaaaaai/refs-out.json')
+ assert rv.status_code == 200
+
+ # empty (out)
+ rv = app.get('/release/aaaaaaaaaaaaarceaaaaaaaaai/refs-out')
+ assert rv.status_code == 200
+ assert b"No References Found" in rv.data
+