diff options
Diffstat (limited to 'python')
19 files changed, 2228 insertions, 15 deletions
diff --git a/python/.pylintrc b/python/.pylintrc index d3003620..5fc310a8 100644 --- a/python/.pylintrc +++ b/python/.pylintrc @@ -14,3 +14,5 @@ notes=FIXME,XXX,DELETEME ignored-modules=responses # FileEntityForm' has no 'data' member (no-member) (etc) generated-members=data,errors +# No name 'BaseModel' in module 'pydantic' (no-name-in-module) (etc) +extension-pkg-allow-list=pydantic diff --git a/python/fatcat_tools/references.py b/python/fatcat_tools/references.py new file mode 100644 index 00000000..496a46e1 --- /dev/null +++ b/python/fatcat_tools/references.py @@ -0,0 +1,429 @@ +""" +Helper routines for working with the fatcat citation graph, which is a separate +index of reference links between works in the main catalog. + +See bulk citation and citation API proposals for design documentation. +""" + +import sys +import datetime +import argparse +from typing import Optional, List, Any, Dict, Union + +from pydantic import BaseModel, validator +import elasticsearch +from elasticsearch_dsl import Search +from fatcat_openapi_client import ReleaseEntity + +from fatcat_tools import public_api +from fatcat_tools.transforms.access import release_access_options, AccessOption +from fatcat_tools.transforms.entities import entity_to_dict + + +class BiblioRef(BaseModel): + """bibliographic reference""" + # ("release", source_release_ident, ref_index) + # ("wikipedia", source_wikipedia_article, ref_index) + _key: Optional[str] + update_ts: Optional[datetime.datetime] + + # metadata about source of reference + source_release_ident: Optional[str] + source_work_ident: Optional[str] + # with lang prefix like "en:Superglue" + source_wikipedia_article: Optional[str] + source_release_stage: Optional[str] + source_year: Optional[int] + + # context of the reference itself + # 1-indexed, not 0-indexed + ref_index: Optional[int] # TODO: actually optional? 
def hacks(self):
    """
    Apply in-place work-arounds for schema/data issues, then return ``self``.

    Two fields are normalized:

    - ``target_openlibrary_work``: a leading "/works/" path prefix is
      dropped, leaving only the bare identifier.
    - ``ref_key``: surrounding whitespace and one leading '/' or '_' are
      removed, and DOI-like or long keys are reduced to their last segment.
      When no usable key remains and ``ref_index`` is set, the index (as a
      string) becomes the key.

    Returning ``self`` allows chaining directly after parsing a hit.
    """
    prefix = "/works/"
    if self.target_openlibrary_work and self.target_openlibrary_work.startswith(prefix):
        self.target_openlibrary_work = self.target_openlibrary_work[len(prefix):]

    # work-arounds for bad/weird ref_key
    if self.ref_key:
        self.ref_key = self.ref_key.strip()
        # startswith() rather than [0]: a whitespace-only key is the empty
        # string after strip(), and indexing it would raise IndexError
        if self.ref_key.startswith(('/', '_')):
            self.ref_key = self.ref_key[1:]
        # DOI-style keys (SICI flavor): keep only what follows the last '-'
        if self.ref_key.startswith("10.") and 'SICI' in self.ref_key and '-' in self.ref_key:
            self.ref_key = self.ref_key.split('-')[-1]
        # DOI-style keys: keep only what follows the last '_'
        if self.ref_key.startswith("10.") and '_' in self.ref_key:
            self.ref_key = self.ref_key.split('_')[-1]
        # long keys: keep only the trailing fragment after '#' / '_'
        if len(self.ref_key) > 10 and "#" in self.ref_key:
            self.ref_key = self.ref_key.split('#')[-1]
        if len(self.ref_key) > 10 and "_" in self.ref_key:
            self.ref_key = self.ref_key.split('_')[-1]

    # fall back to the positional index when there is no (usable) key
    if not self.ref_key and self.ref_index is not None:
        self.ref_key = str(self.ref_index)
    return self
CSL-JSON metadata would be derived from + # the full release entity. + ref: BiblioRef + release: Optional[ReleaseEntity] + # TODO: openlibrary work? + access: List[AccessOption] + + @validator('release') + @classmethod + def check_release(cls, v): + if v is not None and not isinstance(v, ReleaseEntity): + raise ValueError("expected a ReleaseEntity") + return v + + class Config: + arbitrary_types_allowed = True + json_encoders = { + ReleaseEntity: entity_to_dict, + } + + +class RefHits(BaseModel): + count_returned: int + count_total: int + offset: int + limit: int + query_time_ms: int + query_wall_time_ms: int + result_refs: List[Union[BiblioRef,EnrichedBiblioRef]] + + class Config: + json_encoders = { + ReleaseEntity: entity_to_dict, + } + + +def _execute_ref_query(search: Any, limit: int, offset: Optional[int] = None) -> RefHits: + """ + Internal helper for querying elasticsearch refs index and transforming hits + """ + + limit = min((int(limit or 15), 200)) + if not offset or offset < 0: + offset = 0 + + search = search.params(track_total_hits=True) + search = search[offset : (offset + limit)] + + query_start = datetime.datetime.now() + try: + resp = search.execute() + except elasticsearch.exceptions.RequestError as e_raw: + # this is a "user" error + e: Any = e_raw + #logging.warn("elasticsearch 400: " + str(e.info)) + if e.info.get("error", {}).get("root_cause", {}): + raise ValueError(str(e.info["error"]["root_cause"][0].get("reason"))) from e + else: + raise ValueError(str(e.info)) from e + except elasticsearch.exceptions.TransportError as e: + # all other errors + #logging.warn(f"elasticsearch non-200 status code: {e.info}") + raise IOError(str(e.info)) from e + query_delta = datetime.datetime.now() - query_start + + result_refs = [] + for h in resp.hits: + # might be a list because of consolidation + if isinstance(h._d_.get('source_work_ident'), list): + h._d_['source_work_ident'] = h._d_['source_work_ident'][0] + 
def get_inbound_refs(
    es_client: Any,
    release_ident: Optional[str] = None,
    work_ident: Optional[str] = None,
    openlibrary_work: Optional[str] = None,
    url: Optional[str] = None,
    consolidate_works: bool = True,
    filter_stage: Optional[List[str]] = None,
    sort: Optional[str] = None,
    limit: int = 25,
    offset: Optional[int] = None,
    es_index: str = "fatcat_ref",
) -> RefHits:
    """
    Query the refs index for references pointing *at* a target.

    Exactly one lookup key is used, checked in this order: ``release_ident``,
    ``work_ident``, ``openlibrary_work``. ``url`` is accepted for interface
    compatibility but is not used as a lookup key by this function.

    With ``consolidate_works`` set, hits are collapsed on
    ``source_work_ident`` so each citing work appears once. ``filter_stage``
    (if non-empty) restricts hits to the listed stages. ``sort`` may be
    "newest" or "oldest"; any other value sorts newest first.

    Returns the ``RefHits`` produced by ``_execute_ref_query()``.

    Raises ValueError if none of the supported lookup keys is given.
    """

    search = Search(using=es_client, index=es_index)

    if consolidate_works:
        search = search.extra(
            collapse={
                "field": "source_work_ident",
                "inner_hits": {"name": "source_more", "size": 0},
            }
        )

    if release_ident:
        search = search.filter("term", target_release_ident=release_ident)
    elif work_ident:
        search = search.filter("term", target_work_ident=work_ident)
    elif openlibrary_work:
        search = search.filter("term", target_openlibrary_work=openlibrary_work)
    else:
        raise ValueError("require a lookup key")

    if filter_stage:
        # "terms" (plural) is the query type that takes a list of values;
        # "term" only accepts a single value.
        # NOTE(review): BiblioRef names this attribute `source_release_stage`;
        # confirm which field name the index mapping actually uses.
        search = search.filter("terms", source_stage=filter_stage)

    # newest-first is the default for unknown/absent sort values
    if sort == "oldest":
        search = search.sort("source_year")
    else:
        search = search.sort("-source_year")

    return _execute_ref_query(search, limit=limit, offset=offset)
def run_ref_query(args) -> None:
    """
    CLI helper/debug tool (prints to stdout)

    ``args.ident`` may be prefixed with "release_" or "work_"; a bare ident
    is treated as a release ident. Outbound references are printed first,
    then inbound references. When ``args.enrich == "fatcat"`` each reference
    is expanded through the fatcat API before printing.
    """
    release_ident = None
    work_ident = None
    if args.ident.startswith("release_"):
        release_ident = args.ident.split('_')[1]
    elif args.ident.startswith("work_"):
        work_ident = args.ident.split('_')[1]
    else:
        release_ident = args.ident

    # honor --elasticsearch-ref-index when the caller's namespace carries it
    es_index = getattr(args, "elasticsearch_ref_index", None) or "fatcat_ref"

    print("## Outbound References")
    hits = get_outbound_refs(
        release_ident=release_ident,
        work_ident=work_ident,
        es_client=args.es_client,
        es_index=es_index,
    )
    print(f"Total: {hits.count_total} Time: {hits.query_wall_time_ms}ms; {hits.query_time_ms}ms")

    if args.enrich == "fatcat":
        enriched = enrich_outbound_refs(hits.result_refs, hide='refs,abstracts', fatcat_api_client=args.fatcat_api_client)
        for ref in enriched:
            # `ref` is an EnrichedBiblioRef: BiblioRef fields live under `ref.ref`
            if ref.release:
                print(f"{ref.ref.ref_index or '-'}\trelease_{ref.release.ident}\t{ref.ref.match_provenance}/{ref.ref.match_status}\t{ref.release.release_year or '-'}\t{ref.release.title}\t{ref.release.ext_ids.pmid or ref.release.ext_ids.doi or '-'}")
            else:
                print(f"{ref.ref.ref_index or '-'}\trelease_{ref.ref.target_release_ident}")
    else:
        for ref in hits.result_refs:
            # `ref` is a plain BiblioRef here: fields are read directly
            print(f"{ref.ref_index or '-'}\trelease_{ref.target_release_ident}")

    print()
    print("## Inbound References")
    hits = get_inbound_refs(
        release_ident=release_ident,
        work_ident=work_ident,
        es_client=args.es_client,
        es_index=es_index,
    )

    print(f"Total: {hits.count_total} Time: {hits.query_wall_time_ms}ms; {hits.query_time_ms}ms")

    if args.enrich == "fatcat":
        enriched = enrich_inbound_refs(hits.result_refs, hide='refs,abstracts', fatcat_api_client=args.fatcat_api_client)
        for ref in enriched:
            if ref.release:
                print(f"release_{ref.release.ident}\t{ref.ref.match_provenance}/{ref.ref.match_status}\t{ref.release.release_year or '-'}\t{ref.release.title}\t{ref.release.ext_ids.pmid or ref.release.ext_ids.doi or '-'}")
            else:
                print(f"release_{ref.ref.target_release_ident}")
    else:
        for ref in hits.result_refs:
            print(f"work_{ref.source_work_ident}\trelease_{ref.source_release_ident}")
def release_access_options(release: ReleaseEntity) -> List[AccessOption]:
    """
    Extracts access options from a release.

    Walks the release's files and their URLs in order and returns a
    single-element list for the first URL hosted on web.archive.org
    ("wayback") or archive.org ("ia_file"). Returns an empty list when the
    release has no files, or none of the URLs match.

    TODO: proper implementation (currently only the first match is returned)
    """
    for f in (release.files or []):
        for u in (f.urls or []):
            # tolerate URL entries without a value instead of raising TypeError
            url = u.url or ""
            if '://web.archive.org/' in url:
                return [AccessOption(
                    access_type="wayback",
                    access_url=url,
                    mimetype=f.mimetype,
                    size_bytes=f.size,
                    thumbnail_url=None
                )]
            elif '://archive.org/' in url:
                return [AccessOption(
                    access_type="ia_file",
                    access_url=url,
                    mimetype=f.mimetype,
                    size_bytes=f.size,
                    thumbnail_url=None
                )]
    return []
def _refs_web(direction, release_ident=None, work_ident=None, openlibrary_id=None, wikipedia_article=None) -> RefHits:
    """
    Shared helper for the reference views: run the query, then enrich hits.

    ``direction`` is "in" (references pointing at the entity) or "out"
    (references made by the entity). Paging comes from the ``offset`` and
    ``limit`` query-string arguments of the current request: ``offset``
    defaults to 0, ``limit`` defaults to 30 and is capped at 100. Values
    that are not plain decimal digits fall back to the defaults.

    Returns the ``RefHits`` with ``result_refs`` replaced by enriched refs.

    Raises ValueError for any other ``direction``.
    """
    # isdecimal(), not isnumeric(): the latter accepts characters such as
    # superscripts and fractions that int() cannot parse
    offset = request.args.get('offset', '0')
    offset = max(0, int(offset)) if offset.isdecimal() else 0
    limit = request.args.get('limit', '30')
    limit = min(max(0, int(limit)), 100) if limit.isdecimal() else 30

    if direction == "in":
        hits = get_inbound_refs(
            release_ident=release_ident,
            work_ident=work_ident,
            openlibrary_work=openlibrary_id,
            es_client=app.es_client,
            offset=offset,
            limit=limit,
        )
        hits.result_refs = enrich_inbound_refs(
            hits.result_refs,
            fatcat_api_client=api,
            expand="container,files,webcaptures",
        )
    elif direction == "out":
        hits = get_outbound_refs(
            release_ident=release_ident,
            wikipedia_article=wikipedia_article,
            work_ident=work_ident,
            es_client=app.es_client,
            offset=offset,
            limit=limit,
        )
        hits.result_refs = enrich_outbound_refs(
            hits.result_refs,
            fatcat_api_client=api,
            expand="container,files,webcaptures",
        )
    else:
        raise ValueError(f"unknown reference direction: {direction!r}")
    return hits
@app.route('/reference/match', methods=['GET', 'POST'])
def reference_match():
    """
    Reference fuzzy-match tool: parse a raw citation and/or match metadata.

    GET (or any non-submitted request) renders the empty form. A submitted
    form that fails validation re-renders the form, with HTTP 400 when the
    form carries errors. On a valid submission, ``submit_type`` selects:

    - "parse": send the raw citation to GROBID, pre-fill the form from the
      parsed fields, and fuzzy-match the parsed release stub
    - "match": fuzzy-match the bibliographic fields entered in the form

    Matched releases are re-fetched from the API with expansions and get
    access options attached before rendering.
    """

    form = ReferenceMatchForm()
    grobid_status = None
    grobid_dict = None

    if not form.is_submitted():
        return render_template('reference_match.html', form=form), 200

    if not form.validate_on_submit():
        status_code = 400 if form.errors else 200
        return render_template('reference_match.html', form=form), status_code

    if form.submit_type.data == 'parse':
        resp_xml = grobid_api_process_citation(form.raw_citation.data)
        if not resp_xml:
            grobid_status = "failed"
            return render_template('reference_match.html', form=form, grobid_status=grobid_status), 400
        grobid_dict = transform_grobid_ref_xml(resp_xml)
        if not grobid_dict:
            grobid_status = "empty"
            return render_template('reference_match.html', form=form, grobid_status=grobid_status), 200
        release_stub = grobid_ref_to_release(grobid_dict)
        # remove empty values from GROBID parsed dict
        grobid_dict = {k: v for k, v in grobid_dict.items() if v is not None}
        form = ReferenceMatchForm.from_grobid_parse(grobid_dict, form.raw_citation.data)
        grobid_status = "success"
        matches = close_fuzzy_release_matches(es_client=app.es_client, release=release_stub, match_limit=10) or []
    elif form.submit_type.data == 'match':
        matches = close_fuzzy_biblio_matches(es_client=app.es_client, biblio=form.data, match_limit=10) or []
    else:
        raise NotImplementedError()

    for m in matches:
        # expand releases more completely; "abstracts" (plural) matches the
        # hide value used by the other release fetches for refs
        m.release = api.get_release(m.release.ident, expand="container,files,filesets,webcaptures", hide="abstracts,refs")
        # hack in access options
        m.access_options = release_access_options(m.release)

    return render_template('reference_match.html', form=form, grobid_dict=grobid_dict, grobid_status=grobid_status, matches=matches), 200
{# Compact summary of a release for table/list rows: linked title, type tag
   (unless journal article / conference paper), up to 8 contributors, year,
   container, release-stage badge, and external identifiers. #}
{% macro release_summary(release) %}
  <b><a href="/release/{{ release.ident }}">{{ release.title }}</a></b>
  {% if release.release_type not in ["article-journal", "paper-conference"] %}
    <b>[{{ release.release_type or "unknown-type" }}]</b>
  {% endif %}
  {% if release.contribs %}<br>{% endif %}
  {% for contrib in release.contribs[:8] %}
    {% if contrib.creator %}
      {# link to the creator entity page (was the misspelled "/contib/") #}
      <a href="/creator/{{ contrib.creator.ident }}" style="color: black;">{{ contrib.creator.display_name }}</a>
    {% else %}
      {{ contrib.raw_name }}
    {%- endif %}
    {%- if not loop.last %}, {% endif %}
  {% endfor %}
  {% if release.contribs | length > 8 %} <i>(+ more)</i> {%endif %}
  {% if release.release_year or release.container or (release.extra and release.extra.container_name) %}<br>{% endif %}
  {% if release.release_year %}
    {% if release.release_date %}
      <span title="{{ release.release_date }}">{{ release.release_year }}</span>
    {% else %}
      {{ release.release_year }}
    {% endif %}

  {% endif %}
  {% if release.container %}
    <a href="/container/{{ release.container.ident }}" style="color: black;"><i>{{ release.container.name }}</i></a>
  {% elif release.extra and release.extra.container_name %}
    <i>{{ release.extra.container_name }}</i>
  {% endif %}

  {% if release.release_stage == "submitted" %}
    <b style="color: brown; text-transform: uppercase;">pre-print</b>
  {% elif release.release_stage and release.release_stage != "published" %}
    <b style="color: brown; text-transform: uppercase;">{{ release.release_stage }} version</b>
  {% elif not release.release_stage %}
    <b style="color: brown; text-transform: uppercase;">unpublished</b>
  {% endif %}
<br>
  {% if release.version %}
    <span style="color:green">version:{{ release.version }}</span>
  {% endif %}
  {% if release.number %}
    <span style="color:green">number:{{ release.number }}</span>
  {% endif %}
  {% if release.ext_ids.doi %}
    <a href="https://doi.org/{{ release.ext_ids.doi }}" style="color:green;">doi:{{ release.ext_ids.doi }}</a>
  {% endif %}
  {# TODO: links #}
  {% if release.ext_ids.arxiv %}
    <a href="#" style="color:green;">arXiv:{{ release.ext_ids.arxiv }}</a>
  {% endif %}
  {% if release.ext_ids.pmcid %}
    <a href="#" style="color:green;">pmcid:{{ release.ext_ids.pmcid }}</a>
  {% endif %}
  {% if release.ext_ids.pmid %}
    <a href="#" style="color:green;">pmid:{{ release.ext_ids.pmid }}</a>
  {% endif %}
  {% if release.ext_ids.dblp %}
    <a href="#" style="color:green;">dblp:{{ release.ext_ids.dblp }}</a>
  {% endif %}
{% endmacro %}
"refs_macros.html" as refs_macros %} + +{% block title %}Open Library Refs{% endblock %} + +{% block fullbody %} +<h1 class="ui header"> + {% if hits.result_refs and hits.result_refs[0].ref.target_unstructured %} + <i>{{ hits.result_refs[0].ref.target_unstructured }}</i> + {% endif %} + <span class="sub header"><a href="https://openlibrary.org/works/{{ openlibrary_id }}"><code>https://openlibrary.org/works/{{ openlibrary_id }}</code></a></span> +</h1> + +{% if direction == "in" %} + <h3>Cited By</h3> + <p>This page lists references to this book from other works (eg, journal articles). +{% elif direction == "out" %} + <h3>References</h3> + <i>Refernces from this book to other entities.</i> +{% endif %} + +{{ refs_macros.refs_table(hits, direction) }} + +{% endblock %} + diff --git a/python/fatcat_web/templates/reference_match.html b/python/fatcat_web/templates/reference_match.html new file mode 100644 index 00000000..f2335f52 --- /dev/null +++ b/python/fatcat_web/templates/reference_match.html @@ -0,0 +1,93 @@ +{% extends "base.html" %} +{% import "entity_macros.html" as entity_macros %} +{% import "edit_macros.html" as edit_macros %} + +{% block body %} + +<h1>Reference Fuzzy Match Tool</h1> + +<form class="ui form" id="reference_match" method="POST" action="/reference/match"> + <input type="hidden" name="csrf_token" value="{{ csrf_token() }}"/> + + <div class="ui segment"> + <h3>Parse Citation</h3> + + <p>Enter a citation string here and we will try to parse it (using GROBID) + into a structured format, then match against the catalog. 
+ + {{ edit_macros.form_field_basic(form.raw_citation) }} + + <button class="ui primary submit button right floated" type="submit" name="submit_type" value="parse"> + Parse + </button> + <br clear="all"> + </div> + + {% if grobid_status == "success" and grobid_dict %} + <div class="ui positive message"> + <div class="header">Parsed Citation String</div> + {{ entity_macros.extra_metadata(grobid_dict) }} + <p><i>See below for fuzzy match results</i> + </div> + {% endif %} + + <div class="ui segment"> + <h3>Fuzzy Match Metadata</h3> + + <p>Enter whatever bibliographic metadata fields you know, and we will try to + match to catalog entries. + + <p><b>NOTE:</b> if you already know a persistent identifier (like a DOI), you + should use the <a href="/release/lookup">lookup tool</a> instead. + + <br> + <div class="ui equal width fields"> + {{ edit_macros.form_field_basic(form.title) }} + </div> + <div class="ui equal width fields"> + {{ edit_macros.form_field_basic(form.first_author) }} + </div> + <div class="ui equal width fields"> + {{ edit_macros.form_field_basic(form.journal) }} + </div> + <div class="ui equal width fields"> + {{ edit_macros.form_field_basic(form.year) }} + {{ edit_macros.form_field_basic(form.volume) }} + {{ edit_macros.form_field_basic(form.issue) }} + {{ edit_macros.form_field_basic(form.pages) }} + </div> + + <button class="ui primary submit button right floated" type="submit" name="submit_type" value="match"> + Match + </button> + <br clear="all"> + </div> + +</form> + +{% if matches is defined %} + <h3>Matched Releases</h3> + + {% if not matches %} + <p><i>No matches found</i> + {% endif %} + + <table class="ui very basic celled table"> + <tbody> + {% for match in matches %} + <tr><td class="collapsing center aligned"> + <br><b>{{ match.status.name }}</b> + <br>{{ match.reason.name }} + <td class=""> + {{ entity_macros.release_summary(match.release) }} + <td class=""> + {% if match.access_options %} + <a href="{{ 
match.access_options[0].access_url}}" class="ui tiny green active button">{{ match.access_options[0].access_type.name }}</a> + {% endif %} + {% endfor %} + </tbody> + </table> + +{% endif %} + +{% endblock %} diff --git a/python/fatcat_web/templates/refs_macros.html b/python/fatcat_web/templates/refs_macros.html new file mode 100644 index 00000000..47ea2dcf --- /dev/null +++ b/python/fatcat_web/templates/refs_macros.html @@ -0,0 +1,132 @@ +{% import "entity_macros.html" as entity_macros %} + +{% macro pagination_row(hits, with_links=False) %} + {% if with_links and hits.offset %} + <a href="?offset={{ hits.offset - hits.limit }}">« prev</a> + {% endif %} + {% if hits.count_returned == 0 %} + Showing 0 references + {% else %} + Showing {{ "{:,}".format(hits.offset + 1) }} - {{ "{:,}".format(hits.offset + hits.count_returned) }} of {{ "{:,}".format(hits.count_total) }} references + {% endif %} + {% if with_links and hits.count_total != hits.count_returned and hits.offset + hits.limit < hits.count_total %} + <a href="?offset={{ hits.offset + hits.limit }}">next »</a> + {% endif %} +{% endmacro %} + +{% macro refs_table(hits, direction) %} +<div class="ui warning message"> + <div class="header"> + Fuzzy reference matching is a work in progress! + </div> + Read more about quality, completeness, and caveats <a href="https://guide.fatcat.wiki/reference_graph.html">in the fatcat guide</a>. +</div> + +<table class="ui table"> +<thead> + <tr><th colspan="3"> + {{ pagination_row(hits, with_links=False) }} + (in {{ hits.query_wall_time_ms }}ms) +</thead> +<tbody> +{% if hits.count_total == 0 %} + <tr><td class="ui placeholder segment"> + <div class="ui icon header"> + <i class="unlink icon"></i> + No References Found + </div> +{% endif %} +{% for row in hits.result_refs %} + {% set release = row.release %} + <tr> + <td class="collapsing left aligned top aligned"> + {# TODO: ref_locator? 
#} + {% if direction == "out" %} + {% if row.ref.ref_key %} + <code title="index={{ row.ref.ref_index }}">[{{ row.ref.ref_key }}]</code><br> + {% endif %} + {% endif %} + + {% if row.ref.match_status == "exact" %} + {% set match_icon = "linkify" %} + {% elif row.ref.match_status == "unmatched" %} + {% set match_icon = "question circle outline" %} + {% else %} + {% set match_icon = "magic" %} + {% endif %} + <i class="{{ match_icon }} icon" title="{{ row.ref.match_status }} {{ row.ref.match_reason }}"></i><br> + {% if row.ref.match_provenance %} + via {{ row.ref.match_provenance }}<br> + {% endif %} + + <td class=""> + {% if release %} + {{ entity_macros.release_summary(release) }} + {% elif direction == "in" and row.ref.source_wikipedia_article %} + {% set wiki_lang = row.ref.source_wikipedia_article.split(':')[0] %} + {% set wiki_article = ':'.join(row.ref.source_wikipedia_article.split(':')[1:]) %} + <b> + <a href="https://{{ wiki_lang }}.wikipedia.org/wiki/{{ wiki_article.replace(' ', '_') }}"> + {{ wiki_article }} + </a> + [wikipedia] + </b> + <br> + <span style="color:green;">lang:{{ wiki_lang }}</span> + <a href="/wikipedia/{{ wiki_lang }}:{{ wiki_article.replace(' ', '_') }}/refs-out" style="color:green;">[references]</a> + {% elif direction == "out" and row.ref.target_unstructured %} + <code>{{ row.ref.target_unstructured }}</code> + {% if row.ref.target_openlibrary_work %} + <br> + <a href="https://openlibrary.org/{{ row.ref.target_openlibrary_work }}" style="color:green;">openlibrary:{{ row.ref.target_openlibrary_work }}</a> + <a href="/openlibrary/{{ row.ref.target_openlibrary_work}}/refs-in" style="color:green;">[cited-by]</a> + {% endif %} + {% elif direction == "out" and row.ref.target_csl %} + {{ entity_macros.csl_summary(row.ref.target_csl) }} + {% else %} + <i>blank</i> + {% endif %} + <td class="center aligned"> + {% if row.access %} + {% for access in row.access %} + <a href="{{ access.access_url}}" class="ui green label" style="background-color: 
#2ca048;"> + {%- if access.access_type.name == "wayback" %} + web.archive.org + {%- elif access.access_type.name == "ia_file" -%} + archive.org + {%- else -%} + {{ access.access_type.name }} + {%- endif -%} + {%- if access.mimetype == "application/pdf" %} + [PDF] + {%- elif access.mimetype == "text/html" %} + [HTML] + {%- endif -%} + </a> + <br> + {% endfor %} + {% elif direction == "out" and row.ref.target_unstructured %} + <form class="ui form" id="reference_match" method="POST" action="/reference/match"> + <input type="hidden" name="csrf_token" value="{{ csrf_token() }}"/> + <input type="hidden" name="raw_citation" value="{{ row.ref.target_unstructured }}"> + <button class="ui tiny primary submit button" type="submit" name="submit_type" value="parse"> + re-parse + </button> + </form> + {% endif %} +{% endfor %} +</tbody> +<tfoot> + <tr><th colspan="3"> + <div style="float: right;"> + <a href="{{ request.path }}.json?{{ request.query_string.decode() }}">JSON</a> + </div> + {% if hits.count_returned != hits.count_total %} + <center> + {{ pagination_row(hits, with_links=True) }} + </center> + {% endif %} +</tfoot> +</table> +{% endmacro %} + diff --git a/python/fatcat_web/templates/release_view.html b/python/fatcat_web/templates/release_view.html index abf7ace0..4652f4a2 100644 --- a/python/fatcat_web/templates/release_view.html +++ b/python/fatcat_web/templates/release_view.html @@ -84,9 +84,9 @@ Published {% if release.container.ident %} in <a href="/container/{{ release.container.ident }}"><span itemprop="name">{{ release.container.name }}</span></a> - {% elif release.extra and release.extra.container_name %} + {%- elif release.extra and release.extra.container_name %} in <span itemprop="name">{{ release.extra.container_name }}</span> - {% endif %} + {%- endif %} {% else %} Released {% if release.release_type %} @@ -95,26 +95,27 @@ {% if release.container %} in <a href="/container/{{ release.container.ident }}"><span itemprop="name">{{ release.container.name 
}}</span></a> {% endif %} - {% endif %} + {% endif -%} {% if release.publisher %} by <span itemprop="publisher">{{ release.publisher }}</span> {%- endif %}. <p> - {% if release.volume != None %} - Volume {{ release.volume }} - {%- if release.issue != None %}, {% endif %} - {% endif %} - {% if release.issue != None %} - Issue {{ release.issue}} + {% set comma = joiner(", ") %} + {% if release.release_year != None %} + {{ release.release_year }} {% endif %} - {% if release.pages != None %} + {% if release.volume != None %} + {{- comma() }}Volume {{ release.volume -}} + {%- endif %} + {%- if release.issue != None %} + {{- comma() }}Issue {{ release.issue -}} + {%- endif %} + {%- if release.pages != None %} + {{- comma() }} {% if release.pages[0].isdigit() %}p{% endif -%} {{ release.pages }} {% endif %} - {% if release.release_year != None %} - ({{ release.release_year }}) - {% endif %} </div> {% if release.abstracts != [] %} diff --git a/python/fatcat_web/templates/release_view_fuzzy_refs.html b/python/fatcat_web/templates/release_view_fuzzy_refs.html new file mode 100644 index 00000000..8cba4f4e --- /dev/null +++ b/python/fatcat_web/templates/release_view_fuzzy_refs.html @@ -0,0 +1,27 @@ +{% set release = entity %} +{% set entity_view = "refs-" + direction %} +{% set entity_type = "release" %} +{% import "refs_macros.html" as refs_macros %} +{% extends "entity_base.html" %} + + +{% block entity_main %} + +{% if direction == "in" %} + <h3>Cited By</h3> + <i>References to this release by other works.</i> +{% elif direction == "out" %} + <h3>References</h3> + <i>NOTE: currently batch computed and may include additional reference sources, or be missing recent changes, compared to entity reference list.</i> + + {% if hits.count_total == 0 and release.refs %} + <div class="ui positive message"> + <p>No <i>fuzzy</i> references found, but there are <a href="/release/{{ release.ident }}/references">{{ release.refs|count }} legacy references</a> + </div> + {% endif %} +{% 
endif %} + +{{ refs_macros.refs_table(hits, direction) }} + +{% endblock %} + diff --git a/python/fatcat_web/templates/wikipedia_view_fuzzy_refs.html b/python/fatcat_web/templates/wikipedia_view_fuzzy_refs.html new file mode 100644 index 00000000..3e1453c1 --- /dev/null +++ b/python/fatcat_web/templates/wikipedia_view_fuzzy_refs.html @@ -0,0 +1,23 @@ +{% extends "base.html" %} +{% import "refs_macros.html" as refs_macros %} + +{% block title %}Wikipedia Refs{% endblock %} + +{% block fullbody %} +<h1 class="ui header"> + [{{ wiki_lang }}] {{ wiki_article }} + <span class="sub header"><a href="{{ wiki_url }}"><code>{{ wiki_url }}</code></a></span> +</h1> + +{% if direction == "in" %} + <h3>Cited By</h3> + <p>This page lists references to a wikipedia article, from other works (eg, journal articles). +{% elif direction == "out" %} + <h3>References</h3> + <i>References from wikipedia article to other entities.</i> +{% endif %} + +{{ refs_macros.refs_table(hits, direction) }} + +{% endblock %} + diff --git a/python/tests/files/elastic_empty.json b/python/tests/files/elastic_empty.json new file mode 100644 index 00000000..9b30d03b --- /dev/null +++ b/python/tests/files/elastic_empty.json @@ -0,0 +1 @@ +{"took": 10, "timed_out": false, "_shards": {"total": 6, "successful": 6, "skipped": 0, "failed": 0}, "hits": {"total": {"value": 0, "relation": "eq"}, "max_score": null, "hits": []}}
\ No newline at end of file diff --git a/python/tests/files/elastic_refs_in_release.json b/python/tests/files/elastic_refs_in_release.json new file mode 100644 index 00000000..5260ae3f --- /dev/null +++ b/python/tests/files/elastic_refs_in_release.json @@ -0,0 +1,360 @@ +{ + "took": 30, + "timed_out": false, + "_shards": { + "total": 6, + "successful": 6, + "skipped": 0, + "failed": 0 + }, + "hits": { + "total": { + "value": 69, + "relation": "eq" + }, + "max_score": null, + "hits": [ + { + "_index": "fatcat_ref_v02_20210716", + "_type": "_doc", + "_id": "rxy26aoognaytoeghum4ncmygq_30", + "_score": null, + "_source": { + "indexed_ts": "2021-07-10T11:47:48Z", + "match_provenance": "crossref", + "match_reason": "doi", + "match_status": "exact", + "ref_index": 30, + "ref_key": "BIB0030|jon779-cit-0030", + "source_release_ident": "aaaaaaaaaaaaarceaaaaaaaaam", + "source_work_ident": "3bggl3ls3fftvl3g6yg5qzy4yq", + "source_year": "2013", + "target_release_ident": "cx2xdrrgt5cydcinttixgj4nxu", + "target_work_ident": "s45xqgdp5bftrbymxtsl32hcna" + }, + "fields": { + "source_work_ident": [ + "3bggl3ls3fftvl3g6yg5qzy4yq" + ] + }, + "sort": [ + 2013 + ], + "inner_hits": { + "source_more": { + "hits": { + "total": { + "value": 1, + "relation": "eq" + }, + "max_score": null, + "hits": [] + } + } + } + }, + { + "_index": "fatcat_ref_v02_20210716", + "_type": "_doc", + "_id": "xew5r3gnxbaznhj3kevspu75yq_46", + "_score": null, + "_source": { + "indexed_ts": "2021-07-10T11:47:48Z", + "match_provenance": "crossref", + "match_reason": "doi", + "match_status": "exact", + "ref_index": 46, + "ref_key": "_bib46", + "source_release_ident": "aaaaaaaaaaaaarceaaaaaaaaam", + "source_work_ident": "dqfgm7p2urh3dd2ja2s5cleqr4", + "source_year": "2013", + "target_release_ident": "cx2xdrrgt5cydcinttixgj4nxu", + "target_work_ident": "s45xqgdp5bftrbymxtsl32hcna" + }, + "fields": { + "source_work_ident": [ + "dqfgm7p2urh3dd2ja2s5cleqr4" + ] + }, + "sort": [ + 2013 + ], + "inner_hits": { + 
"source_more": { + "hits": { + "total": { + "value": 1, + "relation": "eq" + }, + "max_score": null, + "hits": [] + } + } + } + }, + { + "_index": "fatcat_ref_v02_20210716", + "_type": "_doc", + "_id": "dyy6cr675zbivam4wldogvc7ue_23", + "_score": null, + "_source": { + "indexed_ts": "2021-07-10T11:47:48Z", + "match_provenance": "crossref", + "match_reason": "doi", + "match_status": "exact", + "ref_index": 23, + "ref_key": "BFmp2012104_CR23", + "source_release_ident": "aaaaaaaaaaaaarceaaaaaaaaam", + "source_work_ident": "7igrlb5tevgoxdeds2w2opwj7a", + "source_year": "2012", + "target_release_ident": "cx2xdrrgt5cydcinttixgj4nxu", + "target_work_ident": "s45xqgdp5bftrbymxtsl32hcna" + }, + "fields": { + "source_work_ident": [ + "7igrlb5tevgoxdeds2w2opwj7a" + ] + }, + "sort": [ + 2012 + ], + "inner_hits": { + "source_more": { + "hits": { + "total": { + "value": 1, + "relation": "eq" + }, + "max_score": null, + "hits": [] + } + } + } + }, + { + "_index": "fatcat_ref_v02_20210716", + "_type": "_doc", + "_id": "kbivvq4n5nhfpfl3dc7xq6bzbu_33", + "_score": null, + "_source": { + "indexed_ts": "2021-07-14T01:37:55Z", + "match_provenance": "fuzzy", + "match_reason": "jaccardauthors", + "match_status": "strong", + "ref_index": 33, + "ref_key": "b33", + "source_release_ident": "aaaaaaaaaaaaarceaaaaaaaaam", + "source_work_ident": "75yfavca2bbwbarcchkm7afhyy", + "source_year": "2012", + "target_release_ident": "cx2xdrrgt5cydcinttixgj4nxu", + "target_work_ident": "s45xqgdp5bftrbymxtsl32hcna" + }, + "fields": { + "source_work_ident": [ + "75yfavca2bbwbarcchkm7afhyy" + ] + }, + "sort": [ + 2012 + ], + "inner_hits": { + "source_more": { + "hits": { + "total": { + "value": 1, + "relation": "eq" + }, + "max_score": null, + "hits": [] + } + } + } + }, + { + "_index": "fatcat_ref_v02_20210716", + "_type": "_doc", + "_id": "2kemk2jdynacznanpixvqiytla_48", + "_score": null, + "_source": { + "indexed_ts": "2021-07-10T11:47:48Z", + "match_provenance": "crossref", + "match_reason": "doi", + 
"match_status": "exact", + "ref_index": 48, + "ref_key": "BFmp201237_CR48", + "source_release_ident": "aaaaaaaaaaaaarceaaaaaaaaam", + "source_work_ident": "7fu5n2crurex7isvfv5tnf3y2i", + "source_year": "2012", + "target_release_ident": "cx2xdrrgt5cydcinttixgj4nxu", + "target_work_ident": "s45xqgdp5bftrbymxtsl32hcna" + }, + "fields": { + "source_work_ident": [ + "7fu5n2crurex7isvfv5tnf3y2i" + ] + }, + "sort": [ + 2012 + ], + "inner_hits": { + "source_more": { + "hits": { + "total": { + "value": 1, + "relation": "eq" + }, + "max_score": null, + "hits": [] + } + } + } + }, + { + "_index": "fatcat_ref_v02_20210716", + "_type": "_doc", + "_id": "q3qy5z3htnd2likregg3dff23i_37", + "_score": null, + "_source": { + "indexed_ts": "2021-07-10T11:47:48Z", + "match_provenance": "crossref", + "match_reason": "doi", + "match_status": "exact", + "ref_index": 37, + "ref_key": "BFnpp2012215_CR37", + "source_release_ident": "aaaaaaaaaaaaarceaaaaaaaaam", + "source_work_ident": "hqk3tnlyvffoppkakajynn5x2u", + "source_year": "2012", + "target_release_ident": "cx2xdrrgt5cydcinttixgj4nxu", + "target_work_ident": "s45xqgdp5bftrbymxtsl32hcna" + }, + "fields": { + "source_work_ident": [ + "hqk3tnlyvffoppkakajynn5x2u" + ] + }, + "sort": [ + 2012 + ], + "inner_hits": { + "source_more": { + "hits": { + "total": { + "value": 1, + "relation": "eq" + }, + "max_score": null, + "hits": [] + } + } + } + }, + { + "_index": "fatcat_ref_v02_20210716", + "_type": "_doc", + "_id": "fzgecyr6cnggbinanrdyc44b4a_126", + "_score": null, + "_source": { + "indexed_ts": "2021-07-10T11:47:48Z", + "match_provenance": "crossref", + "match_reason": "doi", + "match_status": "exact", + "ref_index": 126, + "ref_key": "BFtp201234_CR126", + "source_release_ident": "aaaaaaaaaaaaarceaaaaaaaaam", + "source_work_ident": "hwtljgl3dvhojeusfwon6iba6q", + "source_year": "2012", + "target_release_ident": "cx2xdrrgt5cydcinttixgj4nxu", + "target_work_ident": "s45xqgdp5bftrbymxtsl32hcna" + }, + "fields": { + "source_work_ident": [ + 
"hwtljgl3dvhojeusfwon6iba6q" + ] + }, + "sort": [ + 2012 + ], + "inner_hits": { + "source_more": { + "hits": { + "total": { + "value": 1, + "relation": "eq" + }, + "max_score": null, + "hits": [] + } + } + } + }, + { + "_index": "fatcat_ref_v02_20210716", + "_type": "_doc", + "_id": "7lt5parryzcbhentd75sqgsuvu_15", + "_score": null, + "_source": { + "indexed_ts": "2021-07-10T11:47:48Z", + "match_provenance": "grobid", + "match_reason": "doi", + "match_status": "exact", + "ref_index": 15, + "ref_key": "b14", + "source_release_ident": "aaaaaaaaaaaaarceaaaaaaaaam", + "source_work_ident": "xenzkonwivbl3bzirbtqohsb4q", + "source_year": "0", + "target_release_ident": "cx2xdrrgt5cydcinttixgj4nxu", + "target_work_ident": "s45xqgdp5bftrbymxtsl32hcna" + }, + "fields": { + "source_work_ident": [ + "xenzkonwivbl3bzirbtqohsb4q" + ] + }, + "sort": [ + 0 + ], + "inner_hits": { + "source_more": { + "hits": { + "total": { + "value": 1, + "relation": "eq" + }, + "max_score": null, + "hits": [] + } + } + } + }, + { + "_index": "fatcat_ref_v02_20210716", + "_type": "_doc", + "_id": "wikipedia_knxxa2djmeqem4tbnztw65i_cx2xdrrgt5cydcinttixgj4nxu", + "_score": null, + "_source": { + "match_provenance": "wikipedia", + "match_reason": "doi", + "match_status": "exact", + "source_wikipedia_article": "en:Sophia Frangou", + "target_release_ident": "cx2xdrrgt5cydcinttixgj4nxu", + "target_work_ident": "s45xqgdp5bftrbymxtsl32hcna" + }, + "sort": [ + -9223372036854776000 + ], + "inner_hits": { + "source_more": { + "hits": { + "total": { + "value": 1, + "relation": "eq" + }, + "max_score": null, + "hits": [] + } + } + } + } + ] + } +} diff --git a/python/tests/files/elastic_refs_out_release.json b/python/tests/files/elastic_refs_out_release.json new file mode 100644 index 00000000..5a45acee --- /dev/null +++ b/python/tests/files/elastic_refs_out_release.json @@ -0,0 +1,683 @@ +{ + "took": 15, + "timed_out": false, + "_shards": { + "total": 6, + "successful": 6, + "skipped": 0, + "failed": 0 + }, + 
"hits": { + "total": { + "value": 34, + "relation": "eq" + }, + "max_score": null, + "hits": [ + { + "_index": "fatcat_ref_v02_20210716", + "_type": "_doc", + "_id": "yfr23sa5rbfizoqjku7uwhq6ye_1", + "_score": null, + "_source": { + "indexed_ts": "2021-07-14T03:08:24Z", + "match_reason": "unknown", + "match_status": "unmatched", + "ref_index": 1, + "ref_key": "e_1_3_2_1_2_1", + "source_release_ident": "yfr23sa5rbfizoqjku7uwhq6ye", + "source_work_ident": "mkmq6ju4abaexcezpb44cpx5xa", + "source_year": "2020", + "target_unstructured": "Accessed: 2020-01-01. GSL- GNU Scientific Library. https://www.gnu.org/software/gsl/. Accessed: 2020-01-01. GSL- GNU Scientific Library. https://www.gnu.org/software/gsl/." + }, + "sort": [ + 1 + ] + }, + { + "_index": "fatcat_ref_v02_20210716", + "_type": "_doc", + "_id": "yfr23sa5rbfizoqjku7uwhq6ye_2", + "_score": null, + "_source": { + "indexed_ts": "2021-07-14T03:08:24Z", + "match_reason": "unknown", + "match_status": "unmatched", + "ref_index": 2, + "ref_key": "e_1_3_2_1_3_1", + "source_release_ident": "yfr23sa5rbfizoqjku7uwhq6ye", + "source_work_ident": "mkmq6ju4abaexcezpb44cpx5xa", + "source_year": "2020", + "target_unstructured": "Accessed: 2020-01-01. KLEE with floating point support. https://github.com/srg-imperial/klee-float. Accessed: 2020-01-01. KLEE with floating point support. https://github.com/srg-imperial/klee-float." + }, + "sort": [ + 2 + ] + }, + { + "_index": "fatcat_ref_v02_20210716", + "_type": "_doc", + "_id": "yfr23sa5rbfizoqjku7uwhq6ye_3", + "_score": null, + "_source": { + "indexed_ts": "2021-07-14T03:08:24Z", + "match_reason": "unknown", + "match_status": "unmatched", + "ref_index": 3, + "ref_key": "e_1_3_2_1_4_1", + "source_release_ident": "yfr23sa5rbfizoqjku7uwhq6ye", + "source_work_ident": "mkmq6ju4abaexcezpb44cpx5xa", + "source_year": "2020", + "target_unstructured": "Accessed: 2020-01-01. LibTooling. https://clang.llvm.org/docs/LibTooling.html. Accessed: 2020-01-01. LibTooling. 
https://clang.llvm.org/docs/LibTooling.html." + }, + "sort": [ + 3 + ] + }, + { + "_index": "fatcat_ref_v02_20210716", + "_type": "_doc", + "_id": "yfr23sa5rbfizoqjku7uwhq6ye_4", + "_score": null, + "_source": { + "indexed_ts": "2021-07-14T03:08:24Z", + "match_reason": "unknown", + "match_status": "unmatched", + "ref_index": 4, + "ref_key": "e_1_3_2_1_5_1", + "source_release_ident": "yfr23sa5rbfizoqjku7uwhq6ye", + "source_work_ident": "mkmq6ju4abaexcezpb44cpx5xa", + "source_year": "2020", + "target_unstructured": "Accessed: 2020-01-01. Meschach Library. https://www.netlib.org/c/meschach/readme. Accessed: 2020-01-01. Meschach Library. https://www.netlib.org/c/meschach/readme." + }, + "sort": [ + 4 + ] + }, + { + "_index": "fatcat_ref_v02_20210716", + "_type": "_doc", + "_id": "yfr23sa5rbfizoqjku7uwhq6ye_5", + "_score": null, + "_source": { + "indexed_ts": "2021-07-14T03:08:24Z", + "match_reason": "unknown", + "match_status": "unmatched", + "ref_index": 5, + "ref_key": "e_1_3_2_1_6_1", + "source_release_ident": "yfr23sa5rbfizoqjku7uwhq6ye", + "source_work_ident": "mkmq6ju4abaexcezpb44cpx5xa", + "source_year": "2020", + "target_unstructured": "Accessed: 2020-01-01. Toyota: Software to blame for Prius brake problems. http://www.cnn.com/2010/WORLD/asiapcf/02/04/japan.prius.complaints/index.html. Accessed: 2020-01-01. Toyota: Software to blame for Prius brake problems. http://www.cnn.com/2010/WORLD/asiapcf/02/04/japan.prius.complaints/index.html." + }, + "sort": [ + 5 + ] + }, + { + "_index": "fatcat_ref_v02_20210716", + "_type": "_doc", + "_id": "yfr23sa5rbfizoqjku7uwhq6ye_6", + "_score": null, + "_source": { + "indexed_ts": "2021-07-14T03:08:24Z", + "match_reason": "unknown", + "match_status": "unmatched", + "ref_index": 6, + "ref_key": "e_1_3_2_1_7_1", + "source_release_ident": "yfr23sa5rbfizoqjku7uwhq6ye", + "source_work_ident": "mkmq6ju4abaexcezpb44cpx5xa", + "source_year": "2020", + "target_unstructured": "Accessed: 2020-01-01. Z3. https://github.com/Z3Prover/z3. 
Accessed: 2020-01-01. Z3. https://github.com/Z3Prover/z3." + }, + "sort": [ + 6 + ] + }, + { + "_index": "fatcat_ref_v02_20210716", + "_type": "_doc", + "_id": "yfr23sa5rbfizoqjku7uwhq6ye_7", + "_score": null, + "_source": { + "indexed_ts": "2021-07-14T03:08:24Z", + "match_reason": "unknown", + "match_status": "unmatched", + "ref_index": 7, + "ref_key": "e_1_3_2_1_8_1", + "source_release_ident": "yfr23sa5rbfizoqjku7uwhq6ye", + "source_work_ident": "mkmq6ju4abaexcezpb44cpx5xa", + "source_year": "2020", + "target_csl": { + "accessed": {}, + "author": [ + { + "raw_name": "Alatawi Eman" + } + ], + "container-title": "Symbolic Execution with Invariant Inlay: Evaluating the Potential. In 2018 25th Australasian Software Engineering Conference, ASWEC 2018.", + "issued": { + "date-parts": [[2019]] + } + } + }, + "sort": [ + 7 + ] + }, + { + "_index": "fatcat_ref_v02_20210716", + "_type": "_doc", + "_id": "yfr23sa5rbfizoqjku7uwhq6ye_8", + "_score": null, + "_source": { + "indexed_ts": "2021-07-14T03:08:24Z", + "match_reason": "unknown", + "match_status": "unmatched", + "ref_index": 8, + "ref_key": "e_1_3_2_1_9_1", + "source_release_ident": "yfr23sa5rbfizoqjku7uwhq6ye", + "source_work_ident": "mkmq6ju4abaexcezpb44cpx5xa", + "source_year": "2020", + "target_csl": { + "accessed": {}, + "author": [ + { + "name": "Bagnara Roberto" + } + ], + "container-title": "Symbolic Path-Oriented Test Data Generation for Floating-Point Programs. 
In Sixth IEEE International Conference on Software Testing, Verification and Validation, ICST", + "issued": { + "raw": "2000" + } + } + }, + "sort": [ + 8 + ] + }, + { + "_index": "fatcat_ref_v02_20210716", + "_type": "_doc", + "_id": "yfr23sa5rbfizoqjku7uwhq6ye_10", + "_score": null, + "_source": { + "indexed_ts": "2021-07-10T13:16:48Z", + "match_provenance": "crossref", + "match_reason": "doi", + "match_status": "exact", + "ref_index": 10, + "ref_key": "e_1_3_2_1_10_1", + "source_release_ident": "yfr23sa5rbfizoqjku7uwhq6ye", + "source_work_ident": "mkmq6ju4abaexcezpb44cpx5xa", + "source_year": "2020", + "target_release_ident": "aaaaaaaaaaaaarceaaaaaaaaam", + "target_work_ident": "kf6qgd3e6vc3nhkpf3m32qehj4" + }, + "sort": [ + 10 + ] + }, + { + "_index": "fatcat_ref_v02_20210716", + "_type": "_doc", + "_id": "yfr23sa5rbfizoqjku7uwhq6ye_11", + "_score": null, + "_source": { + "indexed_ts": "2021-07-10T13:16:48Z", + "match_provenance": "crossref", + "match_reason": "doi", + "match_status": "exact", + "ref_index": 11, + "ref_key": "e_1_3_2_1_11_1", + "source_release_ident": "yfr23sa5rbfizoqjku7uwhq6ye", + "source_work_ident": "mkmq6ju4abaexcezpb44cpx5xa", + "source_year": "2020", + "target_release_ident": "aaaaaaaaaaaaarceaaaaaaaaam", + "target_work_ident": "gynqpbv73jbdfcfpnzptsq4m64" + }, + "sort": [ + 11 + ] + }, + { + "_index": "fatcat_ref_v02_20210716", + "_type": "_doc", + "_id": "yfr23sa5rbfizoqjku7uwhq6ye_12", + "_score": null, + "_source": { + "indexed_ts": "2021-07-10T13:16:47Z", + "match_provenance": "crossref", + "match_reason": "doi", + "match_status": "exact", + "ref_index": 12, + "ref_key": "e_1_3_2_1_12_1", + "source_release_ident": "yfr23sa5rbfizoqjku7uwhq6ye", + "source_work_ident": "mkmq6ju4abaexcezpb44cpx5xa", + "source_year": "2020", + "target_release_ident": "aaaaaaaaaaaaarceaaaaaaaaam", + "target_work_ident": "osmyp7kaxzbd3duf6hohrtuzvm" + }, + "sort": [ + 12 + ] + }, + { + "_index": "fatcat_ref_v02_20210716", + "_type": "_doc", + "_id": 
"yfr23sa5rbfizoqjku7uwhq6ye_14", + "_score": null, + "_source": { + "indexed_ts": "2021-07-10T13:16:40Z", + "match_provenance": "crossref", + "match_reason": "doi", + "match_status": "exact", + "ref_index": 14, + "ref_key": "e_1_3_2_1_14_1", + "source_release_ident": "yfr23sa5rbfizoqjku7uwhq6ye", + "source_work_ident": "mkmq6ju4abaexcezpb44cpx5xa", + "source_year": "2020", + "target_release_ident": "aaaaaaaaaaaaarceaaaaaaaaam", + "target_work_ident": "sbxxyxrtxfe5fdukmcgjgjf6we" + }, + "sort": [ + 14 + ] + }, + { + "_index": "fatcat_ref_v02_20210716", + "_type": "_doc", + "_id": "yfr23sa5rbfizoqjku7uwhq6ye_15", + "_score": null, + "_source": { + "indexed_ts": "2021-07-10T13:16:46Z", + "match_provenance": "crossref", + "match_reason": "doi", + "match_status": "exact", + "ref_index": 15, + "ref_key": "e_1_3_2_1_15_1", + "source_release_ident": "yfr23sa5rbfizoqjku7uwhq6ye", + "source_work_ident": "mkmq6ju4abaexcezpb44cpx5xa", + "source_year": "2020", + "target_release_ident": "aaaaaaaaaaaaarceaaaaaaaaam", + "target_work_ident": "aw5o7bbhnjef7fy3cg3prpune4" + }, + "sort": [ + 15 + ] + }, + { + "_index": "fatcat_ref_v02_20210716", + "_type": "_doc", + "_id": "yfr23sa5rbfizoqjku7uwhq6ye_16", + "_score": null, + "_source": { + "indexed_ts": "2021-07-10T13:16:49Z", + "match_provenance": "crossref", + "match_reason": "doi", + "match_status": "exact", + "ref_index": 16, + "ref_key": "e_1_3_2_1_16_1", + "source_release_ident": "yfr23sa5rbfizoqjku7uwhq6ye", + "source_work_ident": "mkmq6ju4abaexcezpb44cpx5xa", + "source_year": "2020", + "target_release_ident": "aaaaaaaaaaaaarceaaaaaaaaam", + "target_work_ident": "sdmeosutrzgppilsibx5kbinba" + }, + "sort": [ + 16 + ] + }, + { + "_index": "fatcat_ref_v02_20210716", + "_type": "_doc", + "_id": "yfr23sa5rbfizoqjku7uwhq6ye_18", + "_score": null, + "_source": { + "indexed_ts": "2021-07-10T13:05:08Z", + "match_provenance": "crossref", + "match_reason": "doi", + "match_status": "exact", + "ref_index": 18, + "ref_key": "e_1_3_2_1_18_1", 
+ "source_release_ident": "yfr23sa5rbfizoqjku7uwhq6ye", + "source_work_ident": "mkmq6ju4abaexcezpb44cpx5xa", + "source_year": "2020", + "target_release_ident": "aaaaaaaaaaaaarceaaaaaaaaam", + "target_work_ident": "ae26aeacuvdi3mlgut3g32f42i" + }, + "sort": [ + 18 + ] + }, + { + "_index": "fatcat_ref_v02_20210716", + "_type": "_doc", + "_id": "yfr23sa5rbfizoqjku7uwhq6ye_19", + "_score": null, + "_source": { + "indexed_ts": "2021-07-10T13:16:52Z", + "match_provenance": "crossref", + "match_reason": "doi", + "match_status": "exact", + "ref_index": 19, + "ref_key": "e_1_3_2_1_19_1", + "source_release_ident": "yfr23sa5rbfizoqjku7uwhq6ye", + "source_work_ident": "mkmq6ju4abaexcezpb44cpx5xa", + "source_year": "2020", + "target_release_ident": "aaaaaaaaaaaaarceaaaaaaaaam", + "target_work_ident": "bqsv52bbdnggxkls5cgdbubovm" + }, + "sort": [ + 19 + ] + }, + { + "_index": "fatcat_ref_v02_20210716", + "_type": "_doc", + "_id": "yfr23sa5rbfizoqjku7uwhq6ye_21", + "_score": null, + "_source": { + "indexed_ts": "2021-07-10T13:16:54Z", + "match_provenance": "crossref", + "match_reason": "doi", + "match_status": "exact", + "ref_index": 21, + "ref_key": "e_1_3_2_1_21_1", + "source_release_ident": "yfr23sa5rbfizoqjku7uwhq6ye", + "source_work_ident": "mkmq6ju4abaexcezpb44cpx5xa", + "source_year": "2020", + "target_release_ident": "aaaaaaaaaaaaarceaaaaaaaaam", + "target_work_ident": "edpnjczcr5ebrppg5g5adrg5ty" + }, + "sort": [ + 21 + ] + }, + { + "_index": "fatcat_ref_v02_20210716", + "_type": "_doc", + "_id": "yfr23sa5rbfizoqjku7uwhq6ye_22", + "_score": null, + "_source": { + "indexed_ts": "2021-07-10T13:15:52Z", + "match_provenance": "crossref", + "match_reason": "doi", + "match_status": "exact", + "ref_index": 22, + "ref_key": "e_1_3_2_1_22_1", + "source_release_ident": "yfr23sa5rbfizoqjku7uwhq6ye", + "source_work_ident": "mkmq6ju4abaexcezpb44cpx5xa", + "source_year": "2020", + "target_release_ident": "aaaaaaaaaaaaarceaaaaaaaaam", + "target_work_ident": "cropr6fte5dbtbnnlaau7fcp3a" 
+ }, + "sort": [ + 22 + ] + }, + { + "_index": "fatcat_ref_v02_20210716", + "_type": "_doc", + "_id": "yfr23sa5rbfizoqjku7uwhq6ye_23", + "_score": null, + "_source": { + "indexed_ts": "2021-07-10T13:16:58Z", + "match_provenance": "crossref", + "match_reason": "doi", + "match_status": "exact", + "ref_index": 23, + "ref_key": "e_1_3_2_1_23_1", + "source_release_ident": "yfr23sa5rbfizoqjku7uwhq6ye", + "source_work_ident": "mkmq6ju4abaexcezpb44cpx5xa", + "source_year": "2020", + "target_release_ident": "aaaaaaaaaaaaarceaaaaaaaaam", + "target_work_ident": "e7kzr7fvmrg2repjxglg6ptzz4" + }, + "sort": [ + 23 + ] + }, + { + "_index": "fatcat_ref_v02_20210716", + "_type": "_doc", + "_id": "yfr23sa5rbfizoqjku7uwhq6ye_24", + "_score": null, + "_source": { + "indexed_ts": "2021-07-14T03:08:24Z", + "match_reason": "unknown", + "match_status": "unmatched", + "ref_index": 24, + "ref_key": "e_1_3_2_1_25_1", + "source_release_ident": "yfr23sa5rbfizoqjku7uwhq6ye", + "source_work_ident": "mkmq6ju4abaexcezpb44cpx5xa", + "source_year": "2020", + "target_unstructured": "Michael O. Lam Jeffrey K. Hollingsworth and G. W. Stewart. 2013. Dynamic floating-point cancellation detection. Parallel Comput. (2013). Michael O. Lam Jeffrey K. Hollingsworth and G. W. Stewart. 2013. Dynamic floating-point cancellation detection. Parallel Comput. (2013)." 
+ }, + "sort": [ + 24 + ] + }, + { + "_index": "fatcat_ref_v02_20210716", + "_type": "_doc", + "_id": "yfr23sa5rbfizoqjku7uwhq6ye_26", + "_score": null, + "_source": { + "indexed_ts": "2021-07-10T13:05:10Z", + "match_provenance": "crossref", + "match_reason": "doi", + "match_status": "exact", + "ref_index": 26, + "ref_key": "e_1_3_2_1_26_1", + "source_release_ident": "yfr23sa5rbfizoqjku7uwhq6ye", + "source_work_ident": "mkmq6ju4abaexcezpb44cpx5xa", + "source_year": "2020", + "target_release_ident": "aaaaaaaaaaaaarceaaaaaaaaam", + "target_work_ident": "zwdzqlf4ivdlbg6f7hmccyig7u" + }, + "sort": [ + 26 + ] + }, + { + "_index": "fatcat_ref_v02_20210716", + "_type": "_doc", + "_id": "yfr23sa5rbfizoqjku7uwhq6ye_27", + "_score": null, + "_source": { + "indexed_ts": "2021-07-10T13:16:50Z", + "match_provenance": "crossref", + "match_reason": "doi", + "match_status": "exact", + "ref_index": 27, + "ref_key": "e_1_3_2_1_27_1", + "source_release_ident": "yfr23sa5rbfizoqjku7uwhq6ye", + "source_work_ident": "mkmq6ju4abaexcezpb44cpx5xa", + "source_year": "2020", + "target_release_ident": "aaaaaaaaaaaaarceaaaaaaaaam", + "target_work_ident": "parh4accgzgbtahyxmm2sdfnzy" + }, + "sort": [ + 27 + ] + }, + { + "_index": "fatcat_ref_v02_20210716", + "_type": "_doc", + "_id": "yfr23sa5rbfizoqjku7uwhq6ye_28", + "_score": null, + "_source": { + "indexed_ts": "2021-07-10T13:05:08Z", + "match_provenance": "crossref", + "match_reason": "doi", + "match_status": "exact", + "ref_index": 28, + "ref_key": "e_1_3_2_1_28_1", + "source_release_ident": "yfr23sa5rbfizoqjku7uwhq6ye", + "source_work_ident": "mkmq6ju4abaexcezpb44cpx5xa", + "source_year": "2020", + "target_release_ident": "aaaaaaaaaaaaarceaaaaaaaaam", + "target_work_ident": "br57toqbtnathfwbmkqyyva63i" + }, + "sort": [ + 28 + ] + }, + { + "_index": "fatcat_ref_v02_20210716", + "_type": "_doc", + "_id": "yfr23sa5rbfizoqjku7uwhq6ye_29", + "_score": null, + "_source": { + "indexed_ts": "2021-07-10T13:16:49Z", + "match_provenance": "crossref", 
+ "match_reason": "doi", + "match_status": "exact", + "ref_index": 29, + "ref_key": "e_1_3_2_1_29_1", + "source_release_ident": "yfr23sa5rbfizoqjku7uwhq6ye", + "source_work_ident": "mkmq6ju4abaexcezpb44cpx5xa", + "source_year": "2020", + "target_release_ident": "aaaaaaaaaaaaarceaaaaaaaaam", + "target_work_ident": "tlc4c3mwjnh25nnfjgpoic5r44" + }, + "sort": [ + 29 + ] + }, + { + "_index": "fatcat_ref_v02_20210716", + "_type": "_doc", + "_id": "yfr23sa5rbfizoqjku7uwhq6ye_30", + "_score": null, + "_source": { + "indexed_ts": "2021-07-10T13:16:52Z", + "match_provenance": "crossref", + "match_reason": "doi", + "match_status": "exact", + "ref_index": 30, + "ref_key": "e_1_3_2_1_30_1", + "source_release_ident": "yfr23sa5rbfizoqjku7uwhq6ye", + "source_work_ident": "mkmq6ju4abaexcezpb44cpx5xa", + "source_year": "2020", + "target_release_ident": "aaaaaaaaaaaaarceaaaaaaaaam", + "target_work_ident": "z7rtkf2gljct5pvmlt6vitakri" + }, + "sort": [ + 30 + ] + }, + { + "_index": "fatcat_ref_v02_20210716", + "_type": "_doc", + "_id": "yfr23sa5rbfizoqjku7uwhq6ye_31", + "_score": null, + "_source": { + "indexed_ts": "2021-07-10T13:16:49Z", + "match_provenance": "crossref", + "match_reason": "doi", + "match_status": "exact", + "ref_index": 31, + "ref_key": "e_1_3_2_1_31_1", + "source_release_ident": "yfr23sa5rbfizoqjku7uwhq6ye", + "source_work_ident": "mkmq6ju4abaexcezpb44cpx5xa", + "source_year": "2020", + "target_release_ident": "aaaaaaaaaaaaarceaaaaaaaaam", + "target_work_ident": "qhxmybpi2fgt3hqnxx4admuwwq" + }, + "sort": [ + 31 + ] + }, + { + "_index": "fatcat_ref_v02_20210716", + "_type": "_doc", + "_id": "yfr23sa5rbfizoqjku7uwhq6ye_32", + "_score": null, + "_source": { + "indexed_ts": "2021-07-10T13:16:49Z", + "match_provenance": "crossref", + "match_reason": "doi", + "match_status": "exact", + "ref_index": 32, + "ref_key": "e_1_3_2_1_32_1", + "source_release_ident": "yfr23sa5rbfizoqjku7uwhq6ye", + "source_work_ident": "mkmq6ju4abaexcezpb44cpx5xa", + "source_year": "2020", + 
"target_release_ident": "aaaaaaaaaaaaarceaaaaaaaaam", + "target_work_ident": "xdbsyi37jvfidpfp36okylalzi" + }, + "sort": [ + 32 + ] + }, + { + "_index": "fatcat_ref_v02_20210716", + "_type": "_doc", + "_id": "yfr23sa5rbfizoqjku7uwhq6ye_34", + "_score": null, + "_source": { + "indexed_ts": "2021-07-10T13:16:43Z", + "match_provenance": "crossref", + "match_reason": "doi", + "match_status": "exact", + "ref_index": 34, + "ref_key": "e_1_3_2_1_34_1", + "source_release_ident": "yfr23sa5rbfizoqjku7uwhq6ye", + "source_work_ident": "mkmq6ju4abaexcezpb44cpx5xa", + "source_year": "2020", + "target_release_ident": "aaaaaaaaaaaaarceaaaaaaaaam", + "target_work_ident": "pc5ynoopsnclvbiyzedr3swk34" + }, + "sort": [ + 34 + ] + }, + { + "_index": "fatcat_ref_v02_20210716", + "_type": "_doc", + "_id": "yfr23sa5rbfizoqjku7uwhq6ye_35", + "_score": null, + "_source": { + "indexed_ts": "2021-07-10T13:16:56Z", + "match_provenance": "crossref", + "match_reason": "doi", + "match_status": "exact", + "ref_index": 35, + "ref_key": "e_1_3_2_1_35_1", + "source_release_ident": "yfr23sa5rbfizoqjku7uwhq6ye", + "source_work_ident": "mkmq6ju4abaexcezpb44cpx5xa", + "source_year": "2020", + "target_release_ident": "aaaaaaaaaaaaarceaaaaaaaaam", + "target_work_ident": "hnmreinbn5aehmz2hqqydfpeve" + }, + "sort": [ + 35 + ] + }, + { + "_index": "fatcat_ref_v02_20210716", + "_type": "_doc", + "_id": "yfr23sa5rbfizoqjku7uwhq6ye_36", + "_score": null, + "_source": { + "indexed_ts": "2021-07-10T13:16:52Z", + "match_provenance": "crossref", + "match_reason": "doi", + "match_status": "exact", + "ref_index": 36, + "ref_key": "e_1_3_2_1_36_1", + "source_release_ident": "yfr23sa5rbfizoqjku7uwhq6ye", + "source_work_ident": "mkmq6ju4abaexcezpb44cpx5xa", + "source_year": "2020", + "target_release_ident": "aaaaaaaaaaaaarceaaaaaaaaam", + "target_work_ident": "abumc5tgazaahbjh4hb7t6k7qm" + }, + "sort": [ + 36 + ] + } + ] + } +} diff --git a/python/tests/web_refs.py b/python/tests/web_refs.py new file mode 100644 index 
# python/tests/web_refs.py

import json
import pytest

from fatcat_web.search import get_elastic_container_random_releases
from fatcat_openapi_client.rest import ApiException
from fixtures import *


def test_basic_refs(app, mocker):
    """
    Smoke-test the release "refs-in" and "refs-out" web views (HTML and JSON)
    against canned elasticsearch responses.

    The elasticsearch HTTP transport is patched, so no live index is needed.
    Six responses are queued, in the same order the requests are made below
    (presumably each view performs exactly one search request -- confirm
    against the view code if this test starts failing on mock exhaustion).
    """

    def read_fixture(path):
        # parse a JSON fixture file from disk
        with open(path) as handle:
            return json.load(handle)

    def canned(payload):
        # (status, headers, body) tuple, as handed back by the patched
        # perform_request
        return (200, {}, json.dumps(payload))

    refs_in = read_fixture('tests/files/elastic_refs_in_release.json')
    refs_out = read_fixture('tests/files/elastic_refs_out_release.json')
    no_hits = read_fixture('tests/files/elastic_empty.json')

    perform_request = mocker.patch(
        'elasticsearch.connection.Urllib3HttpConnection.perform_request'
    )
    # per direction: populated (HTML), populated (JSON), then empty (HTML)
    perform_request.side_effect = [
        canned(doc)
        for doc in (refs_in, refs_in, no_hits, refs_out, refs_out, no_hits)
    ]

    # refs-in first, then refs-out; order must match the queued responses
    views = (
        (
            '/release/aaaaaaaaaaaaarceaaaaaaaaai/refs-in',
            '/release/aaaaaaaaaaaaarceaaaaaaaaai/refs-in.json',
        ),
        (
            '/release/aaaaaaaaaaaaarceaaaaaaaaai/refs-out',
            '/release/aaaaaaaaaaaaarceaaaaaaaaai/refs-out.json',
        ),
    )
    for html_url, json_url in views:
        # render page with populated results
        resp = app.get(html_url)
        assert resp.status_code == 200
        assert b"Why Most Published Research Findings Are False" in resp.data

        # JSON variant of the same view
        resp = app.get(json_url)
        assert resp.status_code == 200

        # same page again, this time backed by the empty response
        resp = app.get(html_url)
        assert resp.status_code == 200
        assert b"No References Found" in resp.data