From dd6149140542585f2b0bfc3b334ec2b0a88b790e Mon Sep 17 00:00:00 2001 From: Martin Czygan Date: Wed, 17 Nov 2021 14:51:50 +0100 Subject: complete FuzzyReleaseMatcher refactoring We keep the name, since the API - "matcher.match(release)" - is the same; simplified queries; at most one query is performed against Elasticsearch; parallel release retrieval from the API; optional support for release year windows; Test cases are expressed in YAML and will be auto-loaded from the specified directory; tests work against the current search endpoint, which means the actual output may change on index updates; for the moment, we think this setup is relatively simple and not too unstable. about: title contrib, partial name input: > { "contribs": [ { "raw_name": "Adams" } ], "title": "digital libraries", "ext_ids": {} } release_year_padding: 1 expected: - 7rmvqtrb2jdyhcxxodihzzcugy - a2u6ougtsjcbvczou6sazsulcm - dy45vilej5diros6zmax46nm4e - exuwhhayird4fdjmmsiqpponlq - gqrj7jikezgcfpjfazhpf4e7c4 - mkmqt3453relbpuyktnmsg6hjq - t2g5sl3dgzchtnq7dynxyzje44 - t4tvenhrvzamraxrvvxivxmvga - wd3oeoi3bffknfbg2ymleqc4ja - y63a6dhrfnb7bltlxfynydbojy --- tests/files/simple_fuzzy_release_matcher/9.yaml | 139 ++++++++++++++++++++++++ 1 file changed, 139 insertions(+) create mode 100644 tests/files/simple_fuzzy_release_matcher/9.yaml (limited to 'tests/files/simple_fuzzy_release_matcher/9.yaml') diff --git a/tests/files/simple_fuzzy_release_matcher/9.yaml b/tests/files/simple_fuzzy_release_matcher/9.yaml new file mode 100644 index 0000000..b43e53a --- /dev/null +++ b/tests/files/simple_fuzzy_release_matcher/9.yaml @@ -0,0 +1,139 @@ +about: a full document, https://fatcat.wiki/release/yvqtz2zvkzcbpj4jxrp7bvydfu +input: > + { + "abstracts": [], + "refs": [], + "contribs": [ + { + "index": 0, + "raw_name": "Annelies Schulte Nordholt", + "role": "author", + "extra": { + "seq": "first" + } + } + ], + "publisher": "Uopen Journals", + "pages": "66", + "ext_ids": { + "doi": "10.18352/bmgn-lchr.128" + }, 
+ "release_year": 2008, + "release_date": "2008-02-19", + "release_stage": "published", + "release_type": "article-journal", + "container_id": "sm7svbj64vc55gj4p23t7c3lrm", + "webcaptures": [], + "filesets": [], + "files": [ + { + "release_ids": [ + "yvqtz2zvkzcbpj4jxrp7bvydfu" + ], + "mimetype": "application/pdf", + "urls": [ + { + "url": "https://www.revue-relief.org/articles/10.18352/relief.128/galley/159/download/", + "rel": "publisher" + }, + { + "url": "https://web.archive.org/web/20200209043715/https://www.revue-relief.org/articles/10.18352/relief.128/galley/159/download/", + "rel": "webarchive" + } + ], + "sha256": "96f3552fa3eee10282109dd994f6993caf44627946317d03862a5df167140b23", + "sha1": "a9ba7c2038e2a77ac1b1144344443a3835d83c40", + "md5": "7dae3ec6c1d65cae6a91554071cc9625", + "size": 889420, + "revision": "57e3b801-0d84-405b-be8b-6b2b0583cd75", + "ident": "oew6z4a6gvfqxc5kiy2r62ucfq", + "state": "active" + } + ], + "container": { + "wikidata_qid": "Q15763709", + "issnp": "1873-5045", + "issne": "1873-5045", + "issnl": "1873-5045", + "publisher": "Uopen Journals", + "name": "Relief: Revue Électronique de Littérature Francaise", + "extra": { + "country": "nl", + "default_license": "CC-BY", + "doaj": { + "as_of": "2021-11-20", + "default_license": "CC-BY", + "seal": false + }, + "kbart": { + "clockss": { + "year_spans": [ + [ + 2007, + 2016 + ] + ] + }, + "lockss": { + "year_spans": [ + [ + 2007, + 2019 + ] + ] + }, + "pkp_pln": { + "year_spans": [ + [ + 2007, + 2021 + ] + ] + }, + "portico": { + "year_spans": [ + [ + 2007, + 2017 + ] + ] + } + }, + "languages": [ + "en" + ], + "publisher_type": "unipress", + "road": { + "as_of": "2018-01-24" + }, + "sherpa_romeo": { + "color": "blue" + }, + "szczepanski": { + "as_of": "2018" + }, + "urls": [ + "https://www.revue-relief.org/", + "http://www.revue-relief.org/index.php/relief", + "http://www.revue-relief.org/index.php/relief/about" + ] + }, + "revision": "2f36f957-7b60-4452-9310-1bd5e0035c0e", + "ident": 
"sm7svbj64vc55gj4p23t7c3lrm", + "state": "active" + }, + "work_id": "qcpd2i2txfdi5emqb7fxsawk6e", + "title": "Georges Perec: topographies parisiennes du flâneur", + "state": "active", + "ident": "yvqtz2zvkzcbpj4jxrp7bvydfu", + "revision": "c9e80d74-8c4f-47a7-b49a-689f26856dff", + "extra": { + "crossref": { + "type": "journal-article" + } + } + } +release_year_padding: 1 +expected: + - yvqtz2zvkzcbpj4jxrp7bvydfu + - lttg27o7mjganpkhrgy3xyv7vu -- cgit v1.2.3