about | summary | refs | log | tree | commit | diff | stats
path: root/tests/files/fuzzy_release_matcher
diff options
context:
space:
mode:
author: Martin Czygan <martin.czygan@gmail.com> 2021-12-06 19:59:51 +0100
committer: Martin Czygan <martin.czygan@gmail.com> 2021-12-06 19:59:51 +0100
commit: 5bd8ee08a3e0f52893c1b7afa6bc4f062b7c062c (patch)
tree: 70baa8e9c247e689061f064760fc7f25643c1aa1 /tests/files/fuzzy_release_matcher
parent: dd6149140542585f2b0bfc3b334ec2b0a88b790e (diff)
download: fuzzycat-5bd8ee08a3e0f52893c1b7afa6bc4f062b7c062c.tar.gz
          fuzzycat-5bd8ee08a3e0f52893c1b7afa6bc4f062b7c062c.zip
matching: cleanup test files
Diffstat (limited to 'tests/files/fuzzy_release_matcher')
-rw-r--r-- tests/files/fuzzy_release_matcher/0.yaml | 16
-rw-r--r-- tests/files/fuzzy_release_matcher/1.yaml | 24
-rw-r--r-- tests/files/fuzzy_release_matcher/2.yaml | 24
-rw-r--r-- tests/files/fuzzy_release_matcher/3.yaml | 19
-rw-r--r-- tests/files/fuzzy_release_matcher/4.yaml | 16
-rw-r--r-- tests/files/fuzzy_release_matcher/5.yaml | 16
-rw-r--r-- tests/files/fuzzy_release_matcher/6.yaml | 24
-rw-r--r-- tests/files/fuzzy_release_matcher/7.yaml | 10
-rw-r--r-- tests/files/fuzzy_release_matcher/8.yaml | 139
-rw-r--r-- tests/files/fuzzy_release_matcher/9.yaml | 139
10 files changed, 427 insertions, 0 deletions
diff --git a/tests/files/fuzzy_release_matcher/0.yaml b/tests/files/fuzzy_release_matcher/0.yaml
new file mode 100644
index 0000000..71fc992
--- /dev/null
+++ b/tests/files/fuzzy_release_matcher/0.yaml
@@ -0,0 +1,16 @@
+about: title and contrib
+input: >
+ {
+ "contribs": [
+ {
+ "raw_name": "Michael Adams"
+ }
+ ],
+ "title": "digital libraries",
+ "ext_ids": {}
+ }
+release_year_padding: 1
+expected:
+ - 7rmvqtrb2jdyhcxxodihzzcugy
+ - upm5nljirrbsfenoyxsisciltq
+ - wd3oeoi3bffknfbg2ymleqc4ja
diff --git a/tests/files/fuzzy_release_matcher/1.yaml b/tests/files/fuzzy_release_matcher/1.yaml
new file mode 100644
index 0000000..df6a954
--- /dev/null
+++ b/tests/files/fuzzy_release_matcher/1.yaml
@@ -0,0 +1,24 @@
+about: title contrib, partial name
+input: >
+ {
+ "contribs": [
+ {
+ "raw_name": "Adams"
+ }
+ ],
+ "title": "digital libraries",
+ "ext_ids": {}
+ }
+release_year_padding: 1
+expected:
+ - 7rmvqtrb2jdyhcxxodihzzcugy
+ - a2u6ougtsjcbvczou6sazsulcm
+ - dy45vilej5diros6zmax46nm4e
+ - exuwhhayird4fdjmmsiqpponlq
+ - gqrj7jikezgcfpjfazhpf4e7c4
+ - mkmqt3453relbpuyktnmsg6hjq
+ - t2g5sl3dgzchtnq7dynxyzje44
+ - t4tvenhrvzamraxrvvxivxmvga
+ - wd3oeoi3bffknfbg2ymleqc4ja
+ - y63a6dhrfnb7bltlxfynydbojy
+
diff --git a/tests/files/fuzzy_release_matcher/2.yaml b/tests/files/fuzzy_release_matcher/2.yaml
new file mode 100644
index 0000000..df6a954
--- /dev/null
+++ b/tests/files/fuzzy_release_matcher/2.yaml
@@ -0,0 +1,24 @@
+about: title contrib, partial name
+input: >
+ {
+ "contribs": [
+ {
+ "raw_name": "Adams"
+ }
+ ],
+ "title": "digital libraries",
+ "ext_ids": {}
+ }
+release_year_padding: 1
+expected:
+ - 7rmvqtrb2jdyhcxxodihzzcugy
+ - a2u6ougtsjcbvczou6sazsulcm
+ - dy45vilej5diros6zmax46nm4e
+ - exuwhhayird4fdjmmsiqpponlq
+ - gqrj7jikezgcfpjfazhpf4e7c4
+ - mkmqt3453relbpuyktnmsg6hjq
+ - t2g5sl3dgzchtnq7dynxyzje44
+ - t4tvenhrvzamraxrvvxivxmvga
+ - wd3oeoi3bffknfbg2ymleqc4ja
+ - y63a6dhrfnb7bltlxfynydbojy
+
diff --git a/tests/files/fuzzy_release_matcher/3.yaml b/tests/files/fuzzy_release_matcher/3.yaml
new file mode 100644
index 0000000..1ab761b
--- /dev/null
+++ b/tests/files/fuzzy_release_matcher/3.yaml
@@ -0,0 +1,19 @@
+about: title only
+input: >
+ {
+ "title": "The future of scholarly communications",
+ "ext_ids": {}
+ }
+release_year_padding: 0
+expected:
+ - '2f57funqizf4lcxjanls45upom'
+ - '3p2hngx6kfa33bdaobipimdzhe'
+ - '75dzcdywlbb3logmrrpkabanfa'
+ - 'ccoocm7uzjgwnlpfk5fbwfudjm'
+ - 'nfydgfziuvhete6p3lrn4u325u'
+ - 'ntpiporu75bendibjku4kjmd5q'
+ - 'op6a5fclonhrxm3zlo6ub2tlw4'
+ - 'opoxzl3zzbccdh5tptm5p2krem'
+ - 'umzryrtocbakberuubjm2pgxum'
+ - 'zb4bjnwqsveyzcwebvvmnsoq7u'
+
diff --git a/tests/files/fuzzy_release_matcher/4.yaml b/tests/files/fuzzy_release_matcher/4.yaml
new file mode 100644
index 0000000..9419406
--- /dev/null
+++ b/tests/files/fuzzy_release_matcher/4.yaml
@@ -0,0 +1,16 @@
+about: title, year
+input: >
+ {
+ "title": "The future of scholarly communications",
+ "release_year": 2014,
+ "ext_ids": {}
+ }
+release_year_padding: 0
+expected:
+ - '66r4s55dpvht5jghwkhupai2km'
+ - 'ccoocm7uzjgwnlpfk5fbwfudjm'
+ - 'du4awowpsbbcjlo2pe6dvmxewu'
+ - 'nfydgfziuvhete6p3lrn4u325u'
+ - 'ntpiporu75bendibjku4kjmd5q'
+ - 'op6a5fclonhrxm3zlo6ub2tlw4'
+ - 'xsuxmk5dyba6rnkeslipxxdlzi'
diff --git a/tests/files/fuzzy_release_matcher/5.yaml b/tests/files/fuzzy_release_matcher/5.yaml
new file mode 100644
index 0000000..1eb435b
--- /dev/null
+++ b/tests/files/fuzzy_release_matcher/5.yaml
@@ -0,0 +1,16 @@
+about: contrib, year
+input: >
+ {
+ "contribs": [
+ {
+ "raw_name": "Lissandrini"
+ }
+ ],
+ "release_year": 2014,
+ "ext_ids": {}
+ }
+release_year_padding: 1
+expected:
+ - 'xfhjsixnlvbibigrilisqqvfk4'
+ - 'zfhfpo2shrdexpgd2as4fz7wnm'
+ - 'cyct2bqs5feqbowg6ovv53pdfq'
diff --git a/tests/files/fuzzy_release_matcher/6.yaml b/tests/files/fuzzy_release_matcher/6.yaml
new file mode 100644
index 0000000..ae52b23
--- /dev/null
+++ b/tests/files/fuzzy_release_matcher/6.yaml
@@ -0,0 +1,24 @@
+about: contrib, year
+input: >
+ {
+ "contribs": [
+ {
+ "raw_name": "Goodwin"
+ }
+ ],
+ "release_year": 2014,
+ "ext_ids": {}
+ }
+release_year_padding: 0
+expected:
+ - 2bbtr4cltbgannqc6vqijvvzdq
+ - 34i2hba6tzf3xomobhumfkkvga
+ - 62sz5fhhuvenpfctf6wejl5m2i
+ - chnqmdm4yfd4zk6kawujvsbhwy
+ - chs7be23vfdthk3xre54w534zm
+ - f5lp3nipazhyxoa2xarlomkofm
+ - hikujb5wmvasnoat2myt56l63y
+ - qbom7rwqtzfypa5hltgbx4e2iq
+ - qh44drz3bvg2ndzwzc55xops7y
+ - r4n57quetbf7tddwodjauegmzq
+
diff --git a/tests/files/fuzzy_release_matcher/7.yaml b/tests/files/fuzzy_release_matcher/7.yaml
new file mode 100644
index 0000000..2330f0d
--- /dev/null
+++ b/tests/files/fuzzy_release_matcher/7.yaml
@@ -0,0 +1,10 @@
+about: just a subtitle
+input: >
+ {
+ "subtitle": "topographies parisiennes",
+ "ext_ids": {}
+ }
+release_year_padding: 1
+expected:
+ - yvqtz2zvkzcbpj4jxrp7bvydfu
+ - lttg27o7mjganpkhrgy3xyv7vu
diff --git a/tests/files/fuzzy_release_matcher/8.yaml b/tests/files/fuzzy_release_matcher/8.yaml
new file mode 100644
index 0000000..b43e53a
--- /dev/null
+++ b/tests/files/fuzzy_release_matcher/8.yaml
@@ -0,0 +1,139 @@
+about: a full document, https://fatcat.wiki/release/yvqtz2zvkzcbpj4jxrp7bvydfu
+input: >
+ {
+ "abstracts": [],
+ "refs": [],
+ "contribs": [
+ {
+ "index": 0,
+ "raw_name": "Annelies Schulte Nordholt",
+ "role": "author",
+ "extra": {
+ "seq": "first"
+ }
+ }
+ ],
+ "publisher": "Uopen Journals",
+ "pages": "66",
+ "ext_ids": {
+ "doi": "10.18352/bmgn-lchr.128"
+ },
+ "release_year": 2008,
+ "release_date": "2008-02-19",
+ "release_stage": "published",
+ "release_type": "article-journal",
+ "container_id": "sm7svbj64vc55gj4p23t7c3lrm",
+ "webcaptures": [],
+ "filesets": [],
+ "files": [
+ {
+ "release_ids": [
+ "yvqtz2zvkzcbpj4jxrp7bvydfu"
+ ],
+ "mimetype": "application/pdf",
+ "urls": [
+ {
+ "url": "https://www.revue-relief.org/articles/10.18352/relief.128/galley/159/download/",
+ "rel": "publisher"
+ },
+ {
+ "url": "https://web.archive.org/web/20200209043715/https://www.revue-relief.org/articles/10.18352/relief.128/galley/159/download/",
+ "rel": "webarchive"
+ }
+ ],
+ "sha256": "96f3552fa3eee10282109dd994f6993caf44627946317d03862a5df167140b23",
+ "sha1": "a9ba7c2038e2a77ac1b1144344443a3835d83c40",
+ "md5": "7dae3ec6c1d65cae6a91554071cc9625",
+ "size": 889420,
+ "revision": "57e3b801-0d84-405b-be8b-6b2b0583cd75",
+ "ident": "oew6z4a6gvfqxc5kiy2r62ucfq",
+ "state": "active"
+ }
+ ],
+ "container": {
+ "wikidata_qid": "Q15763709",
+ "issnp": "1873-5045",
+ "issne": "1873-5045",
+ "issnl": "1873-5045",
+ "publisher": "Uopen Journals",
+ "name": "Relief: Revue Électronique de Littérature Francaise",
+ "extra": {
+ "country": "nl",
+ "default_license": "CC-BY",
+ "doaj": {
+ "as_of": "2021-11-20",
+ "default_license": "CC-BY",
+ "seal": false
+ },
+ "kbart": {
+ "clockss": {
+ "year_spans": [
+ [
+ 2007,
+ 2016
+ ]
+ ]
+ },
+ "lockss": {
+ "year_spans": [
+ [
+ 2007,
+ 2019
+ ]
+ ]
+ },
+ "pkp_pln": {
+ "year_spans": [
+ [
+ 2007,
+ 2021
+ ]
+ ]
+ },
+ "portico": {
+ "year_spans": [
+ [
+ 2007,
+ 2017
+ ]
+ ]
+ }
+ },
+ "languages": [
+ "en"
+ ],
+ "publisher_type": "unipress",
+ "road": {
+ "as_of": "2018-01-24"
+ },
+ "sherpa_romeo": {
+ "color": "blue"
+ },
+ "szczepanski": {
+ "as_of": "2018"
+ },
+ "urls": [
+ "https://www.revue-relief.org/",
+ "http://www.revue-relief.org/index.php/relief",
+ "http://www.revue-relief.org/index.php/relief/about"
+ ]
+ },
+ "revision": "2f36f957-7b60-4452-9310-1bd5e0035c0e",
+ "ident": "sm7svbj64vc55gj4p23t7c3lrm",
+ "state": "active"
+ },
+ "work_id": "qcpd2i2txfdi5emqb7fxsawk6e",
+ "title": "Georges Perec: topographies parisiennes du flâneur",
+ "state": "active",
+ "ident": "yvqtz2zvkzcbpj4jxrp7bvydfu",
+ "revision": "c9e80d74-8c4f-47a7-b49a-689f26856dff",
+ "extra": {
+ "crossref": {
+ "type": "journal-article"
+ }
+ }
+ }
+release_year_padding: 1
+expected:
+ - yvqtz2zvkzcbpj4jxrp7bvydfu
+ - lttg27o7mjganpkhrgy3xyv7vu
diff --git a/tests/files/fuzzy_release_matcher/9.yaml b/tests/files/fuzzy_release_matcher/9.yaml
new file mode 100644
index 0000000..b43e53a
--- /dev/null
+++ b/tests/files/fuzzy_release_matcher/9.yaml
@@ -0,0 +1,139 @@
+about: a full document, https://fatcat.wiki/release/yvqtz2zvkzcbpj4jxrp7bvydfu
+input: >
+ {
+ "abstracts": [],
+ "refs": [],
+ "contribs": [
+ {
+ "index": 0,
+ "raw_name": "Annelies Schulte Nordholt",
+ "role": "author",
+ "extra": {
+ "seq": "first"
+ }
+ }
+ ],
+ "publisher": "Uopen Journals",
+ "pages": "66",
+ "ext_ids": {
+ "doi": "10.18352/bmgn-lchr.128"
+ },
+ "release_year": 2008,
+ "release_date": "2008-02-19",
+ "release_stage": "published",
+ "release_type": "article-journal",
+ "container_id": "sm7svbj64vc55gj4p23t7c3lrm",
+ "webcaptures": [],
+ "filesets": [],
+ "files": [
+ {
+ "release_ids": [
+ "yvqtz2zvkzcbpj4jxrp7bvydfu"
+ ],
+ "mimetype": "application/pdf",
+ "urls": [
+ {
+ "url": "https://www.revue-relief.org/articles/10.18352/relief.128/galley/159/download/",
+ "rel": "publisher"
+ },
+ {
+ "url": "https://web.archive.org/web/20200209043715/https://www.revue-relief.org/articles/10.18352/relief.128/galley/159/download/",
+ "rel": "webarchive"
+ }
+ ],
+ "sha256": "96f3552fa3eee10282109dd994f6993caf44627946317d03862a5df167140b23",
+ "sha1": "a9ba7c2038e2a77ac1b1144344443a3835d83c40",
+ "md5": "7dae3ec6c1d65cae6a91554071cc9625",
+ "size": 889420,
+ "revision": "57e3b801-0d84-405b-be8b-6b2b0583cd75",
+ "ident": "oew6z4a6gvfqxc5kiy2r62ucfq",
+ "state": "active"
+ }
+ ],
+ "container": {
+ "wikidata_qid": "Q15763709",
+ "issnp": "1873-5045",
+ "issne": "1873-5045",
+ "issnl": "1873-5045",
+ "publisher": "Uopen Journals",
+ "name": "Relief: Revue Électronique de Littérature Francaise",
+ "extra": {
+ "country": "nl",
+ "default_license": "CC-BY",
+ "doaj": {
+ "as_of": "2021-11-20",
+ "default_license": "CC-BY",
+ "seal": false
+ },
+ "kbart": {
+ "clockss": {
+ "year_spans": [
+ [
+ 2007,
+ 2016
+ ]
+ ]
+ },
+ "lockss": {
+ "year_spans": [
+ [
+ 2007,
+ 2019
+ ]
+ ]
+ },
+ "pkp_pln": {
+ "year_spans": [
+ [
+ 2007,
+ 2021
+ ]
+ ]
+ },
+ "portico": {
+ "year_spans": [
+ [
+ 2007,
+ 2017
+ ]
+ ]
+ }
+ },
+ "languages": [
+ "en"
+ ],
+ "publisher_type": "unipress",
+ "road": {
+ "as_of": "2018-01-24"
+ },
+ "sherpa_romeo": {
+ "color": "blue"
+ },
+ "szczepanski": {
+ "as_of": "2018"
+ },
+ "urls": [
+ "https://www.revue-relief.org/",
+ "http://www.revue-relief.org/index.php/relief",
+ "http://www.revue-relief.org/index.php/relief/about"
+ ]
+ },
+ "revision": "2f36f957-7b60-4452-9310-1bd5e0035c0e",
+ "ident": "sm7svbj64vc55gj4p23t7c3lrm",
+ "state": "active"
+ },
+ "work_id": "qcpd2i2txfdi5emqb7fxsawk6e",
+ "title": "Georges Perec: topographies parisiennes du flâneur",
+ "state": "active",
+ "ident": "yvqtz2zvkzcbpj4jxrp7bvydfu",
+ "revision": "c9e80d74-8c4f-47a7-b49a-689f26856dff",
+ "extra": {
+ "crossref": {
+ "type": "journal-article"
+ }
+ }
+ }
+release_year_padding: 1
+expected:
+ - yvqtz2zvkzcbpj4jxrp7bvydfu
+ - lttg27o7mjganpkhrgy3xyv7vu