aboutsummaryrefslogtreecommitdiffstats
path: root/tests/files
diff options
context:
space:
mode:
Diffstat (limited to 'tests/files')
-rw-r--r--tests/files/README.md5
-rw-r--r--tests/files/fuzzy_release_match_release_exact_title_exact_contrib/0.yaml13
-rw-r--r--tests/files/fuzzy_release_match_release_exact_title_exact_contrib/1.yaml13
-rw-r--r--tests/files/fuzzy_release_match_release_exact_title_exact_contrib/2.yaml16
-rw-r--r--tests/files/fuzzy_release_match_release_exact_title_exact_contrib/3.yaml16
-rw-r--r--tests/files/fuzzy_release_match_release_exact_title_exact_contrib/4.yaml16
-rw-r--r--tests/files/fuzzy_release_match_release_exact_title_partial_contrib/0.yaml14
-rw-r--r--tests/files/fuzzy_release_match_release_exact_title_partial_contrib/1.yaml14
-rw-r--r--tests/files/fuzzy_release_match_release_exact_title_partial_contrib/2.yaml17
-rw-r--r--tests/files/fuzzy_release_match_release_exact_title_partial_contrib/3.yaml17
-rw-r--r--tests/files/fuzzy_release_match_release_exact_title_partial_contrib/4.yaml17
-rw-r--r--tests/files/fuzzy_release_match_release_exact_title_partial_contrib/5.yaml17
-rw-r--r--tests/files/fuzzy_release_match_release_exact_title_partial_contrib/6.yaml14
-rw-r--r--tests/files/fuzzy_release_match_release_exact_title_partial_contrib/7.yaml17
14 files changed, 206 insertions, 0 deletions
diff --git a/tests/files/README.md b/tests/files/README.md
new file mode 100644
index 0000000..ef674d6
--- /dev/null
+++ b/tests/files/README.md
@@ -0,0 +1,5 @@
+# Matcher Test Files
+
+The goal here is to have mostly language-independent test cases for matching.
+
+Each subdirectory corresponds to a test function and contains examples for it.
diff --git a/tests/files/fuzzy_release_match_release_exact_title_exact_contrib/0.yaml b/tests/files/fuzzy_release_match_release_exact_title_exact_contrib/0.yaml
new file mode 100644
index 0000000..2df8d9a
--- /dev/null
+++ b/tests/files/fuzzy_release_match_release_exact_title_exact_contrib/0.yaml
@@ -0,0 +1,13 @@
+title: titles are case insensitive
+date: 2021-11-08
+input: >
+ {
+ "contribs": [
+ {
+ "raw_name": "Michael Adams"
+ }
+ ],
+ "title": "digital libraries",
+ "ext_ids": {}
+ }
+expected: 2
diff --git a/tests/files/fuzzy_release_match_release_exact_title_exact_contrib/1.yaml b/tests/files/fuzzy_release_match_release_exact_title_exact_contrib/1.yaml
new file mode 100644
index 0000000..1070408
--- /dev/null
+++ b/tests/files/fuzzy_release_match_release_exact_title_exact_contrib/1.yaml
@@ -0,0 +1,13 @@
+title: another vanilla query
+date: 2021-11-08
+input: >
+ {
+ "contribs": [
+ {
+ "raw_name": "Poul-Henning Kamp"
+ }
+ ],
+ "title": "The hyperdimensional tar pit",
+ "ext_ids": {}
+ }
+expected: 2
diff --git a/tests/files/fuzzy_release_match_release_exact_title_exact_contrib/2.yaml b/tests/files/fuzzy_release_match_release_exact_title_exact_contrib/2.yaml
new file mode 100644
index 0000000..882e746
--- /dev/null
+++ b/tests/files/fuzzy_release_match_release_exact_title_exact_contrib/2.yaml
@@ -0,0 +1,16 @@
+title: order of contribs does not matter
+date: 2021-11-08
+input: >
+ {
+ "contribs": [
+ {
+ "raw_name": "Maurice Florence"
+ },
+ {
+ "raw_name": "Tuomo Tiisala"
+ }
+ ],
+ "title": "Foucault",
+ "ext_ids": {}
+ }
+expected: 1
diff --git a/tests/files/fuzzy_release_match_release_exact_title_exact_contrib/3.yaml b/tests/files/fuzzy_release_match_release_exact_title_exact_contrib/3.yaml
new file mode 100644
index 0000000..0a2ad12
--- /dev/null
+++ b/tests/files/fuzzy_release_match_release_exact_title_exact_contrib/3.yaml
@@ -0,0 +1,16 @@
+title: order of contribs does not matter
+date: 2021-11-08
+input: >
+ {
+ "contribs": [
+ {
+ "raw_name": "Tuomo Tiisala"
+ },
+ {
+ "raw_name": "Maurice Florence"
+ }
+ ],
+ "title": "Foucault",
+ "ext_ids": {}
+ }
+expected: 1
diff --git a/tests/files/fuzzy_release_match_release_exact_title_exact_contrib/4.yaml b/tests/files/fuzzy_release_match_release_exact_title_exact_contrib/4.yaml
new file mode 100644
index 0000000..36ea0fe
--- /dev/null
+++ b/tests/files/fuzzy_release_match_release_exact_title_exact_contrib/4.yaml
@@ -0,0 +1,16 @@
+title: short version of name should not work
+date: 2021-11-08
+input: >
+ {
+ "contribs": [
+ {
+ "raw_name": "Tuomo Tiisala"
+ },
+ {
+ "raw_name": "M. Florence"
+ }
+ ],
+ "title": "Foucault",
+ "ext_ids": {}
+ }
+expected: 0
diff --git a/tests/files/fuzzy_release_match_release_exact_title_partial_contrib/0.yaml b/tests/files/fuzzy_release_match_release_exact_title_partial_contrib/0.yaml
new file mode 100644
index 0000000..07230e8
--- /dev/null
+++ b/tests/files/fuzzy_release_match_release_exact_title_partial_contrib/0.yaml
@@ -0,0 +1,14 @@
+title: titles are case insensitive
+date: 2021-11-08
+input: >
+ {
+ "contribs": [
+ {
+ "raw_name": "Michael Adams"
+ }
+ ],
+ "title": "digital libraries",
+ "ext_ids": {}
+ }
+jaccard_index_threshold: 1.0
+expected: 2
diff --git a/tests/files/fuzzy_release_match_release_exact_title_partial_contrib/1.yaml b/tests/files/fuzzy_release_match_release_exact_title_partial_contrib/1.yaml
new file mode 100644
index 0000000..62e9586
--- /dev/null
+++ b/tests/files/fuzzy_release_match_release_exact_title_partial_contrib/1.yaml
@@ -0,0 +1,14 @@
+title: another vanilla query
+date: 2021-11-08
+input: >
+ {
+ "contribs": [
+ {
+ "raw_name": "Poul-Henning Kamp"
+ }
+ ],
+ "title": "The hyperdimensional tar pit",
+ "ext_ids": {}
+ }
+jaccard_index_threshold: 1.0
+expected: 2
diff --git a/tests/files/fuzzy_release_match_release_exact_title_partial_contrib/2.yaml b/tests/files/fuzzy_release_match_release_exact_title_partial_contrib/2.yaml
new file mode 100644
index 0000000..b89e825
--- /dev/null
+++ b/tests/files/fuzzy_release_match_release_exact_title_partial_contrib/2.yaml
@@ -0,0 +1,17 @@
+title: order of contribs does not matter
+date: 2021-11-08
+input: >
+ {
+ "contribs": [
+ {
+ "raw_name": "Maurice Florence"
+ },
+ {
+ "raw_name": "Tuomo Tiisala"
+ }
+ ],
+ "title": "Foucault",
+ "ext_ids": {}
+ }
+jaccard_index_threshold: 1.0
+expected: 1
diff --git a/tests/files/fuzzy_release_match_release_exact_title_partial_contrib/3.yaml b/tests/files/fuzzy_release_match_release_exact_title_partial_contrib/3.yaml
new file mode 100644
index 0000000..3de7262
--- /dev/null
+++ b/tests/files/fuzzy_release_match_release_exact_title_partial_contrib/3.yaml
@@ -0,0 +1,17 @@
+title: order of contribs does not matter
+date: 2021-11-08
+input: >
+ {
+ "contribs": [
+ {
+ "raw_name": "Tuomo Tiisala"
+ },
+ {
+ "raw_name": "Maurice Florence"
+ }
+ ],
+ "title": "Foucault",
+ "ext_ids": {}
+ }
+jaccard_index_threshold: 1.0
+expected: 1
diff --git a/tests/files/fuzzy_release_match_release_exact_title_partial_contrib/4.yaml b/tests/files/fuzzy_release_match_release_exact_title_partial_contrib/4.yaml
new file mode 100644
index 0000000..39fb065
--- /dev/null
+++ b/tests/files/fuzzy_release_match_release_exact_title_partial_contrib/4.yaml
@@ -0,0 +1,17 @@
+title: short version of name should not work
+date: 2021-11-08
+input: >
+ {
+ "contribs": [
+ {
+ "raw_name": "Tuomo Tiisala"
+ },
+ {
+ "raw_name": "M. Florence"
+ }
+ ],
+ "title": "Foucault",
+ "ext_ids": {}
+ }
+jaccard_index_threshold: 1.0
+expected: 0
diff --git a/tests/files/fuzzy_release_match_release_exact_title_partial_contrib/5.yaml b/tests/files/fuzzy_release_match_release_exact_title_partial_contrib/5.yaml
new file mode 100644
index 0000000..fff19fa
--- /dev/null
+++ b/tests/files/fuzzy_release_match_release_exact_title_partial_contrib/5.yaml
@@ -0,0 +1,17 @@
+title: here, Iz Beltagy is missing from author, but still retrieved
+date: 2021-11-08
+input: >
+ {
+ "contribs": [
+ {
+ "raw_name": "Arman Cohan"
+ },
+ {
+ "raw_name": "Kyle Lo"
+ }
+ ],
+ "title": "SciBERT: A Pretrained Language Model for Scientific Text",
+ "ext_ids": {}
+ }
+jaccard_index_threshold: 0.5
+expected: 3
diff --git a/tests/files/fuzzy_release_match_release_exact_title_partial_contrib/6.yaml b/tests/files/fuzzy_release_match_release_exact_title_partial_contrib/6.yaml
new file mode 100644
index 0000000..d4e0025
--- /dev/null
+++ b/tests/files/fuzzy_release_match_release_exact_title_partial_contrib/6.yaml
@@ -0,0 +1,14 @@
+title: here, 2/3 authors are missing, we fail with jaccard index 0.5
+date: 2021-11-08
+input: >
+ {
+ "contribs": [
+ {
+ "raw_name": "Arman Cohan"
+ }
+ ],
+ "title": "SciBERT: A Pretrained Language Model for Scientific Text",
+ "ext_ids": {}
+ }
+jaccard_index_threshold: 0.5
+expected: 0
diff --git a/tests/files/fuzzy_release_match_release_exact_title_partial_contrib/7.yaml b/tests/files/fuzzy_release_match_release_exact_title_partial_contrib/7.yaml
new file mode 100644
index 0000000..23d5a8d
--- /dev/null
+++ b/tests/files/fuzzy_release_match_release_exact_title_partial_contrib/7.yaml
@@ -0,0 +1,17 @@
+title: match, despite trailing whitespace
+date: 2021-11-08
+input: >
+ {
+ "contribs": [
+ {
+ "raw_name": "Arman Cohan"
+ },
+ {
+ "raw_name": "Kyle Lo"
+ }
+ ],
+ "title": "SciBERT: A Pretrained Language Model for Scientific Text ",
+ "ext_ids": {}
+ }
+jaccard_index_threshold: 0.5
+expected: 3