From 30248904e932e13c3dbb90579bd85953168ad770 Mon Sep 17 00:00:00 2001 From: Martin Czygan Date: Thu, 10 Dec 2020 14:41:05 +0100 Subject: add cases --- fuzzycat/common.py | 1 + notes/2020_11_testruns.md | 11 ++++++++ tests/data/release/27lrseg7jfhxbdxohph7il7a7m | 32 ++++++++++++++++++++++ tests/data/release/4m6ijk5gu5gxhcbvd2f4i2xk5u | 39 +++++++++++++++++++++++++++ tests/data/release/a3kmwzn4kjerbingv7oyfs5gwe | 21 +++++++++++++++ tests/data/release/e3fs7ttdbrds3bvsbm7lynzlpu | 38 ++++++++++++++++++++++++++ tests/data/release/gtsbvudmjzdeppqgzjpmfedycq | 32 ++++++++++++++++++++++ tests/data/release/tebqkxnjpzfxnpsqmt5klv2ppm | 38 ++++++++++++++++++++++++++ tests/data/release/uqyjav3arngq7bqmzsllxrkpmu | 27 +++++++++++++++++++ tests/data/release/vpswmj3cgfhktggwvmz33fkwuq | 31 +++++++++++++++++++++ tests/data/verify.csv | 4 +++ tests/test_verify.py | 1 + 12 files changed, 275 insertions(+) create mode 100644 tests/data/release/27lrseg7jfhxbdxohph7il7a7m create mode 100644 tests/data/release/4m6ijk5gu5gxhcbvd2f4i2xk5u create mode 100644 tests/data/release/a3kmwzn4kjerbingv7oyfs5gwe create mode 100644 tests/data/release/e3fs7ttdbrds3bvsbm7lynzlpu create mode 100644 tests/data/release/gtsbvudmjzdeppqgzjpmfedycq create mode 100644 tests/data/release/tebqkxnjpzfxnpsqmt5klv2ppm create mode 100644 tests/data/release/uqyjav3arngq7bqmzsllxrkpmu create mode 100644 tests/data/release/vpswmj3cgfhktggwvmz33fkwuq diff --git a/fuzzycat/common.py b/fuzzycat/common.py index 20a5ddd..2bb25c6 100644 --- a/fuzzycat/common.py +++ b/fuzzycat/common.py @@ -10,6 +10,7 @@ class Status(str, Enum): EXACT = 'exact' STRONG = 'strong' WEAK = 'weak' + TODO = 'todo' class OK(str, Enum): diff --git a/notes/2020_11_testruns.md b/notes/2020_11_testruns.md index 26dd063..97b9252 100644 --- a/notes/2020_11_testruns.md +++ b/notes/2020_11_testruns.md @@ -215,9 +215,20 @@ Ok. * https://fatcat.wiki/release/t2bdv2otczav5du5b65q46oivq https://fatcat.wiki/release/c5dj5ifvfnfidejfl3wpbigcqa Status.AMBIGUOUS OK.DUMMY * https://fatcat.wiki/release/bruczmzvnzhtdkd2tf3meg3oou https://fatcat.wiki/release/a7wuehxrv5edpb5265qx27yvmy Status.AMBIGUOUS OK.DUMMY * https://fatcat.wiki/release/uqyjav3arngq7bqmzsllxrkpmu https://fatcat.wiki/release/tebqkxnjpzfxnpsqmt5klv2ppm Status.AMBIGUOUS OK.DUMMY + +Different reviews. "NA" author, need to sort that out. + * https://fatcat.wiki/release/vpswmj3cgfhktggwvmz33fkwuq https://fatcat.wiki/release/e3fs7ttdbrds3bvsbm7lynzlpu Status.AMBIGUOUS OK.DUMMY + +Different reviews. + * https://fatcat.wiki/release/gtsbvudmjzdeppqgzjpmfedycq https://fatcat.wiki/release/27lrseg7jfhxbdxohph7il7a7m Status.AMBIGUOUS OK.DUMMY + +Ok. + * https://fatcat.wiki/release/a3kmwzn4kjerbingv7oyfs5gwe https://fatcat.wiki/release/4m6ijk5gu5gxhcbvd2f4i2xk5u Status.AMBIGUOUS OK.DUMMY + + * https://fatcat.wiki/release/swpqbs3zo5co5fzfpvkg3abtfa https://fatcat.wiki/release/dyye7bybcfbifebunhxtwrc4jm Status.AMBIGUOUS OK.DUMMY * https://fatcat.wiki/release/m2smjyfyfzfkrdq2narn7fm24a https://fatcat.wiki/release/u2j2domfnjdppnxpggjzxasoou Status.AMBIGUOUS OK.DUMMY * https://fatcat.wiki/release/254alcrrgfcz7l6j6kce7xqoli https://fatcat.wiki/release/kix6fwgliffudepddlnflom6pq Status.AMBIGUOUS OK.DUMMY diff --git a/tests/data/release/27lrseg7jfhxbdxohph7il7a7m b/tests/data/release/27lrseg7jfhxbdxohph7il7a7m new file mode 100644 index 0000000..7b4fafc --- /dev/null +++ b/tests/data/release/27lrseg7jfhxbdxohph7il7a7m @@ -0,0 +1,32 @@ +{ + "abstracts": [], + "container_id": "nbmd5wsw4jbdpdxl5dijlj6og4", + "contribs": [], + "ext_ids": { + "jstor": "20559948" + }, + "extra": { + "jstor": { + "issue_id": "i20559932", + "journal_ids": [ + "artprog", + "j50000510" + ] + } + }, + "ident": "27lrseg7jfhxbdxohph7il7a7m", + "issue": "2", + "language": "en", + "pages": "44", + "publisher": "American Federation of Arts", + "refs": [], + "release_date": "1909-12-01", + "release_stage": "published", + "release_type": "article-journal", + "release_year": 1909, + "revision": "6d1f2e84-fc19-41ef-af9d-15f41ed951bb", + "state": "active", + "title": "State Art Commission", + "volume": "1", + "work_id": "ig234zpdw5gtdm5a2ceays6lu4" +} diff --git a/tests/data/release/4m6ijk5gu5gxhcbvd2f4i2xk5u b/tests/data/release/4m6ijk5gu5gxhcbvd2f4i2xk5u new file mode 100644 index 0000000..3a7f0a4 --- /dev/null +++ b/tests/data/release/4m6ijk5gu5gxhcbvd2f4i2xk5u @@ -0,0 +1,39 @@ +{ + "abstracts": [], + "container_id": "weq5aye5ibfdvmhgqhoub6xv2y", + "contribs": [ + { + "extra": { + "seq": "first" + }, + "index": 0, + "raw_name": "Robert LaPorte", + "role": "author" + }, + { + "index": 1, + "raw_name": "G. W. Choudhury", + "role": "author" + } + ], + "ext_ids": { + "doi": "10.2307/2054147" + }, + "extra": { + "crossref": { + "type": "journal-article" + } + }, + "ident": "4m6ijk5gu5gxhcbvd2f4i2xk5u", + "pages": "586", + "publisher": "Cambridge University Press (CUP)", + "refs": [], + "release_stage": "published", + "release_type": "article-journal", + "release_year": 1977, + "revision": "301e0916-e548-436d-9cf7-203b7e6ec449", + "state": "active", + "title": "India, Pakistan, Bangladesh, and the Major Powers: Politics of a Divided Subcontinent.", + "volume": "36", + "work_id": "buqjzw6kh5gadp5php4y7pca5e" +} diff --git a/tests/data/release/a3kmwzn4kjerbingv7oyfs5gwe b/tests/data/release/a3kmwzn4kjerbingv7oyfs5gwe new file mode 100644 index 0000000..b8cbca3 --- /dev/null +++ b/tests/data/release/a3kmwzn4kjerbingv7oyfs5gwe @@ -0,0 +1,21 @@ +{ + "abstracts": [], + "contribs": [], + "ext_ids": { + "doi": "10.1163/2468-1733_shafr_sim210030004" + }, + "extra": { + "container_name": "The SHAFR Guide Online", + "crossref": { + "type": "dataset" + } + }, + "ident": "a3kmwzn4kjerbingv7oyfs5gwe", + "publisher": "Brill Academic Publishers", + "refs": [], + "release_type": "dataset", + "revision": "adc23757-2055-46b0-a5c7-c719c6fc7c8a", + "state": "active", + "title": "India, Pakistan, Bangladesh, and the Major Powers: Politics of a Divided Subcontinent", + "work_id": "kmrv52rjs5g3zfqem67nxsfn3i" +} diff --git a/tests/data/release/e3fs7ttdbrds3bvsbm7lynzlpu b/tests/data/release/e3fs7ttdbrds3bvsbm7lynzlpu new file mode 100644 index 0000000..83ae531 --- /dev/null +++ b/tests/data/release/e3fs7ttdbrds3bvsbm7lynzlpu @@ -0,0 +1,38 @@ +{ + "abstracts": [], + "container_id": "eoaasu7cqnbqxm7j4qk2xszfpq", + "contribs": [ + { + "extra": { + "seq": "first" + }, + "index": 0, + "raw_name": "Nathan Kantrowitz", + "role": "author" + }, + { + "index": 1, + "raw_name": "Joseph P. Fitzpatrick", + "role": "author" + } + ], + "ext_ids": { + "doi": "10.2307/2062139" + }, + "extra": { + "crossref": { + "type": "journal-article" + } + }, + "ident": "e3fs7ttdbrds3bvsbm7lynzlpu", + "pages": "83", + "publisher": "SAGE Publications", + "refs": [], + "release_stage": "published", + "release_type": "article-journal", + "release_year": 1973, + "revision": "3ba93895-2862-4092-9ce3-075a3bf673d7", + "state": "active", + "title": "Puerto Rican Americans: The Meaning of Migration to the Mainland.", + "work_id": "obvoaf2i7jdvtk5wtjxbh65xwu" +} diff --git a/tests/data/release/gtsbvudmjzdeppqgzjpmfedycq b/tests/data/release/gtsbvudmjzdeppqgzjpmfedycq new file mode 100644 index 0000000..4e2f945 --- /dev/null +++ b/tests/data/release/gtsbvudmjzdeppqgzjpmfedycq @@ -0,0 +1,32 @@ +{ + "abstracts": [], + "container_id": "nbmd5wsw4jbdpdxl5dijlj6og4", + "contribs": [], + "ext_ids": { + "jstor": "20560067" + }, + "extra": { + "jstor": { + "issue_id": "i20560054", + "journal_ids": [ + "artprog", + "j50000510" + ] + } + }, + "ident": "gtsbvudmjzdeppqgzjpmfedycq", + "issue": "6", + "language": "en", + "pages": "171", + "publisher": "American Federation of Arts", + "refs": [], + "release_date": "1910-04-01", + "release_stage": "published", + "release_type": "article-journal", + "release_year": 1910, + "revision": "353dec0f-2e62-48b2-af1a-37eeb3a9b438", + "state": "active", + "title": "State Art Commission", + "volume": "1", + "work_id": "tefim3p4hnbebc4ozkleyshaqq" +} diff --git a/tests/data/release/tebqkxnjpzfxnpsqmt5klv2ppm b/tests/data/release/tebqkxnjpzfxnpsqmt5klv2ppm new file mode 100644 index 0000000..894cb0d --- /dev/null +++ b/tests/data/release/tebqkxnjpzfxnpsqmt5klv2ppm @@ -0,0 +1,38 @@ +{ + "abstracts": [], + "container_id": "w5pzcsrl7ndtnhpc2e2bzavyjy", + "contribs": [ + { + "extra": { + "seq": "first" + }, + "index": 0, + "raw_name": "&NA; T D", + "role": "author" + } + ], + "ext_ids": { + "doi": "10.1097/00000441-195606000-00020" + }, + "extra": { + "crossref": { + "type": "journal-article" + }, + "subtitle": [ + "" + ] + }, + "ident": "tebqkxnjpzfxnpsqmt5klv2ppm", + "language": "en", + "pages": "727-728", + "publisher": "Elsevier BV", + "refs": [], + "release_stage": "published", + "release_type": "article-journal", + "release_year": 1956, + "revision": "3ff23bc1-390e-418d-ab7f-3a5ffdbf3d9d", + "state": "active", + "title": "Symposium on Atherosclerosis", + "volume": "231", + "work_id": "47uhom6dhvh2pacv7b4ohobqnq" +} diff --git a/tests/data/release/uqyjav3arngq7bqmzsllxrkpmu b/tests/data/release/uqyjav3arngq7bqmzsllxrkpmu new file mode 100644 index 0000000..97d4d1c --- /dev/null +++ b/tests/data/release/uqyjav3arngq7bqmzsllxrkpmu @@ -0,0 +1,27 @@ +{ + "abstracts": [], + "container_id": "pg5nv3ik7jhu5kcz3czdz75dvm", + "contribs": [], + "ext_ids": { + "doi": "10.1001/jama.1955.02960240099025" + }, + "extra": { + "crossref": { + "type": "journal-article" + } + }, + "ident": "uqyjav3arngq7bqmzsllxrkpmu", + "language": "en", + "pages": "733", + "publisher": "American Medical Association (AMA)", + "refs": [], + "release_date": "1955-10-15", + "release_stage": "published", + "release_type": "article-journal", + "release_year": 1955, + "revision": "9154ed48-0221-436c-a314-a12d24546dce", + "state": "active", + "title": "Symposium on Atherosclerosis", + "volume": "159", + "work_id": "rolynr5v5fg3rkln3geccnte2m" +} diff --git a/tests/data/release/vpswmj3cgfhktggwvmz33fkwuq b/tests/data/release/vpswmj3cgfhktggwvmz33fkwuq new file mode 100644 index 0000000..9b51c4c --- /dev/null +++ b/tests/data/release/vpswmj3cgfhktggwvmz33fkwuq @@ -0,0 +1,31 @@ +{ + "abstracts": [], + "container_id": "on7pto6emjcyjebuutelzf34nq", + "contribs": [], + "ext_ids": { + "doi": "10.1215/00182168-53.1.174a" + }, + "extra": { + "crossref": { + "subject": [ + "Cultural Studies", + "History" + ], + "type": "journal-article" + } + }, + "ident": "vpswmj3cgfhktggwvmz33fkwuq", + "language": "en", + "pages": "174-175", + "publisher": "Duke University Press", + "refs": [], + "release_date": "1973-02-01", + "release_stage": "published", + "release_type": "article-journal", + "release_year": 1973, + "revision": "d2975fda-550b-4204-9a85-d2a7aa31d9be", + "state": "active", + "title": "Puerto Rican Americans: The Meaning of Migration to the Mainland", + "volume": "53", + "work_id": "3jfvnundd5d5hppqz5v3gbqvau" +} diff --git a/tests/data/verify.csv b/tests/data/verify.csv index d29f849..6cfb505 100644 --- a/tests/data/verify.csv +++ b/tests/data/verify.csv @@ -149,3 +149,7 @@ tm3gaiumkvb3xc7t3i6suna6u4,pobnow7sxfhnxhltgwpru5k7oi,Status.DIFFERENT,Miss.RELE lqswbciv2vfkzit5zamjaqik6m,zularouecbg5fg4nd6yswxf3s4,Status.DIFFERENT,Miss.JSTOR_ID j6ipokw3lfflhl2de7afxhac2a,rbgpleyhanakxing2f3234d7xq,Status.AMBIGUOUS, bruczmzvnzhtdkd2tf3meg3oou,a7wuehxrv5edpb5265qx27yvmy,Status.AMBIGUOUS, +tebqkxnjpzfxnpsqmt5klv2ppm,uqyjav3arngq7bqmzsllxrkpmu,Status.DIFFERENT, +e3fs7ttdbrds3bvsbm7lynzlpu,vpswmj3cgfhktggwvmz33fkwuq,Status.DIFFERENT, +gtsbvudmjzdeppqgzjpmfedycq,27lrseg7jfhxbdxohph7il7a7m,Status.DIFFERENT,Miss.JSTOR_ID +a3kmwzn4kjerbingv7oyfs5gwe,4m6ijk5gu5gxhcbvd2f4i2xk5u,Status.DIFFERENT,Miss.RELEASE_TYPE diff --git a/tests/test_verify.py b/tests/test_verify.py index e94b57a..07808af 100644 --- a/tests/test_verify.py +++ b/tests/test_verify.py @@ -17,6 +17,7 @@ status_mapping = { "Status.EXACT": Status.EXACT, "Status.STRONG": Status.STRONG, "Status.WEAK": Status.WEAK, + "Status.TODO": Status.TODO, } logger = logging.getLogger('test_verify') -- cgit v1.2.3