aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--fuzzycat/common.py1
-rw-r--r--fuzzycat/verify.py22
-rw-r--r--notes/2020_11_testruns.md2
-rw-r--r--tests/data/release/5yixxzyl3vh4xd56lwcraowgty43
-rw-r--r--tests/data/release/pobnow7sxfhnxhltgwpru5k7oi21
-rw-r--r--tests/data/release/tm3gaiumkvb3xc7t3i6suna6u435
-rw-r--r--tests/data/release/uplqxenmk5axjes6zokml6q73y26
-rw-r--r--tests/data/release/yespzqkm2zed7n4vhjpkddap5e24
-rw-r--r--tests/data/verify.csv3
9 files changed, 175 insertions, 2 deletions
diff --git a/fuzzycat/common.py b/fuzzycat/common.py
index 5bf033c..20a5ddd 100644
--- a/fuzzycat/common.py
+++ b/fuzzycat/common.py
@@ -56,6 +56,7 @@ class Miss(str, Enum):
DATASET_DOI = 'miss.dataset_doi'
JSTOR_ID = 'miss.jstor_id'
NUM_DIFF = 'miss.num_diff'
+ PAGE_COUNT = 'miss.page_count'
RELEASE_TYPE = 'miss.release_type'
SHARED_DOI_PREFIX = 'miss.shared_doi_prefix'
SHORT_TITLE = 'miss.short_title'
diff --git a/fuzzycat/verify.py b/fuzzycat/verify.py
index 88e83d5..76571da 100644
--- a/fuzzycat/verify.py
+++ b/fuzzycat/verify.py
@@ -322,6 +322,8 @@ def compare(a, b):
])
if len(types & ignore_release_types) == 0:
return (Status.DIFFERENT, Miss.RELEASE_TYPE)
+ if "dataset" in types and ("article" in types or "article-journal" in types):
+ return (Status.DIFFERENT, Miss.RELEASE_TYPE)
except PathAccessError:
pass
@@ -543,4 +545,24 @@ def compare(a, b):
except PathAccessError:
pass
+ # If pages exist, but differ too much, bail out.
+ # https://fatcat.wiki/release/tm3gaiumkvb3xc7t3i6suna6u4
+ # https://fatcat.wiki/release/r6dj63wh3zcrrolisn6xuacnve
+ try:
+ a_pages = glom(a, "pages")
+ b_pages = glom(b, "pages")
+ page_pattern = re.compile("([0-9]{1,})-([0-9]{1,})")
+ a_match = page_pattern.match(a_pages)
+ b_match = page_pattern.match(b_pages)
+ if a_match and b_match:
+ a_start, a_end = a_match.groups()
+ b_start, b_end = b_match.groups()
+ a_num_pages = int(a_end) - int(a_start)
+ b_num_pages = int(b_end) - int(b_start)
+ if a_num_pages >= 0 and b_num_pages >= 0:
+ if abs(a_num_pages - b_num_pages) > 5:
+ return (Status.DIFFERENT, Miss.PAGE_COUNT)
+ except PathAccessError:
+ pass
+
return (Status.AMBIGUOUS, OK.DUMMY)
diff --git a/notes/2020_11_testruns.md b/notes/2020_11_testruns.md
index cfa6c6f..51834fc 100644
--- a/notes/2020_11_testruns.md
+++ b/notes/2020_11_testruns.md
@@ -225,8 +225,6 @@ Hard to say (but seem to be a rerun of an article in a "similar" journal).
Ok.
* [ ] https://fatcat.wiki/release/hwnqyz7n65eabhlivvkipkytji https://fatcat.wiki/release/cwqujxztefdghhssb7ysxj7b5m Status.AMBIGUOUS OK.DUMMY
-
-
* [ ] https://fatcat.wiki/release/yespzqkm2zed7n4vhjpkddap5e https://fatcat.wiki/release/5yixxzyl3vh4xd56lwcraowgty Status.AMBIGUOUS OK.DUMMY
* [ ] https://fatcat.wiki/release/47opwjqugjecjmiqgukahw6p2m https://fatcat.wiki/release/real7tmfxjan7j3fgkilt7fze4 Status.AMBIGUOUS OK.DUMMY
* [ ] https://fatcat.wiki/release/gaf7gjwetrbpzntrp4bt4nxaiy https://fatcat.wiki/release/htsa3mrirndbdjtdangr4mzrdu Status.AMBIGUOUS OK.DUMMY
diff --git a/tests/data/release/5yixxzyl3vh4xd56lwcraowgty b/tests/data/release/5yixxzyl3vh4xd56lwcraowgty
new file mode 100644
index 0000000..3fb8fb4
--- /dev/null
+++ b/tests/data/release/5yixxzyl3vh4xd56lwcraowgty
@@ -0,0 +1,43 @@
+{
+ "abstracts": [],
+ "container_id": "cjmzhtkkdjgjphz5zuzahenz2a",
+ "contribs": [
+ {
+ "extra": {
+ "seq": "first"
+ },
+ "index": 0,
+ "raw_name": "WALTER S. ROOT",
+ "role": "author"
+ },
+ {
+ "index": 1,
+ "raw_name": "FREDERICK G. HOFMANN",
+ "role": "author"
+ }
+ ],
+ "ext_ids": {
+ "doi": "10.1097/00000542-196701000-00100"
+ },
+ "extra": {
+ "crossref": {
+ "type": "journal-article"
+ },
+ "subtitle": [
+ ""
+ ]
+ },
+ "ident": "5yixxzyl3vh4xd56lwcraowgty",
+ "language": "en",
+ "pages": "284",
+ "publisher": "Ovid Technologies (Wolters Kluwer Health)",
+ "refs": [],
+ "release_stage": "published",
+ "release_type": "article-journal",
+ "release_year": 1967,
+ "revision": "ec95ac81-64ec-41c0-af9e-845f0dd6cbbe",
+ "state": "active",
+ "title": "Physiological Pharmacology. A Comprehensive Treatise",
+ "volume": "28",
+ "work_id": "gznfk7fm4jdwzkbudxyo3kemfu"
+}
diff --git a/tests/data/release/pobnow7sxfhnxhltgwpru5k7oi b/tests/data/release/pobnow7sxfhnxhltgwpru5k7oi
new file mode 100644
index 0000000..d5a5ed8
--- /dev/null
+++ b/tests/data/release/pobnow7sxfhnxhltgwpru5k7oi
@@ -0,0 +1,21 @@
+{
+ "abstracts": [],
+ "contribs": [],
+ "ext_ids": {
+ "doi": "10.1163/2352-0248_edn_a2481000"
+ },
+ "extra": {
+ "container_name": "Enzyklopädie der Neuzeit Online",
+ "crossref": {
+ "type": "dataset"
+ }
+ },
+ "ident": "pobnow7sxfhnxhltgwpru5k7oi",
+ "publisher": "Brill Academic Publishers",
+ "refs": [],
+ "release_type": "dataset",
+ "revision": "8266c500-5b31-47e5-94dd-d04d90f5fe45",
+ "state": "active",
+ "title": "Leitbild",
+ "work_id": "bzxfao6pe5hmvar75ebglzyl3i"
+}
diff --git a/tests/data/release/tm3gaiumkvb3xc7t3i6suna6u4 b/tests/data/release/tm3gaiumkvb3xc7t3i6suna6u4
new file mode 100644
index 0000000..4f067cd
--- /dev/null
+++ b/tests/data/release/tm3gaiumkvb3xc7t3i6suna6u4
@@ -0,0 +1,35 @@
+{
+ "abstracts": [],
+ "container_id": "qcsreng6drgiflvgxfo6tbc7cq",
+ "contribs": [],
+ "ext_ids": {
+ "doi": "10.1111/rsr.14216"
+ },
+ "extra": {
+ "crossref": {
+ "alternative-id": [
+ "10.1111/rsr.14216"
+ ],
+ "archive": [
+ "Portico"
+ ],
+ "subject": [
+ "Religious studies"
+ ],
+ "type": "journal-article"
+ }
+ },
+ "ident": "tm3gaiumkvb3xc7t3i6suna6u4",
+ "language": "en",
+ "pages": "386-386",
+ "publisher": "Wiley",
+ "refs": [],
+ "release_stage": "published",
+ "release_type": "article-journal",
+ "release_year": 2019,
+ "revision": "5c4d2543-33a3-4b87-9d20-e49786fd94f7",
+ "state": "active",
+ "title": "JEWISH THOUGHT",
+ "volume": "45",
+ "work_id": "ziwjsjtaxjentaednk52ht5eui"
+}
diff --git a/tests/data/release/uplqxenmk5axjes6zokml6q73y b/tests/data/release/uplqxenmk5axjes6zokml6q73y
new file mode 100644
index 0000000..e4d580c
--- /dev/null
+++ b/tests/data/release/uplqxenmk5axjes6zokml6q73y
@@ -0,0 +1,26 @@
+{
+ "abstracts": [],
+ "contribs": [],
+ "ext_ids": {
+ "doi": "10.5169/seals-463775"
+ },
+ "extra": {
+ "datacite": {
+ "metadataVersion": 2,
+ "resourceType": "Journal Article",
+ "resourceTypeGeneral": "Text"
+ },
+ "release_month": 5
+ },
+ "ident": "uplqxenmk5axjes6zokml6q73y",
+ "publisher": "E. Löpfe-Benz",
+ "refs": [],
+ "release_date": "1931-05-15",
+ "release_stage": "published",
+ "release_type": "article-journal",
+ "release_year": 1931,
+ "revision": "e3572a08-2c3e-4231-8548-6427b11752c3",
+ "state": "active",
+ "title": "Leitbild",
+ "work_id": "fcu63qnsmbfmbc53x5jrhh3lma"
+}
diff --git a/tests/data/release/yespzqkm2zed7n4vhjpkddap5e b/tests/data/release/yespzqkm2zed7n4vhjpkddap5e
new file mode 100644
index 0000000..28316be
--- /dev/null
+++ b/tests/data/release/yespzqkm2zed7n4vhjpkddap5e
@@ -0,0 +1,24 @@
+{
+ "abstracts": [],
+ "contribs": [],
+ "ext_ids": {
+ "doi": "10.1016/b978-1-4832-2760-3.50001-0"
+ },
+ "extra": {
+ "container_name": "The Nervous System",
+ "crossref": {
+ "type": "book-chapter"
+ }
+ },
+ "ident": "yespzqkm2zed7n4vhjpkddap5e",
+ "pages": "ii",
+ "publisher": "Elsevier",
+ "refs": [],
+ "release_stage": "published",
+ "release_type": "chapter",
+ "release_year": 1967,
+ "revision": "9322dda3-bfde-4eeb-8185-224b11b4eb85",
+ "state": "active",
+ "title": "PHYSIOLOGICAL PHARMACOLOGY: A Comprehensive Treatise",
+ "work_id": "rgluu7ppjrfaznbt254xkn4nou"
+}
diff --git a/tests/data/verify.csv b/tests/data/verify.csv
index 37f4b6e..03fa947 100644
--- a/tests/data/verify.csv
+++ b/tests/data/verify.csv
@@ -143,3 +143,6 @@ f5ebjc63j5dzpct5hsme5j3ote,zeoquc2f4nbmdbmbcbkmkxmtzi,Status.AMBIGUOUS,
zvsffdeufjb5dbchww7ydqdq3a,5rcu6myqx5ezjjytzpvsauyut4,Status.STRONG,OK.PMID_DOI_PAIR
cd5aik2whrd5jlvleyvdq6iwja,kfttghqcsbddvofqd7l4bhtavy,Status.DIFFERENT,Miss.COMPONENT
hwnqyz7n65eabhlivvkipkytji,cwqujxztefdghhssb7ysxj7b5m,Status.STRONG,OK.VERSIONED_DOI
+yespzqkm2zed7n4vhjpkddap5e,5yixxzyl3vh4xd56lwcraowgty,Status.AMBIGUOUS,
+pobnow7sxfhnxhltgwpru5k7oi,uplqxenmk5axjes6zokml6q73y,Status.DIFFERENT,Miss.RELEASE_TYPE
+tm3gaiumkvb3xc7t3i6suna6u4,pobnow7sxfhnxhltgwpru5k7oi,Status.DIFFERENT,Miss.RELEASE_TYPE