summary refs log tree commit diff stats
path: root/fatcat_scholar
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@archive.org> 2021-01-19 01:06:37 -0800
committer Bryan Newbold <bnewbold@archive.org> 2021-01-19 19:49:04 -0800
commit aff6473144542e24c0aaa66c514c210eb83bf8a8 (patch)
tree 0131cdb13a2bd0e5c9edc8f7359233195a8b0432 /fatcat_scholar
parent 89e9149b27263d6128be449c7b12b025cc031292 (diff)
download fatcat-scholar-aff6473144542e24c0aaa66c514c210eb83bf8a8.tar.gz
fatcat-scholar-aff6473144542e24c0aaa66c514c210eb83bf8a8.zip
parse: tweak citation sniff routine
Diffstat (limited to 'fatcat_scholar')
-rw-r--r-- fatcat_scholar/query_parse.py 14
1 files changed, 11 insertions, 3 deletions
diff --git a/fatcat_scholar/query_parse.py b/fatcat_scholar/query_parse.py
index 0aae066..0f5086b 100644
--- a/fatcat_scholar/query_parse.py
+++ b/fatcat_scholar/query_parse.py
@@ -114,6 +114,10 @@ def sniff_citation_query(raw: str) -> bool:
if len(raw) < 12 or len(raw.split()) < 6:
return False
+ # if single quoted string, not a citation
+ if raw.count('"') == 2 and raw.startswith('"') and raw.endswith('"'):
+ return False
+
# if there is a filter query, boost, or fuzzy match, not a citation
if re.search(r'([a-zA-Z]:[^\s])|(["\\)][\^~]\d)', raw):
return False
@@ -132,8 +136,12 @@ def sniff_citation_query(raw: str) -> bool:
char_types["period"] = True
elif c == ",":
char_types["comma"] = True
+ elif c == ";":
+ char_types["semicolon"] = True
+ elif c == "(" or c == ")":
+ char_types["parens"] = True
- if len(char_types) > 2:
+ if len(char_types) >= 4:
return True
return False
@@ -145,8 +153,8 @@ def test_sniff_citation_query() -> None:
sniff_citation_query("(title:foo OR title:bar)^1.5 (body:foo OR body:bar)")
is False
)
- assert sniff_citation_query("DR. SCHAUDINN'S WORK ON BLOOD PARASITES") is True
- assert sniff_citation_query('"DR. SCHAUDINN\'S WORK ON BLOOD PARASITES"') is True
+ assert sniff_citation_query("DR. SCHAUDINN'S WORK ON BLOOD PARASITES") is False
+ assert sniff_citation_query('"DR. SCHAUDINN\'S WORK ON BLOOD PARASITES"') is False
assert (
sniff_citation_query(
'"DR. SCHAUDINN\'S WORK ON BLOOD PARASITES." BMJ (Clinical Research Edition) (1905): 442-444'