about | summary | refs | log | tree | commit | diff | stats
path: root/fatcat_scholar
diff options
context:
space:
mode:
author: Bryan Newbold <bnewbold@archive.org> 2021-01-19 00:54:21 -0800
committer: Bryan Newbold <bnewbold@archive.org> 2021-01-19 19:49:04 -0800
commit: 9cc666cd7baba1dc2bfb9e553a2c021fbeb61c24 (patch)
tree: a7cc56ffac34dd79f57fdc7f7201cf2fd0f3a86f /fatcat_scholar
parent: 38665ebca0c6ff9fa3c9fd2c4d421c71055d8f2f (diff)
download: fatcat-scholar-9cc666cd7baba1dc2bfb9e553a2c021fbeb61c24.tar.gz
fatcat-scholar-9cc666cd7baba1dc2bfb9e553a2c021fbeb61c24.zip
parse: handle pre-quoted strings better
Diffstat (limited to 'fatcat_scholar')
-rw-r--r-- fatcat_scholar/query_parse.py 11
1 files changed, 10 insertions, 1 deletions
diff --git a/fatcat_scholar/query_parse.py b/fatcat_scholar/query_parse.py
index fd42c6e..0aae066 100644
--- a/fatcat_scholar/query_parse.py
+++ b/fatcat_scholar/query_parse.py
@@ -15,6 +15,9 @@ def _clean_token(raw: str) -> str:
if len(raw.split()) > 1:
# has whitespace, will get quoted
return raw
+ if '"' in raw:
+ # is quoted already
+ return raw
if "/" in raw or raw.endswith(":") or raw.endswith("!") or raw.endswith("?"):
return '"{}"'.format(raw)
if raw.startswith("[") and raw.endswith("]"):
@@ -45,7 +48,7 @@ def pre_parse_query(raw: str) -> str:
lex.commenters = ""
lex.whitespace_split = True
tokens = list(map(_clean_token, list(lex)))
- print(list(tokens))
+ # print(list(tokens))
return " ".join(tokens)
@@ -62,6 +65,10 @@ def test_pre_parse_query() -> None:
assert pre_parse_query('"a/B thing"') == '"a/B thing"'
assert pre_parse_query("Krämer") == "Krämer"
assert (
+ pre_parse_query('"10.1093/qjmed/os-14.56.398"')
+ == '"10.1093/qjmed/os-14.56.398"'
+ )
+ assert (
pre_parse_query("this (is my) paper: here are the results")
== 'this (is my) "paper:" here are the results'
)
@@ -138,6 +145,8 @@ def test_sniff_citation_query() -> None:
sniff_citation_query("(title:foo OR title:bar)^1.5 (body:foo OR body:bar)")
is False
)
+ assert sniff_citation_query("DR. SCHAUDINN'S WORK ON BLOOD PARASITES") is True
+ assert sniff_citation_query('"DR. SCHAUDINN\'S WORK ON BLOOD PARASITES"') is True
assert (
sniff_citation_query(
'"DR. SCHAUDINN\'S WORK ON BLOOD PARASITES." BMJ (Clinical Research Edition) (1905): 442-444'