diff options
author | Bryan Newbold <bnewbold@archive.org> | 2021-01-19 00:54:21 -0800 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2021-01-19 19:49:04 -0800 |
commit | 9cc666cd7baba1dc2bfb9e553a2c021fbeb61c24 (patch) | |
tree | a7cc56ffac34dd79f57fdc7f7201cf2fd0f3a86f /fatcat_scholar | |
parent | 38665ebca0c6ff9fa3c9fd2c4d421c71055d8f2f (diff) | |
download | fatcat-scholar-9cc666cd7baba1dc2bfb9e553a2c021fbeb61c24.tar.gz fatcat-scholar-9cc666cd7baba1dc2bfb9e553a2c021fbeb61c24.zip |
parse: handle pre-quoted strings better
Diffstat (limited to 'fatcat_scholar')
-rw-r--r-- | fatcat_scholar/query_parse.py | 11 |
1 files changed, 10 insertions, 1 deletions
diff --git a/fatcat_scholar/query_parse.py b/fatcat_scholar/query_parse.py
index fd42c6e..0aae066 100644
--- a/fatcat_scholar/query_parse.py
+++ b/fatcat_scholar/query_parse.py
@@ -15,6 +15,9 @@ def _clean_token(raw: str) -> str:
     if len(raw.split()) > 1:
         # has whitespace, will get quoted
         return raw
+    if '"' in raw:
+        # is quoted already
+        return raw
     if "/" in raw or raw.endswith(":") or raw.endswith("!") or raw.endswith("?"):
         return '"{}"'.format(raw)
     if raw.startswith("[") and raw.endswith("]"):
@@ -45,7 +48,7 @@ def pre_parse_query(raw: str) -> str:
     lex.commenters = ""
     lex.whitespace_split = True
     tokens = list(map(_clean_token, list(lex)))
-    print(list(tokens))
+    # print(list(tokens))
     return " ".join(tokens)
 
 
@@ -62,6 +65,10 @@ def test_pre_parse_query() -> None:
     assert pre_parse_query('"a/B thing"') == '"a/B thing"'
     assert pre_parse_query("Krämer") == "Krämer"
     assert (
+        pre_parse_query('"10.1093/qjmed/os-14.56.398"')
+        == '"10.1093/qjmed/os-14.56.398"'
+    )
+    assert (
         pre_parse_query("this (is my) paper: here are the results")
         == 'this (is my) "paper:" here are the results'
     )
@@ -138,6 +145,8 @@ def test_sniff_citation_query() -> None:
         sniff_citation_query("(title:foo OR title:bar)^1.5 (body:foo OR body:bar)")
         is False
     )
+    assert sniff_citation_query("DR. SCHAUDINN'S WORK ON BLOOD PARASITES") is True
+    assert sniff_citation_query('"DR. SCHAUDINN\'S WORK ON BLOOD PARASITES"') is True
     assert (
         sniff_citation_query(
             '"DR. SCHAUDINN\'S WORK ON BLOOD PARASITES." BMJ (Clinical Research Edition) (1905): 442-444'