From 00dac1ff90b1f0797fe71d2edd787f731c51bfb2 Mon Sep 17 00:00:00 2001 From: Martin Czygan Date: Sat, 3 Jul 2021 21:26:47 +0200 Subject: refcat: remove unused code --- python/refcat/utils.py | 238 --------------------------------------------- python/tests/test_utils.py | 50 ---------- 2 files changed, 288 deletions(-) delete mode 100644 python/tests/test_utils.py (limited to 'python') diff --git a/python/refcat/utils.py b/python/refcat/utils.py index 2fbaec5..a665a25 100644 --- a/python/refcat/utils.py +++ b/python/refcat/utils.py @@ -2,245 +2,7 @@ Assorted utilities. """ -import collections import io -import re - -DOI_PATTERN = re.compile(r"10.[0-9]{1,6}/[^ ]*[\w]") - -# via: https://gist.github.com/gruber/8891611 -URL_PATTERN = re.compile(r"""(?xi) -\b -( # Capture 1: entire matched URL - (?: - https?: # URL protocol and colon - (?: - /{1,3} # 1-3 slashes - | # or - [a-z0-9%] # Single letter or digit or '%' - # (Trying not to match e.g. "URI::Escape") - ) - | # or - # looks like domain name followed by a slash: - [a-z0-9.\-]+[.] - (?:com|net|org|edu|gov|mil|aero|asia|biz|cat|coop|info|int|jobs|mobi|museum|name|post|pro|tel|travel|xxx|ac|ad|ae|af|ag|ai|al|am|an|ao|aq|ar|as|at|au|aw|ax|az|ba|bb|bd|be|bf|bg|bh|bi|bj|bm|bn|bo|br|bs|bt|bv|bw|by|bz|ca|cc|cd|cf|cg|ch|ci|ck|cl|cm|cn|co|cr|cs|cu|cv|cx|cy|cz|dd|de|dj|dk|dm|do|dz|ec|ee|eg|eh|er|es|et|eu|fi|fj|fk|fm|fo|fr|ga|gb|gd|ge|gf|gg|gh|gi|gl|gm|gn|gp|gq|gr|gs|gt|gu|gw|gy|hk|hm|hn|hr|ht|hu|id|ie|il|im|in|io|iq|ir|is|it|je|jm|jo|jp|ke|kg|kh|ki|km|kn|kp|kr|kw|ky|kz|la|lb|lc|li|lk|lr|ls|lt|lu|lv|ly|ma|mc|md|me|mg|mh|mk|ml|mm|mn|mo|mp|mq|mr|ms|mt|mu|mv|mw|mx|my|mz|na|nc|ne|nf|ng|ni|nl|no|np|nr|nu|nz|om|pa|pe|pf|pg|ph|pk|pl|pm|pn|pr|ps|pt|pw|py|qa|re|ro|rs|ru|rw|sa|sb|sc|sd|se|sg|sh|si|sj| Ja|sk|sl|sm|sn|so|sr|ss|st|su|sv|sx|sy|sz|tc|td|tf|tg|th|tj|tk|tl|tm|tn|to|tp|tr|tt|tv|tw|tz|ua|ug|uk|us|uy|uz|va|vc|ve|vg|vi|vn|vu|wf|ws|ye|yt|yu|za|zm|zw) - / - ) - (?: # One or more: - [^\s()<>{}\[\]]+ # Run of non-space, non-()<>{}[] - | # or - \([^\s()]*?\([^\s()]+\)[^\s()]*?\) # balanced parens, one level deep: (…(…)…) - | - \([^\s]+?\) # balanced parens, non-recursive: (…) - )+ - (?: # End with: - \([^\s()]*?\([^\s()]+\)[^\s()]*?\) # balanced parens, one level deep: (…(…)…) - | - \([^\s]+?\) # balanced parens, non-recursive: (…) - | # or - [^\s`!()\[\]{};:'".,<>?«»“”‘’] # not a space or one of these punct chars - ) - | # OR, the following to match naked domains: - (?: - (?3.0.CO") == [ - "10.1002/(SICI)1097-4679(200004)56:4<519::AID-JCLP6>3.0.CO" - ] - assert extract_dois("!10.1002/ajpa.20674.!") == ["10.1002/ajpa.20674"] - assert extract_dois("!10.1002/chem.201700953.!") == ["10.1002/chem.201700953"] -- cgit v1.2.3