about | summary | refs | log | tree | commit | diff | stats
path: root/python/tests
diff options
context:
space:
mode:
author Martin Czygan <martin.czygan@gmail.com> 2021-03-21 00:36:54 +0100
committer Martin Czygan <martin.czygan@gmail.com> 2021-03-21 00:36:54 +0100
commit e00e979a8b144231ce16aafe6b8482e4104f5e37 (patch)
tree 942af1fbb0eeb71625438a2aaa0b1d783b84db0e /python/tests
parent c8d9268759f7da1e050658e135fac0c8f0b6fc53 (diff)
download refcat-e00e979a8b144231ce16aafe6b8482e4104f5e37.tar.gz
refcat-e00e979a8b144231ce16aafe6b8482e4104f5e37.zip
initial import of python tasks
Diffstat (limited to 'python/tests')
-rw-r--r-- python/tests/test_utils.py 41
1 files changed, 41 insertions, 0 deletions
diff --git a/python/tests/test_utils.py b/python/tests/test_utils.py
new file mode 100644
index 0000000..79c8919
--- /dev/null
+++ b/python/tests/test_utils.py
@@ -0,0 +1,41 @@
+from refcat.utils import extract_urls, extract_dois
+
+
+def test_extract_urls():
+ assert extract_urls("") == []
+ assert extract_urls("abc") == []
+ assert extract_urls("httP//abc") == []
+ assert extract_urls("http//a.com") == ["a.com"]
+ assert extract_urls("http://a.com") == ["http://a.com"]
+ assert extract_urls("http://a.com/b") == ["http://a.com/b"]
+ assert extract_urls("https://a.com/b") == ["https://a.com/b"]
+ assert extract_urls("http=://a.com/b") == ["a.com/"]
+ assert extract_urls("http://www.bioinformatics.babraham.ac.uk/projects/fastqc/") == ["http://www.bioinformatics.babraham.ac.uk/projects/fastqc/"]
+ assert extract_urls(
+ "CertificaçãoDigitalNº1311532/CA40/005129/2012Apensadoao40/006810/2011-1ºTermoAditivonº52/2012aoContratonº282/2011-Celebradoem08/08/2012") == []
+ assert extract_urls("http://www.brookings.edu/~/media/Research/Files/Papers/2015/04/global-drug-policy/Caulkinsfinal.pdf?la=en") == [
+ "http://www.brookings.edu/~/media/Research/Files/Papers/2015/04/global-drug-policy/Caulkinsfinal.pdf?la=en"
+ ]
+ assert extract_urls("DOI:10.1093/forestry/cpr048") == []
+ assert extract_urls("www.dtic.mil/cgi-bin/GetTRDoc?Location=U2&doc=GetTRDoc.pdf&AD=ADA475228") == [
+ "www.dtic.mil/cgi-bin/GetTRDoc?Location=U2&doc=GetTRDoc.pdf&AD=ADA475228"
+ ]
+ assert extract_urls("http://bit.ly/cJbkv") == ["http://bit.ly/cJbkv"]
+ assert extract_urls("hello http://bit.ly/cJbkv") == ["http://bit.ly/cJbkv"]
+ assert extract_urls("hello http://bit.ly/cJbkv http://bit.ly/cJbkv") == ["http://bit.ly/cJbkv", "http://bit.ly/cJbkv"]
+ assert extract_urls("jul./set.de") == ["set.de"]
+
+
+def test_extract_doi():
+ assert extract_dois("https://doi.org/10.1016/j.jsr.2003.05.009") == ["10.1016/j.jsr.2003.05.009"]
+ assert extract_dois("http://dx.doi.org/10.1002/elps.200500338") == ["10.1002/elps.200500338"]
+
+ assert extract_dois("!!10.1016/j.chiabu.2013.09.002") == ['10.1016/j.chiabu.2013.09.002']
+ assert extract_dois("!!10.1049/joe.2014.0134.!") == ["10.1049/joe.2014.0134"]
+ assert extract_dois("!!10.1080/00335630.2012.714899") == ["10.1080/00335630.2012.714899"]
+ assert extract_dois("!!10.1177/1075547007306508.!") == ["10.1177/1075547007306508"]
+ assert extract_dois("!!445!!10.3390/nu6114822") == ["10.3390/nu6114822"]
+ assert extract_dois("!0141-9889,!pp.!448-464!doi:!10.1111/j.1467J9566.2010.01286.!") == ["10.1111/j.1467J9566.2010.01286"]
+ assert extract_dois("!10.1002/(SICI)1097-4679(200004)56:4<519::AID-JCLP6>3.0.CO") == ["10.1002/(SICI)1097-4679(200004)56:4<519::AID-JCLP6>3.0.CO"]
+ assert extract_dois("!10.1002/ajpa.20674.!") == ["10.1002/ajpa.20674"]
+ assert extract_dois("!10.1002/chem.201700953.!") == ["10.1002/chem.201700953"]