From 456f318b5ef904786aabf2411d2d244cd38f25b1 Mon Sep 17 00:00:00 2001
From: Martin Czygan
Date: Wed, 19 Feb 2020 01:12:57 +0100
Subject: pubmed ftp: fix url

---
 python/fatcat_tools/harvest/pubmed.py | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/python/fatcat_tools/harvest/pubmed.py b/python/fatcat_tools/harvest/pubmed.py
index da872a10..7afb2dab 100644
--- a/python/fatcat_tools/harvest/pubmed.py
+++ b/python/fatcat_tools/harvest/pubmed.py
@@ -122,7 +122,7 @@ class PubmedFTPWorker:
 
         count = 0
         for path in paths:
-            filename = ftpretr(urljoin(self.host, path))
+            filename = ftpretr("ftp://{}".format(urljoin(self.host, path)))
             for blob in xmlstream(filename, 'PubmedArticle', encoding='utf-8'):
                 soup = BeautifulSoup(blob)
                 pmid = soup.find('PMID')
@@ -157,11 +157,13 @@ class PubmedFTPWorker:
         print("{} DOI ingest caught up".format(self.name))
 
 
-class ftpretr(uri):
+class ftpretr(url):
     """
-    Fetch (RETR) a remote file to a local temporary file.
+    Fetch (RETR) a remote file given by its URL (e.g.
+    "ftp://ftp.ncbi.nlm.nih.gov/pubmed/updatefiles/pubmed20n1016.xml.gz") to a
+    local temporary file.
     """
-    parsed = urlparse(uri)
+    parsed = urlparse(url)
     server, path = parsed.netloc, parsed.path
     ftp = FTP(self.server)
     ftp.login()
-- 
cgit v1.2.3