diff options
author | Martin Czygan <martin.czygan@gmail.com> | 2020-02-19 01:12:57 +0100 |
---|---|---|
committer | Martin Czygan <martin.czygan@gmail.com> | 2020-02-19 01:12:57 +0100 |
commit | 456f318b5ef904786aabf2411d2d244cd38f25b1 (patch) | |
tree | 25e37218b9c4a38fd9588c37dee926ae93dbcd6e /python/fatcat_tools | |
parent | 519b90d7f539b667e919c220a53626e7a4ac48bf (diff) | |
download | fatcat-456f318b5ef904786aabf2411d2d244cd38f25b1.tar.gz fatcat-456f318b5ef904786aabf2411d2d244cd38f25b1.zip |
pubmed ftp: fix url
Diffstat (limited to 'python/fatcat_tools')
-rw-r--r-- | python/fatcat_tools/harvest/pubmed.py | 10 |
1 files changed, 6 insertions, 4 deletions
```diff
diff --git a/python/fatcat_tools/harvest/pubmed.py b/python/fatcat_tools/harvest/pubmed.py
index da872a10..7afb2dab 100644
--- a/python/fatcat_tools/harvest/pubmed.py
+++ b/python/fatcat_tools/harvest/pubmed.py
@@ -122,7 +122,7 @@ class PubmedFTPWorker:
 
         count = 0
         for path in paths:
-            filename = ftpretr(urljoin(self.host, path))
+            filename = ftpretr("ftp://{}".format(urljoin(self.host, path)))
             for blob in xmlstream(filename, 'PubmedArticle', encoding='utf-8'):
                 soup = BeautifulSoup(blob)
                 pmid = soup.find('PMID')
@@ -157,11 +157,13 @@ class PubmedFTPWorker:
         print("{} DOI ingest caught up".format(self.name))
 
 
-class ftpretr(uri):
+class ftpretr(url):
     """
-    Fetch (RETR) a remote file to a local temporary file.
+    Fetch (RETR) a remote file given by its URL (e.g.
+    "ftp://ftp.ncbi.nlm.nih.gov/pubmed/updatefiles/pubmed20n1016.xml.gz") to a
+    local temporary file.
     """
-    parsed = urlparse(uri)
+    parsed = urlparse(url)
     server, path = parsed.netloc, parsed.path
     ftp = FTP(self.server)
     ftp.login()
```