about | summary | refs | log | tree | commit | diff | stats
path: root/python/fatcat_tools/harvest
diff options
context:
space:
mode:
author Martin Czygan <martin.czygan@gmail.com> 2020-02-19 01:12:57 +0100
committer Martin Czygan <martin.czygan@gmail.com> 2020-02-19 01:12:57 +0100
commit 456f318b5ef904786aabf2411d2d244cd38f25b1 (patch)
tree 25e37218b9c4a38fd9588c37dee926ae93dbcd6e /python/fatcat_tools/harvest
parent 519b90d7f539b667e919c220a53626e7a4ac48bf (diff)
download fatcat-456f318b5ef904786aabf2411d2d244cd38f25b1.tar.gz
download fatcat-456f318b5ef904786aabf2411d2d244cd38f25b1.zip
pubmed ftp: fix url
Diffstat (limited to 'python/fatcat_tools/harvest')
-rw-r--r-- python/fatcat_tools/harvest/pubmed.py 10
1 files changed, 6 insertions, 4 deletions
diff --git a/python/fatcat_tools/harvest/pubmed.py b/python/fatcat_tools/harvest/pubmed.py
index da872a10..7afb2dab 100644
--- a/python/fatcat_tools/harvest/pubmed.py
+++ b/python/fatcat_tools/harvest/pubmed.py
@@ -122,7 +122,7 @@ class PubmedFTPWorker:
count = 0
for path in paths:
- filename = ftpretr(urljoin(self.host, path))
+ filename = ftpretr("ftp://{}".format(urljoin(self.host, path)))
for blob in xmlstream(filename, 'PubmedArticle', encoding='utf-8'):
soup = BeautifulSoup(blob)
pmid = soup.find('PMID')
@@ -157,11 +157,13 @@ class PubmedFTPWorker:
print("{} DOI ingest caught up".format(self.name))
-class ftpretr(uri):
+class ftpretr(url):
"""
- Fetch (RETR) a remote file to a local temporary file.
+ Fetch (RETR) a remote file given by its URL (e.g.
+ "ftp://ftp.ncbi.nlm.nih.gov/pubmed/updatefiles/pubmed20n1016.xml.gz") to a
+ local temporary file.
"""
- parsed = urlparse(uri)
+ parsed = urlparse(url)
server, path = parsed.netloc, parsed.path
ftp = FTP(self.server)
ftp.login()