aboutsummaryrefslogtreecommitdiffstats
path: root/python/fatcat_tools/harvest
diff options
context:
space:
mode:
authorMartin Czygan <martin.czygan@gmail.com>2021-10-15 21:56:52 +0200
committerMartin Czygan <martin.czygan@gmail.com>2021-10-15 22:04:04 +0200
commit1b4136d415cbe549c735d459cd0ab1345bb37011 (patch)
treee2630189ca57d5b54c4ec21fdc5008e553aca5e6 /python/fatcat_tools/harvest
parent4be667616ae209fa0efaaa2350c1b75eacf0e344 (diff)
downloadfatcat-1b4136d415cbe549c735d459cd0ab1345bb37011.tar.gz
fatcat-1b4136d415cbe549c735d459cd0ab1345bb37011.zip
pubmed: switch default http site to retrieve update files
Proxy started to throw: "dial tcp: lookup ftp.ncbi.nlm.nih.gov on [::1]:53: read udp [::1]:45178->[::1]:53: read: connection refused". NIH has an HTTP version of its own; try to use that.
Diffstat (limited to 'python/fatcat_tools/harvest')
-rw-r--r--python/fatcat_tools/harvest/pubmed.py6
1 files changed, 4 insertions, 2 deletions
diff --git a/python/fatcat_tools/harvest/pubmed.py b/python/fatcat_tools/harvest/pubmed.py
index 579dd505..92798a99 100644
--- a/python/fatcat_tools/harvest/pubmed.py
+++ b/python/fatcat_tools/harvest/pubmed.py
@@ -263,10 +263,12 @@ def ftpretr(url, max_retries=10, retry_delay=1, proxy_hostport=None):
return f.name
-def ftpretr_via_http_proxy(url, proxy_hostport="159.69.240.245:15201", max_retries=10, retry_delay=1):
+def ftpretr_via_http_proxy(url, proxy_hostport="ftp.ncbi.nlm.nih.gov", max_retries=10, retry_delay=1):
"""
Fetch file from FTP via external HTTP proxy, e.g. ftp.host.com:/a/b/c would
- be retrievable via proxy.com/a/b/c.
+ be retrievable via proxy.com/a/b/c; (in 09/2021 we used
+ "159.69.240.245:15201" as proxy_hostport but that started to fail
+ 2021-10-15; just switch to NIH's http version).
"""
parsed = urlparse(url)
server, path = parsed.netloc, parsed.path