aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Martin Czygan <martin.czygan@gmail.com> 2021-06-02 01:39:20 +0200
committer: Martin Czygan <martin.czygan@gmail.com> 2021-06-02 01:39:20 +0200
commit: 5f44c236f82d3d02f9dd0061f0a0ab3d8010a3f5 (patch)
tree: c7e44415ab45e3f2e5ee27a8d139e833f78fdd10
parent: 60dfd396a815705e4e6a52fb9321837f43ed2fc1 (diff)
download: refcat-5f44c236f82d3d02f9dd0061f0a0ab3d8010a3f5.tar.gz
refcat-5f44c236f82d3d02f9dd0061f0a0ab3d8010a3f5.zip
tasks: cleanup url list
-rw-r--r-- extra/refurls/README.md 3
-rw-r--r-- python/refcat/tasks.py 2
2 files changed, 4 insertions, 1 deletion
diff --git a/extra/refurls/README.md b/extra/refurls/README.md
new file mode 100644
index 0000000..81ddd19
--- /dev/null
+++ b/extra/refurls/README.md
@@ -0,0 +1,3 @@
+# URLs found in References
+
+
diff --git a/python/refcat/tasks.py b/python/refcat/tasks.py
index 7c1fabc..4eefdcc 100644
--- a/python/refcat/tasks.py
+++ b/python/refcat/tasks.py
@@ -278,7 +278,7 @@ class URLList(Refcat):
cut -f 3 |
skate-cleanup -X -c url -B -S -f 1 |
LC_ALL=C sort -u -T {tmpdir} -k1,1 -S25% --parallel 4 |
- LC_ALL=C grep ^http |
+ LC_ALL=C grep -E 'https?://'
zstd -T0 -c > {output}
""",
n=self.n,