aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Martin Czygan <martin.czygan@gmail.com> 2021-07-08 18:36:00 +0200
committer Martin Czygan <martin.czygan@gmail.com> 2021-07-08 18:36:00 +0200
commit 3525a69869cbf2c4ce75a9a2054a0b33bd914dda (patch)
tree 3e8f55dcf249459d0ff11e01016d812e23162323
parent 1ad94cc39ee0a6dfcb1fa19f8603b05df8530ae8 (diff)
download refcat-3525a69869cbf2c4ce75a9a2054a0b33bd914dda.tar.gz
refcat-3525a69869cbf2c4ce75a9a2054a0b33bd914dda.zip
note on timings
-rw-r--r-- python/notes/version_4.md 8
-rw-r--r-- python/refcat/tasks.py 2
2 files changed, 9 insertions, 1 deletions
diff --git a/python/notes/version_4.md b/python/notes/version_4.md
index 4d8e9e3..e7be1e6 100644
--- a/python/notes/version_4.md
+++ b/python/notes/version_4.md
@@ -883,3 +883,11 @@ igyewr6er5epfozhk7dyfqa5tu igyewr6er5epfozhk7dyfqa5tu exact doi
* linked open library titles: 12394810
* URLs extracted from corpus: 25405592
* sample ratio IA/URL from corpus (N=100000):
+
+----
+
+# Timing
+
+Completed 24/25 jobs in 42h with a few failures from disk space issues.
+
+* 2562m34.844s
diff --git a/python/refcat/tasks.py b/python/refcat/tasks.py
index 0db6f86..58e0e38 100644
--- a/python/refcat/tasks.py
+++ b/python/refcat/tasks.py
@@ -1411,7 +1411,7 @@ class UnmatchedResolveJournalNamesMapped(Refcat):
class WikipediaDOI(Refcat):
"""
- Sorted DOI keys from wikipedia.
+ Sorted DOI keys from wikipedia. Takes about a minute.
"""
def requires(self):
return WikipediaCitationsMinimalDataset()