From 3525a69869cbf2c4ce75a9a2054a0b33bd914dda Mon Sep 17 00:00:00 2001
From: Martin Czygan
Date: Thu, 8 Jul 2021 18:36:00 +0200
Subject: note on timings

---
 python/notes/version_4.md | 8 ++++++++
 python/refcat/tasks.py    | 2 +-
 2 files changed, 9 insertions(+), 1 deletion(-)

(limited to 'python')

diff --git a/python/notes/version_4.md b/python/notes/version_4.md
index 4d8e9e3..e7be1e6 100644
--- a/python/notes/version_4.md
+++ b/python/notes/version_4.md
@@ -883,3 +883,11 @@ igyewr6er5epfozhk7dyfqa5tu igyewr6er5epfozhk7dyfqa5tu exact doi
 * linked open library titles: 12394810
 * URLs extracted from corpus: 25405592
 * sample ratio IA/URL from corpus (N=100000):
+
+----
+
+# Timing
+
+Completed 24/25 jobs in 42h with a few failures from disk space issues.
+
+* 2562m34.844s
diff --git a/python/refcat/tasks.py b/python/refcat/tasks.py
index 0db6f86..58e0e38 100644
--- a/python/refcat/tasks.py
+++ b/python/refcat/tasks.py
@@ -1411,7 +1411,7 @@ class UnmatchedResolveJournalNamesMapped(Refcat):
 
 class WikipediaDOI(Refcat):
     """
-    Sorted DOI keys from wikipedia.
+    Sorted DOI keys from wikipedia. Takes about a minute.
     """
     def requires(self):
         return WikipediaCitationsMinimalDataset()
-- 
cgit v1.2.3