diff options
author | Martin Czygan <martin.czygan@gmail.com> | 2021-07-08 18:36:00 +0200 |
---|---|---|
committer | Martin Czygan <martin.czygan@gmail.com> | 2021-07-08 18:36:00 +0200 |
commit | 3525a69869cbf2c4ce75a9a2054a0b33bd914dda (patch) | |
tree | 3e8f55dcf249459d0ff11e01016d812e23162323 /python | |
parent | 1ad94cc39ee0a6dfcb1fa19f8603b05df8530ae8 (diff) | |
download | refcat-3525a69869cbf2c4ce75a9a2054a0b33bd914dda.tar.gz refcat-3525a69869cbf2c4ce75a9a2054a0b33bd914dda.zip |
note on timings
Diffstat (limited to 'python')
-rw-r--r-- | python/notes/version_4.md | 8 | ||||
-rw-r--r-- | python/refcat/tasks.py | 2 |
2 files changed, 9 insertions, 1 deletion
diff --git a/python/notes/version_4.md b/python/notes/version_4.md index 4d8e9e3..e7be1e6 100644 --- a/python/notes/version_4.md +++ b/python/notes/version_4.md @@ -883,3 +883,11 @@ igyewr6er5epfozhk7dyfqa5tu igyewr6er5epfozhk7dyfqa5tu exact doi * linked open library titles: 12394810 * URLs extracted from corpus: 25405592 * sample ratio IA/URL from corpus (N=100000): + +---- + +# Timing + +Completed 24/25 jobs in 42h with a few failures from disk space issues. + +* 2562m34.844s diff --git a/python/refcat/tasks.py b/python/refcat/tasks.py index 0db6f86..58e0e38 100644 --- a/python/refcat/tasks.py +++ b/python/refcat/tasks.py @@ -1411,7 +1411,7 @@ class UnmatchedResolveJournalNamesMapped(Refcat): class WikipediaDOI(Refcat): """ - Sorted DOI keys from wikipedia. + Sorted DOI keys from wikipedia. Takes about a minute. """ def requires(self): return WikipediaCitationsMinimalDataset() |