From a0c5bd6fe86b73c5824ea8966f785ec298f7d188 Mon Sep 17 00:00:00 2001 From: Martin Czygan Date: Thu, 13 Jan 2022 19:19:00 +0100 Subject: tasks: sort output of Wikipedia20211201DOI --- python/refcat/tasks.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/python/refcat/tasks.py b/python/refcat/tasks.py index 3489cc1..071184e 100644 --- a/python/refcat/tasks.py +++ b/python/refcat/tasks.py @@ -1574,7 +1574,7 @@ class Wikipedia20211201DOI(Refcat): return WikipediaCitations20211201() def run(self): - with self.output().open("w") as output: + with tempfile.NamedTemporaryFile(delete=False) as tf: with self.input().open() as handle: for line in handle: doc = json.loads(line) @@ -1589,7 +1589,9 @@ class Wikipedia20211201DOI(Refcat): reduced["index"] = i reduced["Title"] = ref.get("Title") fields = [doi, doc["page_title"], json.dumps(reduced)] - output.write("\t".join(fields) + "\n") + tf.write("\t".join(fields) + "\n") + output = shellout("LC_ALL=C sort -S30% {input} > {output}", input=tf.name) + luigi.LocalTarget(output).move(self.output().path) def output(self): return luigi.LocalTarget(path=self.path(ext="tsv")) -- cgit v1.2.3