diff options
-rw-r--r-- | python/refcat/tasks.py | 6 |
1 file changed, 4 insertions, 2 deletions
diff --git a/python/refcat/tasks.py b/python/refcat/tasks.py index 3489cc1..071184e 100644 --- a/python/refcat/tasks.py +++ b/python/refcat/tasks.py @@ -1574,7 +1574,7 @@ class Wikipedia20211201DOI(Refcat): return WikipediaCitations20211201() def run(self): - with self.output().open("w") as output: + with tempfile.NamedTemporaryFile(delete=False) as tf: with self.input().open() as handle: for line in handle: doc = json.loads(line) @@ -1589,7 +1589,9 @@ class Wikipedia20211201DOI(Refcat): reduced["index"] = i reduced["Title"] = ref.get("Title") fields = [doi, doc["page_title"], json.dumps(reduced)] - output.write("\t".join(fields) + "\n") + tf.write("\t".join(fields) + "\n") + output = shellout("LC_ALL=C sort -S30% {input} > {output}", input=tf.name) + luigi.LocalTarget(output).move(self.output().path) def output(self): return luigi.LocalTarget(path=self.path(ext="tsv")) |