diff options
author | Martin Czygan <martin.czygan@gmail.com> | 2022-01-13 19:19:00 +0100 |
---|---|---|
committer | Martin Czygan <martin.czygan@gmail.com> | 2022-01-13 19:19:00 +0100 |
commit | a0c5bd6fe86b73c5824ea8966f785ec298f7d188 (patch) | |
tree | 30e1d06c8c9756725fa86ca47f5eaf8903442efc | |
parent | 97b293dbed0b699602d88889224677b6b4e8d7e5 (diff) | |
download | refcat-a0c5bd6fe86b73c5824ea8966f785ec298f7d188.tar.gz refcat-a0c5bd6fe86b73c5824ea8966f785ec298f7d188.zip |
tasks: sort output of Wikipedia20211201DOI
-rw-r--r-- | python/refcat/tasks.py | 6 |
1 files changed, 4 insertions, 2 deletions
```diff
diff --git a/python/refcat/tasks.py b/python/refcat/tasks.py
index 3489cc1..071184e 100644
--- a/python/refcat/tasks.py
+++ b/python/refcat/tasks.py
@@ -1574,7 +1574,7 @@ class Wikipedia20211201DOI(Refcat):
         return WikipediaCitations20211201()
 
     def run(self):
-        with self.output().open("w") as output:
+        with tempfile.NamedTemporaryFile(delete=False) as tf:
             with self.input().open() as handle:
                 for line in handle:
                     doc = json.loads(line)
@@ -1589,7 +1589,9 @@ class Wikipedia20211201DOI(Refcat):
                         reduced["index"] = i
                         reduced["Title"] = ref.get("Title")
                         fields = [doi, doc["page_title"], json.dumps(reduced)]
-                        output.write("\t".join(fields) + "\n")
+                        tf.write("\t".join(fields) + "\n")
+        output = shellout("LC_ALL=C sort -S30% {input} > {output}", input=tf.name)
+        luigi.LocalTarget(output).move(self.output().path)
 
     def output(self):
         return luigi.LocalTarget(path=self.path(ext="tsv"))
```