1 files changed, 30 insertions, 2 deletions
diff --git a/python/refcat/tasks.py b/python/refcat/tasks.py
index c4e5b56..c6bd7e7 100644
--- a/python/refcat/tasks.py
+++ b/python/refcat/tasks.py
@@ -121,7 +121,7 @@ candidate ref find most likely match in the releases of a journal.
 Also, many partial records do have more information in unstructured; parse this
 out first.
 
-* [ ] OL fuzzy
+* [x] OL fuzzy
 
 Beside 200K links via ISBN, about 10M links via title. Many "year" mismatches,
 which might indicate different editions (debug this later).
@@ -140,7 +140,7 @@ TODO: Unmatched
 ---------------
 
 * raw refs may contain duplicates (e.g. "crossref" and "grobid")
-* refs should appear in order as they are found in the paper
+* refs should appear in the order in which they are found in the paper; can we guarantee that?
 
 Idea was that "source release ident + ref index" should allow completeness and
 order. "crossref" and "grobid" order may vary.
@@ -1066,6 +1066,50 @@ class BrefZipOpenLibrary(Refcat):
 
 
 #
+# Combined Bref File
+#
+class Bref(Refcat):
+    """
+    Combine bref files from various sources.
+
+    The output of every required task is appended to a temporary file, which
+    is then moved to the output path of this task.
+    """
+    def requires(self):
+        """
+        Return the tasks whose outputs get combined, keyed by a short label.
+        """
+        return {
+            "doi": BrefZipDOI(),
+            "pmid": BrefZipPMID(),
+            "pmcid": BrefZipPMCID(),
+            "arxiv": BrefZipArxiv(),
+            "fuzzy": BrefZipFuzzy(),
+            "ol-fuzzy": BrefZipOpenLibrary(),
+        }
+
+    def run(self):
+        """
+        Append every input file to a temporary file, then move it into place.
+        """
+        # Only the name is needed, as the shell appends by path; the context
+        # manager closes the descriptor that a bare mkstemp() would leak.
+        with tempfile.NamedTemporaryFile(delete=False) as tf:
+            tmpf = tf.name
+        for k, v in self.input().items():
+            self.logger.debug("adding {}".format(k))
+            # Pass the path explicitly instead of relying on str(target).
+            shellout('cat "{}" >> "{}"'.format(v.path, tmpf))
+        luigi.LocalTarget(tmpf).move(self.output().path)
+
+    def output(self):
+        """
+        Return the target for the combined file.
+        """
+        return luigi.LocalTarget(path=self.path(ext="json.zst"), format=Zstd)
+
+
+#
 # Extra
 # -----
 #