From 9c7562598fc6c9d58c179816cf98d829e5c1e454 Mon Sep 17 00:00:00 2001 From: Martin Czygan Date: Wed, 28 Jul 2021 22:47:04 +0200 Subject: tasks: remove non-ref sets from Bref task --- python/refcat/tasks.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'python') diff --git a/python/refcat/tasks.py b/python/refcat/tasks.py index 792a9c3..1463dff 100644 --- a/python/refcat/tasks.py +++ b/python/refcat/tasks.py @@ -1180,9 +1180,10 @@ class Bref(Refcat): "pmcid": BrefZipPMCID(), "arxiv": BrefZipArxiv(), "fuzzy": BrefZipFuzzy(), - "openlibrary-isbn": BrefOpenLibraryZipISBN(), - "openlibrary-fuzzy": BrefZipOpenLibrary(), - "wikipedia-doi": BrefZipWikiDOI(), + # TODO: we separate ref from non-ref for fusion + # "openlibrary-isbn": BrefOpenLibraryZipISBN(), + # "openlibrary-fuzzy": BrefZipOpenLibrary(), + # "wikipedia-doi": BrefZipWikiDOI(), } def run(self): @@ -1250,7 +1251,9 @@ class RefsByWorkID(Refcat): class BrefCombined(Refcat): """ - Merge the raw references with our biblioref format, such that + TODO: We'll need another final assembly of ref and non-ref matches. + + Merge the raw references from papers with our biblioref format, such that we include all non-matched items and also consider duplicates. This is basically a reduce step, where we group by work id (since the raw -- cgit v1.2.3