From dea840992ea80288760dacb0c9c80bc8b085978f Mon Sep 17 00:00:00 2001 From: Martin Czygan Date: Mon, 21 Jun 2021 20:13:48 +0200 Subject: add fallback self-link removal This should be handled at e.g. doi matching time. --- skate/zippy.go | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/skate/zippy.go b/skate/zippy.go index 2317b56..a846a3b 100644 --- a/skate/zippy.go +++ b/skate/zippy.go @@ -354,6 +354,7 @@ func ZippyBrefAugment(bref, raw io.Reader, w io.Writer) error { // exact match, and twice unmatched). // TODO: remove duplicates matched = deduplicateBrefs(matched) + matched = removeSelfLinks(matched) for _, bref := range matched { stats.total++ if err := enc.Encode(bref); err != nil { @@ -369,6 +370,18 @@ func ZippyBrefAugment(bref, raw io.Reader, w io.Writer) error { return err } +// removeSelfLinks drops every bref whose SourceReleaseIdent equals its +// TargetReleaseIdent; a fallback until self-links are prevented at match time. +func removeSelfLinks(brefs []*BiblioRef) (result []*BiblioRef) { + for _, bref := range brefs { + if bref.SourceReleaseIdent == bref.TargetReleaseIdent { + continue + } + result = append(result, bref) + } + return result +} + // deduplicateBrefs deduplicates by the document id (for elasticsearch), which // may help filter out some duplicates but not all. func deduplicateBrefs(brefs []*BiblioRef) []*BiblioRef { -- cgit v1.2.3