diff options
author | Martin Czygan <martin.czygan@gmail.com> | 2021-06-21 20:13:48 +0200 |
---|---|---|
committer | Martin Czygan <martin.czygan@gmail.com> | 2021-06-21 20:13:48 +0200 |
commit | dea840992ea80288760dacb0c9c80bc8b085978f (patch) | |
tree | de43bf75a5ecf6572f54ce585e8d71b8c04d7ef3 | |
parent | 1d24518ddd1b61d8291af2b8ca5b1a5ac7ef705b (diff) | |
download | refcat-dea840992ea80288760dacb0c9c80bc8b085978f.tar.gz refcat-dea840992ea80288760dacb0c9c80bc8b085978f.zip |
add fallback self-link removal
This should be handled at e.g. doi matching time.
-rw-r--r-- | skate/zippy.go | 13 |
1 files changed, 13 insertions, 0 deletions
diff --git a/skate/zippy.go b/skate/zippy.go
index 2317b56..a846a3b 100644
--- a/skate/zippy.go
+++ b/skate/zippy.go
@@ -354,6 +354,7 @@ func ZippyBrefAugment(bref, raw io.Reader, w io.Writer) error {
 			// exact match, and twice unmatched).
 			// TODO: remove duplicates
 			matched = deduplicateBrefs(matched)
+			matched = removeSelfLinks(matched)
 			for _, bref := range matched {
 				stats.total++
 				if err := enc.Encode(bref); err != nil {
@@ -369,6 +370,18 @@ func ZippyBrefAugment(bref, raw io.Reader, w io.Writer) error {
 	return err
 }
 
+// removeSelfLinks removes self-referential links. Those should be caught
+// earlier at the root cause later.
+func removeSelfLinks(brefs []*BiblioRef) (result []*BiblioRef) {
+	for _, bref := range brefs {
+		if bref.SourceReleaseIdent == bref.TargetReleaseIdent {
+			continue
+		}
+		result = append(result, bref)
+	}
+	return result
+}
+
 // deduplicateBrefs deduplicates by the document id (for elasticsearch), which
 // may help filter out some duplicates but not all.
 func deduplicateBrefs(brefs []*BiblioRef) []*BiblioRef {