aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMartin Czygan <martin.czygan@gmail.com>2021-06-21 20:13:48 +0200
committerMartin Czygan <martin.czygan@gmail.com>2021-06-21 20:13:48 +0200
commitdea840992ea80288760dacb0c9c80bc8b085978f (patch)
treede43bf75a5ecf6572f54ce585e8d71b8c04d7ef3
parent1d24518ddd1b61d8291af2b8ca5b1a5ac7ef705b (diff)
downloadrefcat-dea840992ea80288760dacb0c9c80bc8b085978f.tar.gz
refcat-dea840992ea80288760dacb0c9c80bc8b085978f.zip
add fallback self-link removal
This should be handled at e.g. doi matching time.
-rw-r--r--skate/zippy.go13
1 files changed, 13 insertions, 0 deletions
diff --git a/skate/zippy.go b/skate/zippy.go
index 2317b56..a846a3b 100644
--- a/skate/zippy.go
+++ b/skate/zippy.go
@@ -354,6 +354,7 @@ func ZippyBrefAugment(bref, raw io.Reader, w io.Writer) error {
// exact match, and twice unmatched).
// TODO: remove duplicates
matched = deduplicateBrefs(matched)
+ matched = removeSelfLinks(matched)
for _, bref := range matched {
stats.total++
if err := enc.Encode(bref); err != nil {
@@ -369,6 +370,18 @@ func ZippyBrefAugment(bref, raw io.Reader, w io.Writer) error {
return err
}
+// removeSelfLinks returns the brefs whose source and target release idents
+// differ, dropping self-links. TODO: catch self-links at the root cause instead.
+func removeSelfLinks(brefs []*BiblioRef) (result []*BiblioRef) {
+ for _, bref := range brefs {
+ if bref.SourceReleaseIdent == bref.TargetReleaseIdent {
+ continue
+ }
+ result = append(result, bref)
+ }
+ return result
+}
+
// deduplicateBrefs deduplicates by the document id (for elasticsearch), which
// may help filter out some duplicates but not all.
func deduplicateBrefs(brefs []*BiblioRef) []*BiblioRef {