aboutsummaryrefslogtreecommitdiffstats
path: root/skate
diff options
context:
space:
mode:
author Martin Czygan <martin.czygan@gmail.com> 2021-07-10 00:10:05 +0200
committer Martin Czygan <martin.czygan@gmail.com> 2021-07-10 00:10:05 +0200
commit 1737eb03e6c7cd5d316ac081a4cea07787ad4429 (patch)
tree 3cb326c9ccdcc006c0ba6c0929943281d0372ab9 /skate
parent f8ee585574d31ab9c7a281ea2db095e37ad83389 (diff)
download refcat-1737eb03e6c7cd5d316ac081a4cea07787ad4429.tar.gz
refcat-1737eb03e6c7cd5d316ac081a4cea07787ad4429.zip
reduce: filter out duplicate wiki links
Diffstat (limited to 'skate')
-rw-r--r-- skate/reduce.go 8
1 files changed, 8 insertions, 0 deletions
diff --git a/skate/reduce.go b/skate/reduce.go
index cd63bb1..255f281 100644
--- a/skate/reduce.go
+++ b/skate/reduce.go
@@ -158,6 +158,10 @@ func ZippyExactWiki(releases, wiki io.Reader, mr MatchResult, w io.Writer) error
if target, err = parseRelease(Cut(g.G0[0], 2)); err != nil {
return err
}
+ // Sort out a few duplicates, e.g.
+ // lfqxs3tv_obj3cjr5wrhjffnmgze5jn7a4a,
+ // z2kc233qnfxwszbaojswgzlqorxxe_f7mn45dvyvespbv2pxgyt674k4, ...
+ seen := set.New()
for _, line := range g.G1 {
if wiki, err = parseWiki(Cut(line, 3)); err != nil {
return err
@@ -168,6 +172,10 @@ func ZippyExactWiki(releases, wiki io.Reader, mr MatchResult, w io.Writer) error
key := fmt.Sprintf("%s_%s",
strings.ToLower(b32enc.EncodeToString([]byte(wiki.PageTitle))),
target.Ident)
+ if seen.Contains(key) {
+ continue
+ }
+ seen.Add(key)
bref.Key = key
bref.SourceWikipediaArticle = wiki.PageTitle
bref.TargetReleaseIdent = target.Ident