diff options
Diffstat (limited to 'skate/reduce.go')
-rw-r--r-- | skate/reduce.go | 19 |
1 files changed, 12 insertions, 7 deletions
diff --git a/skate/reduce.go b/skate/reduce.go index 7d789f5..b28d976 100644 --- a/skate/reduce.go +++ b/skate/reduce.go @@ -155,9 +155,10 @@ func ZippyExactWiki(releases, wiki io.Reader, mr MatchResult, w io.Writer) error keyer = makeKeyFunc("\t", 1) grouper = func(g *zipkey.Group) error { var ( - target *Release - wiki *MinimalCitations - err error + target *Release + wiki *MinimalCitations + key, lang, encodedPage string + err error ) if len(g.G0) == 0 || len(g.G1) == 0 { return nil @@ -176,16 +177,20 @@ func ZippyExactWiki(releases, wiki io.Reader, mr MatchResult, w io.Writer) error var bref BiblioRef // We use lowercase base32 w/o padding of the original // PageTitle as component for the id. XXX: ok for now? - key := fmt.Sprintf("wikipedia_%s_%s", - strings.ToLower(b32enc.EncodeToString([]byte(wiki.PageTitle))), - target.Ident) + if wiki.Language == "" { + lang = "en" + } else { + lang = wiki.Language + } + encodedPage = strings.ToLower(b32enc.EncodeToString([]byte(lang + ":" + wiki.PageTitle))) + key = fmt.Sprintf("wikipedia_%s_%s", encodedPage, target.Ident) if seen.Contains(key) { continue } seen.Add(key) bref.Key = key // XXX: We currently only use "en" subset. - bref.SourceWikipediaArticle = fmt.Sprintf("en:%s", wiki.PageTitle) + bref.SourceWikipediaArticle = fmt.Sprintf("%s:%s", lang, wiki.PageTitle) bref.TargetReleaseIdent = target.Ident bref.TargetWorkIdent = target.WorkID bref.MatchProvenance = "wikipedia" |