about summary refs log tree commit diff stats
path: root/skate/reduce.go
diff options
context:
space:
mode:
Diffstat (limited to 'skate/reduce.go')
-rw-r--r-- skate/reduce.go 19
1 files changed, 12 insertions, 7 deletions
diff --git a/skate/reduce.go b/skate/reduce.go
index 7d789f5..b28d976 100644
--- a/skate/reduce.go
+++ b/skate/reduce.go
@@ -155,9 +155,10 @@ func ZippyExactWiki(releases, wiki io.Reader, mr MatchResult, w io.Writer) error
keyer = makeKeyFunc("\t", 1)
grouper = func(g *zipkey.Group) error {
var (
- target *Release
- wiki *MinimalCitations
- err error
+ target *Release
+ wiki *MinimalCitations
+ key, lang, encodedPage string
+ err error
)
if len(g.G0) == 0 || len(g.G1) == 0 {
return nil
@@ -176,16 +177,20 @@ func ZippyExactWiki(releases, wiki io.Reader, mr MatchResult, w io.Writer) error
var bref BiblioRef
// We use lowercase base32 w/o padding of the original
// PageTitle as component for the id. XXX: ok for now?
- key := fmt.Sprintf("wikipedia_%s_%s",
- strings.ToLower(b32enc.EncodeToString([]byte(wiki.PageTitle))),
- target.Ident)
+ if wiki.Language == "" {
+ lang = "en"
+ } else {
+ lang = wiki.Language
+ }
+ encodedPage = strings.ToLower(b32enc.EncodeToString([]byte(lang + ":" + wiki.PageTitle)))
+ key = fmt.Sprintf("wikipedia_%s_%s", encodedPage, target.Ident)
if seen.Contains(key) {
continue
}
seen.Add(key)
bref.Key = key
// XXX: We currently only use "en" subset.
- bref.SourceWikipediaArticle = fmt.Sprintf("en:%s", wiki.PageTitle)
+ bref.SourceWikipediaArticle = fmt.Sprintf("%s:%s", lang, wiki.PageTitle)
bref.TargetReleaseIdent = target.Ident
bref.TargetWorkIdent = target.WorkID
bref.MatchProvenance = "wikipedia"