diff options
Diffstat (limited to 'skate/schema.go')
-rw-r--r-- | skate/schema.go | 44 |
1 files changed, 44 insertions, 0 deletions
diff --git a/skate/schema.go b/skate/schema.go
index a9b1e8a..6c96bb8 100644
--- a/skate/schema.go
+++ b/skate/schema.go
@@ -2,6 +2,7 @@ package skate
 
 import (
 	"fmt"
+	"regexp"
 	"strconv"
 	"strings"
 
@@ -9,6 +10,11 @@ import (
 	"git.archive.org/martin/cgraph/skate/set"
 )
 
+var (
+	isbn10Regex = regexp.MustCompile(`[0-9xX -]{10,18}`)
+	isbn13Regex = regexp.MustCompile(`9[0-9xX -]{12,20}`)
+)
+
 // RefToRelease converts a ref to a release. Set a extra.skate.status flag to
 // be able to distinguish converted entities later.
 func RefToRelease(ref *Ref) (*Release, error) {
@@ -41,6 +47,44 @@ func RefToRelease(ref *Ref) (*Release, error) {
 		contribs[i].RawName = name
 	}
 	release.Contribs = contribs
+	// XXX: Find ISBN in unstructured. Might be expensive, do we need a flag?
+	unlo := strings.ToLower(ref.Biblio.Unstructured)
+	if strings.Contains(unlo, "isbn") {
+		// ISBN: 10: 0137822693, pp: 373
+		// Robotec, E. (1996). Scorbot ER VII, User's Manual, Eshed Robotec,
+		// ISBN9652910333. Shannon, C. (1948). A Mathematical Theory of
+		// Communication. The Bell System Technical Journal. July; October,
+		// Vol. 27, pp. 379-423; 623-656.
+		// Artech House, ISBN: 978-1-60807-201-9, 2011.
+		// ...
+		var (
+			candidates10 = isbn10Regex.FindAllString(ref.Biblio.Unstructured, -1)
+			candidates13 = isbn13Regex.FindAllString(ref.Biblio.Unstructured, -1)
+			valid        = set.New()
+		)
+		for _, v := range append(candidates10, candidates13...) {
+			var u []rune
+			for _, c := range v {
+				if c >= '0' && c <= '9' || c == 'x' || c == 'X' {
+					u = append(u, c)
+				}
+			}
+			s := string(u)
+			if !isbn.Validate(s) {
+				continue
+			}
+			if len(s) < 12 {
+				w, err := isbn.To13(s)
+				if err != nil {
+					continue
+				}
+				valid.Add(w)
+			} else {
+				valid.Add(s)
+			}
+		}
+		release.ExtIDs.ISBN = valid.Slice()
+	}
 	return &release, nil
 }