aboutsummaryrefslogtreecommitdiffstats
path: root/skate/schema.go
diff options
context:
space:
mode:
Diffstat (limited to 'skate/schema.go')
-rw-r--r--skate/schema.go44
1 files changed, 44 insertions, 0 deletions
diff --git a/skate/schema.go b/skate/schema.go
index a9b1e8a..6c96bb8 100644
--- a/skate/schema.go
+++ b/skate/schema.go
@@ -2,6 +2,7 @@ package skate
import (
"fmt"
+ "regexp"
"strconv"
"strings"
@@ -9,6 +10,11 @@ import (
"git.archive.org/martin/cgraph/skate/set"
)
+var ( // Loose patterns for locating ISBN candidates in free text; matches are cleaned and validated after extraction.
+ isbn10Regex = regexp.MustCompile(`[0-9xX -]{10,18}`) // ISBN-10 candidates: unanchored run of 10 to 18 digits, x/X, spaces or hyphens.
+ isbn13Regex = regexp.MustCompile(`9[0-9xX -]{12,20}`) // ISBN-13 candidates: a '9' followed by 12 to 20 digits, x/X, spaces or hyphens; unanchored.
+)
+
// RefToRelease converts a ref to a release. Set an extra.skate.status flag to
// be able to distinguish converted entities later.
func RefToRelease(ref *Ref) (*Release, error) {
@@ -41,6 +47,44 @@ func RefToRelease(ref *Ref) (*Release, error) {
contribs[i].RawName = name
}
release.Contribs = contribs
+ // XXX: Find ISBN in unstructured. Might be expensive, do we need a flag?
+ unlo := strings.ToLower(ref.Biblio.Unstructured)
+ if strings.Contains(unlo, "isbn") {
+ // ISBN: 10: 0137822693, pp: 373
+ // Robotec, E. (1996). Scorbot ER VII, User's Manual, Eshed Robotec,
+ // ISBN9652910333. Shannon, C. (1948). A Mathematical Theory of
+ // Communication. The Bell System Technical Journal. July; October,
+ // Vol. 27, pp. 379-423; 623-656.
+ // Artech House, ISBN: 978-1-60807-201-9, 2011.
+ // ...
+ var (
+ candidates10 = isbn10Regex.FindAllString(ref.Biblio.Unstructured, -1)
+ candidates13 = isbn13Regex.FindAllString(ref.Biblio.Unstructured, -1)
+ valid = set.New()
+ )
+ for _, v := range append(candidates10, candidates13...) {
+ var u []rune
+ for _, c := range v {
+ if c >= '0' && c <= '9' || c == 'x' || c == 'X' {
+ u = append(u, c)
+ }
+ }
+ s := string(u)
+ if !isbn.Validate(s) {
+ continue
+ }
+ if len(s) < 12 {
+ w, err := isbn.To13(s)
+ if err != nil {
+ continue
+ }
+ valid.Add(w)
+ } else {
+ valid.Add(s)
+ }
+ }
+ release.ExtIDs.ISBN = valid.Slice()
+ }
return &release, nil
}