aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Martin Czygan <martin.czygan@gmail.com> 2021-04-28 00:00:18 +0200
committer Martin Czygan <martin.czygan@gmail.com> 2021-04-28 00:00:18 +0200
commit 59afe1e58dcaed9c328d72fcdffb0f669c34d6b6 (patch)
tree 0aa02f2561975c65daf0b497279f738ae851b76a
parent cda975f8e1eb6572f62f7a614cdba62573618e23 (diff)
download refcat-59afe1e58dcaed9c328d72fcdffb0f669c34d6b6.tar.gz
refcat-59afe1e58dcaed9c328d72fcdffb0f669c34d6b6.zip
parse out isbn from refs
-rw-r--r-- skate/schema.go 71
1 files changed, 37 insertions, 34 deletions
diff --git a/skate/schema.go b/skate/schema.go
index 6c96bb8..6f872eb 100644
--- a/skate/schema.go
+++ b/skate/schema.go
@@ -47,45 +47,48 @@ func RefToRelease(ref *Ref) (*Release, error) {
contribs[i].RawName = name
}
release.Contribs = contribs
- // XXX: Find ISBN in unstructured. Might be expensive, do we need a flag?
- unlo := strings.ToLower(ref.Biblio.Unstructured)
- if strings.Contains(unlo, "isbn") {
- // ISBN: 10: 0137822693, pp: 373
- // Robotec, E. (1996). Scorbot ER VII, User's Manual, Eshed Robotec,
- // ISBN9652910333. Shannon, C. (1948). A Mathematical Theory of
- // Communication. The Bell System Technical Journal. July; October,
- // Vol. 27, pp. 379-423; 623-656.
- // Artech House, ISBN: 978-1-60807-201-9, 2011.
- // ...
- var (
- candidates10 = isbn10Regex.FindAllString(ref.Biblio.Unstructured, -1)
- candidates13 = isbn13Regex.FindAllString(ref.Biblio.Unstructured, -1)
- valid = set.New()
- )
- for _, v := range append(candidates10, candidates13...) {
- var u []rune
- for _, c := range v {
- if c >= '0' && c <= '9' || c == 'x' || c == 'X' {
- u = append(u, c)
- }
+ if strings.Contains(strings.ToLower(ref.Biblio.Unstructured), "isbn") {
+ release.ExtIDs.ISBN = parseIsbn(ref.Biblio.Unstructured)
+ }
+ return &release, nil
+}
+
+// parseIsbn tries to find and validate ISBN from unstructured data.
+func parseIsbn(s string) []string {
+ // ISBN: 10: 0137822693, pp: 373
+ // Robotec, E. (1996). Scorbot ER VII, User's Manual, Eshed Robotec,
+ // ISBN9652910333. Shannon, C. (1948). A Mathematical Theory of
+ // Communication. The Bell System Technical Journal. July; October,
+ // Vol. 27, pp. 379-423; 623-656.
+ // Artech House, ISBN: 978-1-60807-201-9, 2011.
+ // ...
+ var (
+ candidates10 = isbn10Regex.FindAllString(s, -1)
+ candidates13 = isbn13Regex.FindAllString(s, -1)
+ valid = set.New()
+ )
+ for _, v := range append(candidates10, candidates13...) {
+ var u []rune
+ for _, c := range v {
+ if c >= '0' && c <= '9' || c == 'x' || c == 'X' {
+ u = append(u, c)
}
- s := string(u)
- if !isbn.Validate(s) {
+ }
+ s := string(u)
+ if !isbn.Validate(s) {
+ continue
+ }
+ if len(s) < 12 {
+ w, err := isbn.To13(s)
+ if err != nil {
continue
}
- if len(s) < 12 {
- w, err := isbn.To13(s)
- if err != nil {
- continue
- }
- valid.Add(w)
- } else {
- valid.Add(s)
- }
+ valid.Add(w)
+ } else {
+ valid.Add(s)
}
- release.ExtIDs.ISBN = valid.Slice()
}
- return &release, nil
+ return valid.Slice()
}
// Ref is a reference document, can be very partial.