diff options
Diffstat (limited to 'skate')
| -rw-r--r-- | skate/fixtures/ref_with_isbn.json | 13 | ||||
| -rw-r--r-- | skate/schema.go | 44 | ||||
| -rw-r--r-- | skate/schema_test.go | 1 | 
3 files changed, 58 insertions, 0 deletions
diff --git a/skate/fixtures/ref_with_isbn.json b/skate/fixtures/ref_with_isbn.json new file mode 100644 index 0000000..2cd8480 --- /dev/null +++ b/skate/fixtures/ref_with_isbn.json @@ -0,0 +1,13 @@ +{ +  "biblio": { +    "title": "Antibiotic Resistant Bacteria -A Continuous Challenge in the New Millennium Edited by Dr. Marina Pana ISBN", +    "unstructured": "www.intechopen.com Antibiotic Resistant Bacteria -A Continuous Challenge in the New Millennium Edited by Dr. Marina Pana ISBN 978-953-51-0472-8" +  }, +  "index": 443, +  "key": "b443", +  "ref_source": "grobid", +  "release_ident": "n4zvrgchmfexdb6gesxfgxykxi", +  "release_year": 2012, +  "work_ident": "aaan6iujevgpnmcif2hb62uaai" +} + diff --git a/skate/schema.go b/skate/schema.go index a9b1e8a..6c96bb8 100644 --- a/skate/schema.go +++ b/skate/schema.go @@ -2,6 +2,7 @@ package skate  import (  	"fmt" +	"regexp"  	"strconv"  	"strings" @@ -9,6 +10,11 @@ import (  	"git.archive.org/martin/cgraph/skate/set"  ) +var ( +	isbn10Regex = regexp.MustCompile(`[0-9xX -]{10,18}`) +	isbn13Regex = regexp.MustCompile(`9[0-9xX -]{12,20}`) +) +  // RefToRelease converts a ref to a release. Set a extra.skate.status flag to  // be able to distinguish converted entities later.  func RefToRelease(ref *Ref) (*Release, error) { @@ -41,6 +47,44 @@ func RefToRelease(ref *Ref) (*Release, error) {  		contribs[i].RawName = name  	}  	release.Contribs = contribs +	// XXX: Find ISBN in unstructured. Might be expensive, do we need a flag? +	unlo := strings.ToLower(ref.Biblio.Unstructured) +	if strings.Contains(unlo, "isbn") { +		// ISBN: 10: 0137822693, pp: 373 +		// Robotec, E. (1996). Scorbot ER VII, User's Manual, Eshed Robotec, +		// ISBN9652910333. Shannon, C. (1948). A Mathematical Theory of +		// Communication. The Bell System Technical Journal. July; October, +		// Vol. 27, pp. 379-423; 623-656. +		// Artech House, ISBN: 978-1-60807-201-9, 2011. +		// ... +		var ( +			candidates10 = isbn10Regex.FindAllString(ref.Biblio.Unstructured, -1) +			candidates13 = isbn13Regex.FindAllString(ref.Biblio.Unstructured, -1) +			valid        = set.New() +		) +		for _, v := range append(candidates10, candidates13...) { +			var u []rune +			for _, c := range v { +				if c >= '0' && c <= '9' || c == 'x' || c == 'X' { +					u = append(u, c) +				} +			} +			s := string(u) +			if !isbn.Validate(s) { +				continue +			} +			if len(s) < 12 { +				w, err := isbn.To13(s) +				if err != nil { +					continue +				} +				valid.Add(w) +			} else { +				valid.Add(s) +			} +		} +		release.ExtIDs.ISBN = valid.Slice() +	}  	return &release, nil  } diff --git a/skate/schema_test.go b/skate/schema_test.go index 6a95115..c1cec35 100644 --- a/skate/schema_test.go +++ b/skate/schema_test.go @@ -9,6 +9,7 @@ import (  	"github.com/nsf/jsondiff"  ) +// XXX: Work on JSON directly, as structs can get unwieldy.  func TestOpenLibraryToRelease(t *testing.T) {  	var cases = []struct {  		work    OpenLibraryWork  | 
