diff options
-rw-r--r-- | skate/schema.go | 10 | ||||
-rw-r--r-- | skate/schema_test.go | 42 |
2 files changed, 48 insertions, 4 deletions
diff --git a/skate/schema.go b/skate/schema.go index 52aa91a..f97b55a 100644 --- a/skate/schema.go +++ b/skate/schema.go @@ -59,13 +59,13 @@ func RefToRelease(ref *Ref) (*Release, error) { } release.Contribs = contribs if strings.Contains(strings.ToLower(ref.Biblio.Unstructured), "isbn") { - release.ExtIDs.ISBN = parseIsbn(ref.Biblio.Unstructured) + release.ExtIDs.ISBN = ParseIsbn(ref.Biblio.Unstructured) } return &release, nil } -// parseIsbn tries to find and validate ISBN from unstructured data. -func parseIsbn(s string) []string { +// ParseIsbn tries to find and validate ISBN from unstructured data. +func ParseIsbn(s string) []string { // ISBN: 10: 0137822693, pp: 373 // Robotec, E. (1996). Scorbot ER VII, User's Manual, Eshed Robotec, // ISBN9652910333. Shannon, C. (1948). A Mathematical Theory of @@ -126,6 +126,10 @@ type Biblio struct { Url string `json:"url,omitempty"` Volume string `json:"volume,omitempty"` Year int64 `json:"year,omitempty"` + // More non-standard fields go into extra. + Extra struct { + ISBN []string `json:"isbn"` + } `json:"extra"` } // Ref is a reference document, can be very partial. diff --git a/skate/schema_test.go b/skate/schema_test.go index 0f84703..56d2d5b 100644 --- a/skate/schema_test.go +++ b/skate/schema_test.go @@ -198,9 +198,49 @@ func TestParseIsbn(t *testing.T) { }, } for _, c := range cases { - r := parseIsbn(c.s) + r := ParseIsbn(c.s) if !reflect.DeepEqual(r, c.result) { t.Fatalf("got %v, want %v", r, c.result) } } } + +func BenchmarkParseIsbn(b *testing.B) { + for n := 0; n < b.N; n++ { + ParseIsbn("House Pvt. Limited., (2006), ISBN 9788183561426. Date accessed: August 2015.") + } +} + +func BenchmarkRefToRelease(b *testing.B) { + var ref Ref + _ = json.Unmarshal([]byte(` + { + "biblio": { + "arxiv_id": "123", + "container_name": "IEEE Trans. Pattern Anal. Mach. Intell", + "contrib_raw_names": [ + "M Ben-Ezra", + "S K Nayar" + ], + "doi": "123", + "issue": "6", + "pages": "689-698", + "pmcid": "123", + "publisher": "ABC", + "title": "Motion-based motion deblurring", + "unstructured": "M. Ben-Ezra and S. K. Nayar. Motion-based motion deblurring. IEEE Trans. Pattern Anal. Mach. Intell., 26(6):689-698, 2004. 2", + "url": "https://abc.com", + "volume": "26", + "year": 2004 + }, + "index": 0, + "key": "b0", + "ref_source": "grobid", + "release_ident": "26qgat7mzrerjacrlsz3gdmcgy", + "release_year": 2014, + "work_ident": "aaaoe2wcbvdjthnv36dlqgkray" + }`), &ref) + for n := 0; n < b.N; n++ { + _, _ = RefToRelease(&ref) + } +} |