aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r-- skate/schema.go 10
-rw-r--r-- skate/schema_test.go 42
2 files changed, 48 insertions, 4 deletions
diff --git a/skate/schema.go b/skate/schema.go
index 52aa91a..f97b55a 100644
--- a/skate/schema.go
+++ b/skate/schema.go
@@ -59,13 +59,13 @@ func RefToRelease(ref *Ref) (*Release, error) {
}
release.Contribs = contribs
if strings.Contains(strings.ToLower(ref.Biblio.Unstructured), "isbn") {
- release.ExtIDs.ISBN = parseIsbn(ref.Biblio.Unstructured)
+ release.ExtIDs.ISBN = ParseIsbn(ref.Biblio.Unstructured)
}
return &release, nil
}
-// parseIsbn tries to find and validate ISBN from unstructured data.
-func parseIsbn(s string) []string {
+// ParseIsbn tries to find and validate ISBN from unstructured data.
+func ParseIsbn(s string) []string {
// ISBN: 10: 0137822693, pp: 373
// Robotec, E. (1996). Scorbot ER VII, User's Manual, Eshed Robotec,
// ISBN9652910333. Shannon, C. (1948). A Mathematical Theory of
@@ -126,6 +126,10 @@ type Biblio struct {
Url string `json:"url,omitempty"`
Volume string `json:"volume,omitempty"`
Year int64 `json:"year,omitempty"`
+ // More non-standard fields go into extra.
+ Extra struct {
+ ISBN []string `json:"isbn"`
+ } `json:"extra"`
}
// Ref is a reference document, can be very partial.
diff --git a/skate/schema_test.go b/skate/schema_test.go
index 0f84703..56d2d5b 100644
--- a/skate/schema_test.go
+++ b/skate/schema_test.go
@@ -198,9 +198,49 @@ func TestParseIsbn(t *testing.T) {
},
}
for _, c := range cases {
- r := parseIsbn(c.s)
+ r := ParseIsbn(c.s)
if !reflect.DeepEqual(r, c.result) {
t.Fatalf("got %v, want %v", r, c.result)
}
}
}
+
+// BenchmarkParseIsbn times ParseIsbn on one fixed citation string that
+// contains a single 13-digit ISBN candidate.
+func BenchmarkParseIsbn(b *testing.B) {
+	const citation = "House Pvt. Limited., (2006), ISBN 9788183561426. Date accessed: August 2015."
+	for i := 0; i < b.N; i++ {
+		ParseIsbn(citation)
+	}
+}
+
+// BenchmarkRefToRelease times RefToRelease on a reference document that is
+// decoded once from an inline JSON fixture.
+//
+// The fixture's unstructured text does not contain "isbn", so the ISBN
+// parsing step guarded by that substring check is not part of the measurement.
+func BenchmarkRefToRelease(b *testing.B) {
+	var ref Ref
+	// Fail loudly on a broken fixture; otherwise the loop below would
+	// silently benchmark the conversion of a zero-value Ref.
+	if err := json.Unmarshal([]byte(`
+ {
+ "biblio": {
+ "arxiv_id": "123",
+ "container_name": "IEEE Trans. Pattern Anal. Mach. Intell",
+ "contrib_raw_names": [
+ "M Ben-Ezra",
+ "S K Nayar"
+ ],
+ "doi": "123",
+ "issue": "6",
+ "pages": "689-698",
+ "pmcid": "123",
+ "publisher": "ABC",
+ "title": "Motion-based motion deblurring",
+ "unstructured": "M. Ben-Ezra and S. K. Nayar. Motion-based motion deblurring. IEEE Trans. Pattern Anal. Mach. Intell., 26(6):689-698, 2004. 2",
+ "url": "https://abc.com",
+ "volume": "26",
+ "year": 2004
+ },
+ "index": 0,
+ "key": "b0",
+ "ref_source": "grobid",
+ "release_ident": "26qgat7mzrerjacrlsz3gdmcgy",
+ "release_year": 2014,
+ "work_ident": "aaaoe2wcbvdjthnv36dlqgkray"
+ }`), &ref); err != nil {
+		b.Fatalf("unmarshal fixture: %v", err)
+	}
+	// Keep the one-off JSON decoding out of the timed region.
+	b.ResetTimer()
+	for n := 0; n < b.N; n++ {
+		// A failing conversion would make the timing meaningless, so stop.
+		if _, err := RefToRelease(&ref); err != nil {
+			b.Fatal(err)
+		}
+	}
+}