From 60f63250aac298b036b1887e18f9db76afdc6f66 Mon Sep 17 00:00:00 2001 From: Martin Czygan Date: Fri, 28 May 2021 01:02:42 +0200 Subject: include a date --- skate/cmd/skate-conv/main.go | 4 ++-- skate/schema.go | 38 ++++++++++++++++++++++++++++++++------ skate/xio/util.go | 2 +- 3 files changed, 35 insertions(+), 9 deletions(-) diff --git a/skate/cmd/skate-conv/main.go b/skate/cmd/skate-conv/main.go index f861cd0..7cbc9bb 100644 --- a/skate/cmd/skate-conv/main.go +++ b/skate/cmd/skate-conv/main.go @@ -31,7 +31,7 @@ var ( // map OL author key to author name, e.g. via: zstdcat -T0 // ol_dump_authors_latest.txt.zst | cut -f 5 | jq -rc '[.key, .name] | // @tsv' - openLibraryAuthorMap = make(map[string]string) + openLibraryAuthorMap = make(map[string]string, 8388608) ) func main() { @@ -52,8 +52,8 @@ func main() { log.Fatal(err) } openLibraryAuthorMap = m + log.Printf("found: %v", len(openLibraryAuthorMap)) } - log.Printf("author mapping: %v", len(openLibraryAuthorMap)) default: log.Fatalf("unsupported input schema: %v", *fromFormat) } diff --git a/skate/schema.go b/skate/schema.go index 54f796a..a4e7ef3 100644 --- a/skate/schema.go +++ b/skate/schema.go @@ -6,6 +6,7 @@ import ( "strconv" "strings" "sync" + "time" "git.archive.org/martin/cgraph/skate/isbn" "git.archive.org/martin/cgraph/skate/set" @@ -15,6 +16,16 @@ var ( isbn10Regex = regexp.MustCompile(`[O0-9xX -]{10,18}`) isbn13Regex = regexp.MustCompile(`9[O0-9xX -]{12,20}`) + // E.g. as found in editions, .publish_date. 
+ openLibraryDateLayouts = []string{ + "2 Jan 2006", + "2006", + "2006-01", + "2006-01-02", + "Jan 2, 2006", + "Jan 2006", + } + rune16pool = sync.Pool{ New: func() interface{} { return make([]rune, 0, 16) @@ -217,8 +228,9 @@ type Release struct { } `json:"rg,omitempty"` } `json:"skate,omitempty"` OpenLibrary struct { - HasFulltext bool `json:"has_fulltext,omitempty"` - WorkID string `json:"work,omitempty"` + HasFulltext bool `json:"has_fulltext,omitempty"` + WorkID string `json:"work,omitempty"` + SourceRecords []string `json:"source_records,omitempty"` } `json:"ol,omitempty"` } `json:"extra,omitempty"` } @@ -564,14 +576,28 @@ func OpenLibraryEditionToRelease(v *OpenLibraryEdition, authorMap map[string]str } contribs[i].RawName = name } - if len(v.Works) > 0 { - release.Extra.OpenLibrary.WorkID = v.Works[0].Key - } release.Contribs = contribs - release.Title = v.Title + if v.Subtitle != "" { + release.Title = fmt.Sprintf("%s: %s", v.Title, v.Subtitle) + release.Extra.SubtitleValue = v.Subtitle + } else { + release.Title = v.Title + } release.ExtIDs.ISBN = v.Isbns() if len(v.Publishers) > 0 { release.Publisher = v.Publishers[0] } + if len(v.Works) > 0 { + release.Extra.OpenLibrary.WorkID = v.Works[0].Key + } + release.Extra.OpenLibrary.SourceRecords = v.SourceRecords + for _, l := range openLibraryDateLayouts { + t, err := time.Parse(l, v.PublishDate) + if err != nil { + continue + } + release.ReleaseYearValue = t.Year() + break + } return &release, nil } diff --git a/skate/xio/util.go b/skate/xio/util.go index 8e1669d..ead1a5d 100644 --- a/skate/xio/util.go +++ b/skate/xio/util.go @@ -33,7 +33,7 @@ func TabsToMapFile(filename, sep string, kCol, vCol int) (map[string]string, err func TabsToMap(r io.Reader, sep string, kCol, vCol int) (map[string]string, error) { var ( br = bufio.NewReader(r) - m = make(map[string]string) + m = make(map[string]string, 8388608) line, k, v string fields []string err error -- cgit v1.2.3