aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--skate/cmd/skate-conv/main.go4
-rw-r--r--skate/schema.go38
-rw-r--r--skate/xio/util.go2
3 files changed, 35 insertions, 9 deletions
diff --git a/skate/cmd/skate-conv/main.go b/skate/cmd/skate-conv/main.go
index f861cd0..7cbc9bb 100644
--- a/skate/cmd/skate-conv/main.go
+++ b/skate/cmd/skate-conv/main.go
@@ -31,7 +31,7 @@ var (
// map OL author key to author name, e.g. via: zstdcat -T0
// ol_dump_authors_latest.txt.zst | cut -f 5 | jq -rc '[.key, .name] |
// @tsv'
- openLibraryAuthorMap = make(map[string]string)
+ openLibraryAuthorMap = make(map[string]string, 8388608)
)
func main() {
@@ -52,8 +52,8 @@ func main() {
log.Fatal(err)
}
openLibraryAuthorMap = m
+ log.Printf("found: %v", len(openLibraryAuthorMap))
}
- log.Printf("author mapping: %v", len(openLibraryAuthorMap))
default:
log.Fatalf("unsupported input schema: %v", *fromFormat)
}
diff --git a/skate/schema.go b/skate/schema.go
index 54f796a..a4e7ef3 100644
--- a/skate/schema.go
+++ b/skate/schema.go
@@ -6,6 +6,7 @@ import (
"strconv"
"strings"
"sync"
+ "time"
"git.archive.org/martin/cgraph/skate/isbn"
"git.archive.org/martin/cgraph/skate/set"
@@ -15,6 +16,16 @@ var (
isbn10Regex = regexp.MustCompile(`[O0-9xX -]{10,18}`)
isbn13Regex = regexp.MustCompile(`9[O0-9xX -]{12,20}`)
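+ // Date layouts seen in Open Library edition .publish_date values, tried in order. NOTE(review): Go's reference layout uses 01 for month and 02 for day, so "2006-02" and "2006-02-01" likely should be "2006-01" and "2006-01-02" - confirm.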
+ openLibraryDateLayouts = []string{
+ "1 Jan 2006",
+ "2006",
+ "2006-02",
+ "2006-02-01",
+ "Jan 2, 2006",
+ "Jan 2006",
+ }
+
rune16pool = sync.Pool{
New: func() interface{} {
return make([]rune, 0, 16)
@@ -217,8 +228,9 @@ type Release struct {
} `json:"rg,omitempty"`
} `json:"skate,omitempty"`
OpenLibrary struct {
- HasFulltext bool `json:"has_fulltext,omitempty"`
- WorkID string `json:"work,omitempty"`
+ HasFulltext bool `json:"has_fulltext,omitempty"`
+ WorkID string `json:"work,omitempty"`
+ SourceRecords []string `json:"source_records,omitempty"`
} `json:"ol,omitempty"`
} `json:"extra,omitempty"`
}
@@ -564,14 +576,28 @@ func OpenLibraryEditionToRelease(v *OpenLibraryEdition, authorMap map[string]str
}
contribs[i].RawName = name
}
- if len(v.Works) > 0 {
- release.Extra.OpenLibrary.WorkID = v.Works[0].Key
- }
release.Contribs = contribs
- release.Title = v.Title
+ if v.Subtitle != "" {
+ release.Title = fmt.Sprintf("%s: %s", v.Title, v.Subtitle)
+ release.Extra.SubtitleValue = v.Subtitle
+ } else {
+ release.Title = v.Title
+ }
release.ExtIDs.ISBN = v.Isbns()
if len(v.Publishers) > 0 {
release.Publisher = v.Publishers[0]
}
+ if len(v.Works) > 0 {
+ release.Extra.OpenLibrary.WorkID = v.Works[0].Key
+ }
+ release.Extra.OpenLibrary.SourceRecords = v.SourceRecords
+ for _, l := range openLibraryDateLayouts {
+ t, err := time.Parse(l, v.PublishDate)
+ if err != nil {
+ continue
+ }
+ release.ReleaseYearValue = t.Year()
+ break
+ }
return &release, nil
}
diff --git a/skate/xio/util.go b/skate/xio/util.go
index 8e1669d..ead1a5d 100644
--- a/skate/xio/util.go
+++ b/skate/xio/util.go
@@ -33,7 +33,7 @@ func TabsToMapFile(filename, sep string, kCol, vCol int) (map[string]string, err
func TabsToMap(r io.Reader, sep string, kCol, vCol int) (map[string]string, error) {
var (
br = bufio.NewReader(r)
- m = make(map[string]string)
+ m = make(map[string]string, 8388608)
line, k, v string
fields []string
err error