aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--skate/cmd/skate-conv/main.go6
-rw-r--r--skate/schema.go30
2 files changed, 22 insertions, 14 deletions
diff --git a/skate/cmd/skate-conv/main.go b/skate/cmd/skate-conv/main.go
index 7cbc9bb..334da81 100644
--- a/skate/cmd/skate-conv/main.go
+++ b/skate/cmd/skate-conv/main.go
@@ -93,7 +93,7 @@ func refToRelease(p []byte) ([]byte, error) {
// https://archive.org/download/rg_sitemap_2021_02_23/rg_sitemap_2021_02_23.ndj.zst.
func rgSitemapToRelease(p []byte) ([]byte, error) {
var (
- s skate.Sitemap
+ s skate.SitemapEntry
release skate.Release
err error
)
@@ -118,7 +118,7 @@ func rgSitemapToRelease(p []byte) ([]byte, error) {
// openLibraryToRelease converts an Open Library work item to a release.
func openLibraryToRelease(p []byte) ([]byte, error) {
var (
- w skate.OpenLibraryWork
+ w skate.OpenLibrarySolrDoc
release *skate.Release
err error
)
@@ -129,7 +129,7 @@ func openLibraryToRelease(p []byte) ([]byte, error) {
return nil, err
}
}
- if release, err = skate.OpenLibraryWorkToRelease(&w); err != nil {
+ if release, err = skate.OpenLibrarySolrDocToRelease(&w); err != nil {
return nil, err
}
release.Extra.Skate.Status = "ol"
diff --git a/skate/schema.go b/skate/schema.go
index 8be2f42..118e124 100644
--- a/skate/schema.go
+++ b/skate/schema.go
@@ -158,10 +158,13 @@ func ParseIsbn(s string) []string {
return valid.Slice()
}
-// Release document. Note that we may have varying types for some fields.
-// Mitigation for now is to make the field an interface{}, name the field
-// "...Value" and to add a method with the field name, doing type assertion.
-// Example: ReleaseYearValue interface{}, ReleaseYear() int, etc.
+// Release document. Note that we may have different types for some fields
+// (e.g. string, int, etc.). Mitigation for now is to make the field an
+// interface{}, name the field "...Value" and to add a method with the field
+// name, doing type assertion. Example: ReleaseYearValue interface{},
+// ReleaseYear() int, etc.
+//
+// The Extra field has a "skate" section for conversion-related values.
type Release struct {
ContainerID string `json:"container_id,omitempty"`
ContainerName string `json:"container_name,omitempty"`
@@ -293,14 +296,15 @@ func (r *DataCiteRelation) RelatedIdentifier() string {
}
}
-// Sitemap basic JSON style, e.g. for https://archive.org/details/rg_sitemap_2021_02_23.
-type Sitemap struct {
+// SitemapEntry in a basic JSON style, e.g. for https://archive.org/details/rg_sitemap_2021_02_23.
+type SitemapEntry struct {
Lastmod string `json:"lastmod,omitempty"`
Title string `json:"title,omitempty"`
URL string `json:"url,omitempty"`
}
-// BiblioRef as a prototype for indexing, https://is.gd/yicTom.
+// BiblioRef as a prototype for citation graph elasticsearch indexing,
+// https://is.gd/yicTom.
type BiblioRef struct {
Key string `json:"_id,omitempty"`
IndexedTs string `json:"indexed_ts,omitempty"` // https://www.elastic.co/guide/en/elasticsearch/reference/current/date.html
@@ -326,6 +330,9 @@ type BiblioRef struct {
// ReleaseCluster, a list of match candidates. This is typically serialized as a
// single JSON line containing the match key and a list of release documents.
+//
+// Deprecated: we are moving to a "two stream" generic "join" style
+// processing.
type ReleaseCluster struct {
Key string `json:"k"`
Values []*Release `json:"v"`
@@ -412,8 +419,9 @@ func (c *MinimalCitations) ParseIDList() (result IDList) {
return result
}
-// OpenLibraryWork from data dump (solr).
-type OpenLibraryWork struct {
+// OpenLibrarySolrDoc from data dump (solr). Note: we most likely only need OL
+// editions.
+type OpenLibrarySolrDoc struct {
AuthorFacet []string `json:"author_facet"`
AuthorKey []string `json:"author_key"`
AuthorName []string `json:"author_name"`
@@ -442,8 +450,8 @@ type OpenLibraryWork struct {
Version int64 `json:"_version_"`
}
-// OpenLibraryWorkToRelease convert OL data into a release.
-func OpenLibraryWorkToRelease(w *OpenLibraryWork) (*Release, error) {
+// OpenLibrarySolrDocToRelease converts OL data into a release.
+func OpenLibrarySolrDocToRelease(w *OpenLibrarySolrDoc) (*Release, error) {
var (
release Release
contribs = make([]struct {