aboutsummaryrefslogtreecommitdiffstats
path: root/skate/schema.go
diff options
context:
space:
mode:
Diffstat (limited to 'skate/schema.go')
-rw-r--r--skate/schema.go30
1 files changed, 19 insertions, 11 deletions
diff --git a/skate/schema.go b/skate/schema.go
index 8be2f42..118e124 100644
--- a/skate/schema.go
+++ b/skate/schema.go
@@ -158,10 +158,13 @@ func ParseIsbn(s string) []string {
return valid.Slice()
}
-// Release document. Note that we may have varying types for some fields.
-// Mitigation for now is to make the field an interface{}, name the field
-// "...Value" and to add a method with the field name, doing type assertion.
-// Example: ReleaseYearValue interface{}, ReleaseYear() int, etc.
+// Release document. Note that we may have different types for some fields
+// (e.g. string, int, etc.). Mitigation for now is to make the field an
+// interface{}, name the field "...Value" and to add a method with the field
+// name, doing type assertion. Example: ReleaseYearValue interface{},
+// ReleaseYear() int, etc.
+//
+// The Extra field gets a "skate" section for conversion-related values.
type Release struct {
ContainerID string `json:"container_id,omitempty"`
ContainerName string `json:"container_name,omitempty"`
@@ -293,14 +296,15 @@ func (r *DataCiteRelation) RelatedIdentifier() string {
}
}
-// Sitemap basic JSON style, e.g. for https://archive.org/details/rg_sitemap_2021_02_23.
-type Sitemap struct {
+// SitemapEntry in a basic JSON style, e.g. for https://archive.org/details/rg_sitemap_2021_02_23.
+type SitemapEntry struct {
Lastmod string `json:"lastmod,omitempty"`
Title string `json:"title,omitempty"`
URL string `json:"url,omitempty"`
}
-// BiblioRef as a prototype for indexing, https://is.gd/yicTom.
+// BiblioRef as a prototype for citation graph elasticsearch indexing,
+// https://is.gd/yicTom.
type BiblioRef struct {
Key string `json:"_id,omitempty"`
IndexedTs string `json:"indexed_ts,omitempty"` // https://www.elastic.co/guide/en/elasticsearch/reference/current/date.html
@@ -326,6 +330,9 @@ type BiblioRef struct {
// ReleaseCluster, a list of match candidates. This is typically serialized as a
// single JSON line containing the match key and a list of release documents.
+//
+// Deprecated: we are moving to a "two stream", generic "join" style of
+// processing.
type ReleaseCluster struct {
Key string `json:"k"`
Values []*Release `json:"v"`
@@ -412,8 +419,9 @@ func (c *MinimalCitations) ParseIDList() (result IDList) {
return result
}
-// OpenLibraryWork from data dump (solr).
-type OpenLibraryWork struct {
+// OpenLibrarySolrDoc from data dump (solr). Note: we most likely only need OL
+// editions.
+type OpenLibrarySolrDoc struct {
AuthorFacet []string `json:"author_facet"`
AuthorKey []string `json:"author_key"`
AuthorName []string `json:"author_name"`
@@ -442,8 +450,8 @@ type OpenLibraryWork struct {
Version int64 `json:"_version_"`
}
-// OpenLibraryWorkToRelease convert OL data into a release.
-func OpenLibraryWorkToRelease(w *OpenLibraryWork) (*Release, error) {
+// OpenLibrarySolrDocToRelease converts OL data into a release.
+func OpenLibrarySolrDocToRelease(w *OpenLibrarySolrDoc) (*Release, error) {
var (
release Release
contribs = make([]struct {