docs and naming

author: Martin Czygan <martin.czygan@gmail.com> 2021-03-31 13:44:16 +0200
committer: Martin Czygan <martin.czygan@gmail.com> 2021-03-31 13:44:16 +0200
commit: 2b5114c1c12008dcc5ef49b7076bb4a4e0f7f8dc (patch)
tree: e0e5c97de5e9485ad3e6049894b2d9a7bc2eeacd /skate
parent: d66476fac43c83925a6d8a2fe40bf334dcb4331d (diff)
download: refcat-2b5114c1c12008dcc5ef49b7076bb4a4e0f7f8dc.tar.gz refcat-2b5114c1c12008dcc5ef49b7076bb4a4e0f7f8dc.zip
3 files changed, 18 insertions, 16 deletions
diff --git a/skate/cmd/skate-cluster-stats/main.go b/skate/cmd/skate-cluster-stats/main.go
index 3817b7c..4973b4d 100644
--- a/skate/cmd/skate-cluster-stats/main.go
+++ b/skate/cmd/skate-cluster-stats/main.go
@@ -33,7 +33,7 @@ func main() {
 	switch *mode {
 	case "unmatched":
 		f = func(p []byte) ([]byte, error) {
-			var cluster skate.ClusterResult
+			var cluster skate.ReleaseCluster
 			if err := json.Unmarshal(p, &cluster); err != nil {
 				if *bestEffort {
 					log.Printf("%v", err)
@@ -54,7 +54,7 @@ func main() {
 		}
 	case "count":
 		f = func(p []byte) ([]byte, error) {
-			var cluster skate.ClusterResult
+			var cluster skate.ReleaseCluster
 			if err := json.Unmarshal(p, &cluster); err != nil {
 				if *bestEffort {
 					log.Printf("%v", err)
@@ -75,7 +75,7 @@ func main() {
 		}
 	default:
 		f = func(p []byte) ([]byte, error) {
-			var cluster skate.ClusterResult
+			var cluster skate.ReleaseCluster
 			if err := json.Unmarshal(p, &cluster); err != nil {
 				return nil, err
 			}
diff --git a/skate/schema.go b/skate/schema.go
index b4d8242..83ceea3 100644
--- a/skate/schema.go
+++ b/skate/schema.go
@@ -212,7 +212,7 @@ type BiblioRef struct {
 	SourceWikipediaArticle string `json:"source_wikipedia_article,omitempty"`
 	SourceReleaseStage     string `json:"source_release_stage,omitempty"`
 	SourceYear             string `json:"source_year,omitempty"`
-	RefIndex               int64  `json:"ref_index,omitempty"`
+	RefIndex               int64  `json:"ref_index,omitempty"` // 1-based
 	RefKey                 string `json:"ref_key,omitempty"`
 	RefLocator             string `json:"ref_locator,omitempty"`
 	TargetReleaseIdent     string `json:"target_release_ident,omitempty"`
@@ -227,23 +227,24 @@ type BiblioRef struct {
 	TargetCSL              string `json:"target_csl,omitempty"`
 }
 
-// ClusterResult, a list of match candidates. This is typically serialized as a
+// ReleaseCluster, a list of match candidates. This is typically serialized as a
 // single JSON line.
-type ClusterResult struct {
+type ReleaseCluster struct {
 	Key    string     `json:"k"`
 	Values []*Release `json:"v"`
 }
 
-// NonRef returns the first non-reference release found in a cluster, or an
+// OneNonRef returns the first non-reference release found in a cluster, or an
 // error, if none has been found. This depends on converted references using
-// the status "ref" in extra.
-func (cr *ClusterResult) OneNonRef() (*Release, error) {
-	for _, re := range cr.Values {
+// the status "ref" in extra. We use this in mixed clusters (catalog entries
+// and references converted into releases).
+func (rc *ReleaseCluster) OneNonRef() (*Release, error) {
+	for _, re := range rc.Values {
 		if re.Extra.Skate.Status != "ref" {
 			return re, nil
 		}
 	}
-	return nil, fmt.Errorf("no reference/release found")
+	return nil, fmt.Errorf("no reference/release found for cluster key: %v", rc.Key)
 }
 
 // MinimalCitations variant from archive.org/details/wikipedia_citations_2020-07-14.
@@ -268,11 +269,12 @@ type IDList struct {
 
 // IsZero returns true, if none of the identifiers is defined.
 func (l *IDList) IsZero() bool {
-	return l.ISBN == "" && l.DOI == "" && l.PMID == "" && l.ISSN == "" &&
-		l.JSTOR == "" && l.PMC == "" && l.ARXIV == "" && l.OL == ""
+	return *l == IDList{}
 }
 
-// ParseIDList parses out the identifiers from a citation document.
+// ParseIDList parses out the identifiers from a citation document, the IDList
+// values look something like this: "{BIBCODE=1992ApJ...399L..31C,
+// DOI=10.1086/186599}".
 func (c *MinimalCitations) ParseIDList() (result IDList) {
 	if len(c.IDList) < 3 {
 		return result
diff --git a/skate/verify.go b/skate/verify.go
index e6eb8b8..fa9abd1 100644
--- a/skate/verify.go
+++ b/skate/verify.go
@@ -116,7 +116,7 @@ var (
 // with identifiers, match status and reason.
 func RefCluster(p []byte) ([]byte, error) {
 	var (
-		cr  *ClusterResult
+		cr  *ReleaseCluster
 		buf bytes.Buffer
 	)
 	if err := json.Unmarshal(p, &cr); err != nil {
@@ -143,7 +143,7 @@ func RefCluster(p []byte) ([]byte, error) {
 // RefClusterToBiblioRef creates a BiblioRef schema from exact and strong matches.
 func RefClusterToBiblioRef(p []byte) ([]byte, error) {
 	var (
-		cr  *ClusterResult
+		cr  *ReleaseCluster
 		br  *BiblioRef
 		buf bytes.Buffer
 	)
author	Martin Czygan <martin.czygan@gmail.com>	2021-03-31 13:44:16 +0200
committer	Martin Czygan <martin.czygan@gmail.com>	2021-03-31 13:44:16 +0200
commit	2b5114c1c12008dcc5ef49b7076bb4a4e0f7f8dc (patch)
tree	e0e5c97de5e9485ad3e6049894b2d9a7bc2eeacd /skate
parent	d66476fac43c83925a6d8a2fe40bf334dcb4331d (diff)
download	refcat-2b5114c1c12008dcc5ef49b7076bb4a4e0f7f8dc.tar.gz refcat-2b5114c1c12008dcc5ef49b7076bb4a4e0f7f8dc.zip