about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- skate/cmd/skate-cluster-stats/main.go 6
-rw-r--r-- skate/schema.go 24
-rw-r--r-- skate/verify.go 4
3 files changed, 18 insertions, 16 deletions
diff --git a/skate/cmd/skate-cluster-stats/main.go b/skate/cmd/skate-cluster-stats/main.go
index 3817b7c..4973b4d 100644
--- a/skate/cmd/skate-cluster-stats/main.go
+++ b/skate/cmd/skate-cluster-stats/main.go
@@ -33,7 +33,7 @@ func main() {
switch *mode {
case "unmatched":
f = func(p []byte) ([]byte, error) {
- var cluster skate.ClusterResult
+ var cluster skate.ReleaseCluster
if err := json.Unmarshal(p, &cluster); err != nil {
if *bestEffort {
log.Printf("%v", err)
@@ -54,7 +54,7 @@ func main() {
}
case "count":
f = func(p []byte) ([]byte, error) {
- var cluster skate.ClusterResult
+ var cluster skate.ReleaseCluster
if err := json.Unmarshal(p, &cluster); err != nil {
if *bestEffort {
log.Printf("%v", err)
@@ -75,7 +75,7 @@ func main() {
}
default:
f = func(p []byte) ([]byte, error) {
- var cluster skate.ClusterResult
+ var cluster skate.ReleaseCluster
if err := json.Unmarshal(p, &cluster); err != nil {
return nil, err
}
diff --git a/skate/schema.go b/skate/schema.go
index b4d8242..83ceea3 100644
--- a/skate/schema.go
+++ b/skate/schema.go
@@ -212,7 +212,7 @@ type BiblioRef struct {
SourceWikipediaArticle string `json:"source_wikipedia_article,omitempty"`
SourceReleaseStage string `json:"source_release_stage,omitempty"`
SourceYear string `json:"source_year,omitempty"`
- RefIndex int64 `json:"ref_index,omitempty"`
+ RefIndex int64 `json:"ref_index,omitempty"` // 1-based
RefKey string `json:"ref_key,omitempty"`
RefLocator string `json:"ref_locator,omitempty"`
TargetReleaseIdent string `json:"target_release_ident,omitempty"`
@@ -227,23 +227,24 @@ type BiblioRef struct {
TargetCSL string `json:"target_csl,omitempty"`
}
-// ClusterResult, a list of match candidates. This is typically serialized as a
+// ReleaseCluster, a list of match candidates. This is typically serialized as a
// single JSON line.
-type ClusterResult struct {
+type ReleaseCluster struct {
Key string `json:"k"`
Values []*Release `json:"v"`
}
-// NonRef returns the first non-reference release found in a cluster, or an
+// OneNonRef returns the first non-reference release found in a cluster, or an
// error, if none has been found. This depends on converted references using
-// the status "ref" in extra.
-func (cr *ClusterResult) OneNonRef() (*Release, error) {
- for _, re := range cr.Values {
+// the status "ref" in extra. We use this in mixed clusters (catalog entries
+// and references converted into releases).
+func (rc *ReleaseCluster) OneNonRef() (*Release, error) {
+ for _, re := range rc.Values {
if re.Extra.Skate.Status != "ref" {
return re, nil
}
}
- return nil, fmt.Errorf("no reference/release found")
+ return nil, fmt.Errorf("no reference/release found for cluster key: %v", rc.Key)
}
// MinimalCitations variant from archive.org/details/wikipedia_citations_2020-07-14.
@@ -268,11 +269,12 @@ type IDList struct {
// IsZero returns true, if none of the identifiers is defined.
func (l *IDList) IsZero() bool {
- return l.ISBN == "" && l.DOI == "" && l.PMID == "" && l.ISSN == "" &&
- l.JSTOR == "" && l.PMC == "" && l.ARXIV == "" && l.OL == ""
+ return *l == IDList{}
}
-// ParseIDList parses out the identifiers from a citation document.
+// ParseIDList parses out the identifiers from a citation document, the IDList
+// values look something like this: "{BIBCODE=1992ApJ...399L..31C,
+// DOI=10.1086/186599}".
func (c *MinimalCitations) ParseIDList() (result IDList) {
if len(c.IDList) < 3 {
return result
diff --git a/skate/verify.go b/skate/verify.go
index e6eb8b8..fa9abd1 100644
--- a/skate/verify.go
+++ b/skate/verify.go
@@ -116,7 +116,7 @@ var (
// with identifiers, match status and reason.
func RefCluster(p []byte) ([]byte, error) {
var (
- cr *ClusterResult
+ cr *ReleaseCluster
buf bytes.Buffer
)
if err := json.Unmarshal(p, &cr); err != nil {
@@ -143,7 +143,7 @@ func RefCluster(p []byte) ([]byte, error) {
// RefClusterToBiblioRef creates a BiblioRef schema from exact and strong matches.
func RefClusterToBiblioRef(p []byte) ([]byte, error) {
var (
- cr *ClusterResult
+ cr *ReleaseCluster
br *BiblioRef
buf bytes.Buffer
)