aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Martin Czygan <martin.czygan@gmail.com> 2021-04-01 00:57:17 +0200
committer Martin Czygan <martin.czygan@gmail.com> 2021-04-01 00:57:17 +0200
commit 338d6a6c4554a9f7afcb2f572943f276f7912995 (patch)
tree 56e526cfc7a0892375fea9eeff54ac8e9922e0b5
parent 304a994951daf3930d0951b80c7ba22103f3a7f0 (diff)
download refcat-338d6a6c4554a9f7afcb2f572943f276f7912995.tar.gz
refcat-338d6a6c4554a9f7afcb2f572943f276f7912995.zip
find doi in url
-rw-r--r-- skate/README.md 3
-rw-r--r-- skate/cmd/skate-from-unstructured/main.go 19
-rw-r--r-- skate/verify.go 6
3 files changed, 18 insertions, 10 deletions
diff --git a/skate/README.md b/skate/README.md
index 1962dc6..cc7e238 100644
--- a/skate/README.md
+++ b/skate/README.md
@@ -56,6 +56,9 @@ After this step:
* cluster, e.g. `skate-cluster ...`
### skate-from-unstructured
+
+
+
### skate-ref-to-release
### skate-to-doi
### skate-verify
diff --git a/skate/cmd/skate-from-unstructured/main.go b/skate/cmd/skate-from-unstructured/main.go
index 0208d91..8ebc613 100644
--- a/skate/cmd/skate-from-unstructured/main.go
+++ b/skate/cmd/skate-from-unstructured/main.go
@@ -33,16 +33,10 @@ func main() {
if err := json.Unmarshal(p, &ref); err != nil {
return nil, err
}
- // TODO: ref
if err := parseUnstructured(&ref); err != nil {
return nil, err
}
- b, err := json.Marshal(ref)
- if err != nil {
- return nil, err
- }
- b = append(b, bytesNewline...)
- return b, nil
+ return skate.JsonMarshalLine(&ref)
})
pp.NumWorkers = *numWorkers
pp.BatchSize = *batchSize
@@ -74,6 +68,17 @@ func parseUnstructured(ref *skate.Ref) error {
if v != "" && ref.Biblio.DOI == "" {
ref.Biblio.DOI = v
}
+ // DOI in URL
+ prefixes := []string{"http://doi.org/", "https://doi.org/", "http://dx.doi.org/", "https://dx.doi.org/"}
+ for _, prefix := range prefixes {
+ if ref.Biblio.DOI != "" && strings.HasPrefix(ref.Biblio.Url, prefix) {
+ ref.Biblio.DOI = strings.Replace(ref.Biblio.Url, prefix, "", -1)
+ }
+ }
+ v = PatDOINoHyphen.FindString(ref.Key)
+ if v != "" && ref.Biblio.DOI == "" {
+ ref.Biblio.DOI = v
+ }
// Arxiv
vs = PatArxivPDF.FindStringSubmatch(uns)
if len(vs) != 0 && ref.Biblio.ArxivId == "" {
diff --git a/skate/verify.go b/skate/verify.go
index cfe31ec..40ee4e2 100644
--- a/skate/verify.go
+++ b/skate/verify.go
@@ -123,8 +123,8 @@ var (
PatPages = regexp.MustCompile(`([0-9]{1,})-([0-9]{1,})`)
)
-// jsonMarshalLine marshals a value as JSON and adds a newline.
-func jsonMarshalLine(v interface{}) ([]byte, error) {
+// JsonMarshalLine marshals a value as JSON and adds a newline.
+func JsonMarshalLine(v interface{}) ([]byte, error) {
b, err := json.Marshal(v)
if err != nil {
return nil, err
@@ -230,7 +230,7 @@ func RefClusterToBiblioRef(p []byte) ([]byte, error) {
continue // Assume we already have the DOI matches.
}
br = generateBiblioRef(re, pivot, result, "fuzzy")
- return jsonMarshalLine(br)
+ return JsonMarshalLine(br)
default:
continue
}