about summary refs log tree commit diff stats
path: root/skate/cmd/skate-from-unstructured/main.go
diff options
context:
space:
mode:
author Martin Czygan <martin.czygan@gmail.com> 2021-04-01 00:57:17 +0200
committer Martin Czygan <martin.czygan@gmail.com> 2021-04-01 00:57:17 +0200
commit 338d6a6c4554a9f7afcb2f572943f276f7912995 (patch)
tree 56e526cfc7a0892375fea9eeff54ac8e9922e0b5 /skate/cmd/skate-from-unstructured/main.go
parent 304a994951daf3930d0951b80c7ba22103f3a7f0 (diff)
download refcat-338d6a6c4554a9f7afcb2f572943f276f7912995.tar.gz
refcat-338d6a6c4554a9f7afcb2f572943f276f7912995.zip
find doi in url
Diffstat (limited to 'skate/cmd/skate-from-unstructured/main.go')
-rw-r--r-- skate/cmd/skate-from-unstructured/main.go 19
1 files changed, 12 insertions, 7 deletions
diff --git a/skate/cmd/skate-from-unstructured/main.go b/skate/cmd/skate-from-unstructured/main.go
index 0208d91..8ebc613 100644
--- a/skate/cmd/skate-from-unstructured/main.go
+++ b/skate/cmd/skate-from-unstructured/main.go
@@ -33,16 +33,10 @@ func main() {
if err := json.Unmarshal(p, &ref); err != nil {
return nil, err
}
- // TODO: ref
if err := parseUnstructured(&ref); err != nil {
return nil, err
}
- b, err := json.Marshal(ref)
- if err != nil {
- return nil, err
- }
- b = append(b, bytesNewline...)
- return b, nil
+ return skate.JsonMarshalLine(&ref)
})
pp.NumWorkers = *numWorkers
pp.BatchSize = *batchSize
@@ -74,6 +68,17 @@ func parseUnstructured(ref *skate.Ref) error {
if v != "" && ref.Biblio.DOI == "" {
ref.Biblio.DOI = v
}
+ // DOI in URL
+ prefixes := []string{"http://doi.org/", "https://doi.org/", "http://dx.doi.org/", "https://dx.doi.org/"}
+ for _, prefix := range prefixes {
+ if ref.Biblio.DOI != "" && strings.HasPrefix(ref.Biblio.Url, prefix) {
+ ref.Biblio.DOI = strings.Replace(ref.Biblio.Url, prefix, "", -1)
+ }
+ }
+ v = PatDOINoHyphen.FindString(ref.Key)
+ if v != "" && ref.Biblio.DOI == "" {
+ ref.Biblio.DOI = v
+ }
// Arxiv
vs = PatArxivPDF.FindStringSubmatch(uns)
if len(vs) != 0 && ref.Biblio.ArxivId == "" {