about summary refs log tree commit diff stats
path: root/skate
diff options
context:
space:
mode:
author Martin Czygan <martin.czygan@gmail.com> 2021-08-05 01:01:07 +0200
committer Martin Czygan <martin.czygan@gmail.com> 2021-08-05 01:01:07 +0200
commit e4690545cbef7a83b03010a8aa5a1bc8ecf5111d (patch)
tree e7c19b6faac3395974425242fb0a7dafa06d01ba /skate
parent 3e75168158461c18d66fb1dee98e69beb8ae8907 (diff)
download refcat-e4690545cbef7a83b03010a8aa5a1bc8ecf5111d.tar.gz
refcat-e4690545cbef7a83b03010a8aa5a1bc8ecf5111d.zip
cleanup: add stub ref cleanup
Diffstat (limited to 'skate')
-rw-r--r-- skate/cleanup.go 14
-rw-r--r-- skate/cmd/skate-cleanup/main.go 5
2 files changed, 18 insertions, 1 deletions
diff --git a/skate/cleanup.go b/skate/cleanup.go
index b50e048..c8ce6d1 100644
--- a/skate/cleanup.go
+++ b/skate/cleanup.go
@@ -5,6 +5,7 @@ import (
"log"
"strings"
+ "github.com/segmentio/encoding/json"
"mvdan.cc/xurls/v2"
)
@@ -76,3 +77,16 @@ func (f *FilterDOI) Run(p []byte) ([]byte, error) {
parts[f.Index-1] = strings.ToLower(doi)
return []byte(strings.Join(parts, f.Delimiter)), nil
}
+
+type FilterRawRef struct{} // FilterRawRef is a field-less (stateless) filter applied to a single JSON-encoded Ref; see its Run method.
+
+func (f *FilterRawRef) Run(p []byte) ([]byte, error) { // Run decodes p as a Ref and returns p unchanged, (nil, nil) when the ref is filtered out, or (nil, err) when decoding fails.
+ var ref Ref
+ if err := json.Unmarshal(p, &ref); err != nil { // p is expected to hold one JSON document that decodes into Ref
+ return nil, err
+ }
+ if strings.Contains(ref.Biblio.Unstructured, "................") { // NOTE(review): a long run of dots presumably marks table-of-contents leader lines mistaken for references — confirm
+ return nil, nil // nil payload with nil error signals "dropped", not "failed"; how callers treat it is not visible here — verify against skate-cleanup
+ }
+ return p, nil // hand back the original bytes, not a re-encoding of ref
+}
diff --git a/skate/cmd/skate-cleanup/main.go b/skate/cmd/skate-cleanup/main.go
index 4dd367f..e2fd27e 100644
--- a/skate/cmd/skate-cleanup/main.go
+++ b/skate/cmd/skate-cleanup/main.go
@@ -25,7 +25,7 @@ var (
index = flag.Int("f", 1, "one field to cleanup up a doi, 1-indexed")
bestEffort = flag.Bool("B", false, "only log errors, but do not stop")
skipNonMatches = flag.Bool("S", false, "do not emit a line for non-matches")
- what = flag.String("c", "doi", "what to clean: doi, url")
+ what = flag.String("c", "doi", "what to clean: doi, url, ref")
extendedCleanup = flag.Bool("X", false, "extended (and slower) cleanup for urls")
allow = flag.String("allow", "http,https", "comma separted list of schemas to allow for urls")
@@ -39,6 +39,9 @@ func main() {
}
var f func([]byte) ([]byte, error)
switch *what {
+ case "ref":
+ filter := skate.FilterRawRef{}
+ f = filter.Run
case "url":
filter := skate.FilterURL{
Delimiter: *delimiter,