From e4690545cbef7a83b03010a8aa5a1bc8ecf5111d Mon Sep 17 00:00:00 2001
From: Martin Czygan
Date: Thu, 5 Aug 2021 01:01:07 +0200
Subject: cleanup: add stub ref cleanup

---
Review notes (commentary only; no hunk content is changed):

* Line breaks and leading indentation were restored; every token is kept
  in its original order. Blank context lines were placed so that each hunk
  matches its declared line counts -- TODO confirm against the repository.
* skate/cleanup.go: adds the FilterRawRef type. Its Run method unmarshals p
  into a Ref and returns (nil, err) if decoding fails; it returns (nil, nil)
  when ref.Biblio.Unstructured contains the literal run of dots, and
  otherwise returns p unchanged. The json name used here comes from the
  newly imported github.com/segmentio/encoding/json package.
* skate/cmd/skate-cleanup/main.go: the value "ref" for the -c flag now
  assigns FilterRawRef.Run to f, and the flag help text lists "ref".
* NOTE(review): presumably a (nil, nil) result tells the caller to drop the
  record; the loop that consumes f is not part of this patch -- confirm.
* NOTE(review): the exported FilterRawRef type and its Run method carry no
  doc comments, and the help strings in the context lines contain the typos
  "separted" and "cleanup up"; both are left untouched here because changing
  them would invalidate the hunks.

 skate/cleanup.go | 14 ++++++++++++++
 skate/cmd/skate-cleanup/main.go | 5 ++++-
 2 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/skate/cleanup.go b/skate/cleanup.go
index b50e048..c8ce6d1 100644
--- a/skate/cleanup.go
+++ b/skate/cleanup.go
@@ -5,6 +5,7 @@ import (
 	"log"
 	"strings"
 
+	"github.com/segmentio/encoding/json"
 	"mvdan.cc/xurls/v2"
 )
 
@@ -76,3 +77,16 @@ func (f *FilterDOI) Run(p []byte) ([]byte, error) {
 	parts[f.Index-1] = strings.ToLower(doi)
 	return []byte(strings.Join(parts, f.Delimiter)), nil
 }
+
+type FilterRawRef struct{}
+
+func (f *FilterRawRef) Run(p []byte) ([]byte, error) {
+	var ref Ref
+	if err := json.Unmarshal(p, &ref); err != nil {
+		return nil, err
+	}
+	if strings.Contains(ref.Biblio.Unstructured, "................") {
+		return nil, nil
+	}
+	return p, nil
+}
diff --git a/skate/cmd/skate-cleanup/main.go b/skate/cmd/skate-cleanup/main.go
index 4dd367f..e2fd27e 100644
--- a/skate/cmd/skate-cleanup/main.go
+++ b/skate/cmd/skate-cleanup/main.go
@@ -25,7 +25,7 @@ var (
 	index = flag.Int("f", 1, "one field to cleanup up a doi, 1-indexed")
 	bestEffort = flag.Bool("B", false, "only log errors, but do not stop")
 	skipNonMatches = flag.Bool("S", false, "do not emit a line for non-matches")
-	what = flag.String("c", "doi", "what to clean: doi, url")
+	what = flag.String("c", "doi", "what to clean: doi, url, ref")
 	extendedCleanup = flag.Bool("X", false, "extended (and slower) cleanup for urls")
 	allow = flag.String("allow", "http,https", "comma separted list of schemas to allow for urls")
 
@@ -39,6 +39,9 @@ func main() {
 	}
 	var f func([]byte) ([]byte, error)
 	switch *what {
+	case "ref":
+		filter := skate.FilterRawRef{}
+		f = filter.Run
 	case "url":
 		filter := skate.FilterURL{
 			Delimiter: *delimiter,
-- 
cgit v1.2.3