From 2f8438dd054fd055a360fd38df40920ce517bb3f Mon Sep 17 00:00:00 2001 From: Martin Czygan Date: Tue, 1 Jun 2021 16:50:00 +0200 Subject: add -R to keep only docs with resolved names --- skate/cmd/skate-resolve-journal-name/main.go | 29 +++++++++++++++++++--------- 1 file changed, 20 insertions(+), 9 deletions(-) (limited to 'skate/cmd') diff --git a/skate/cmd/skate-resolve-journal-name/main.go b/skate/cmd/skate-resolve-journal-name/main.go index 2351730..d98f166 100644 --- a/skate/cmd/skate-resolve-journal-name/main.go +++ b/skate/cmd/skate-resolve-journal-name/main.go @@ -25,12 +25,13 @@ import ( ) var ( - numWorkers = flag.Int("w", runtime.NumCPU(), "number of workers") - batchSize = flag.Int("b", 100000, "batch size") - column = flag.Int("f", 2, "column to find the release schema document in (1-indexed)") - sep = flag.String("d", "\t", "delimiter to use") - bestEffort = flag.Bool("B", false, "only log errors, do not halt") - abbrevFile = flag.String("A", "", "path to abbreviate file") + numWorkers = flag.Int("w", runtime.NumCPU(), "number of workers") + batchSize = flag.Int("b", 100000, "batch size") + column = flag.Int("f", 2, "column to find the release schema document in (1-indexed)") + sep = flag.String("d", "\t", "delimiter to use") + bestEffort = flag.Bool("B", false, "only log errors, do not halt") + abbrevFile = flag.String("A", "", "path to abbreviate file") + keepResolvedOnly = flag.Bool("R", false, "keep only lines, where we could resolve an abbreviation") ) func main() { @@ -60,12 +61,21 @@ func main() { results := ms.Lookup(name, -1) if len(results) != 1 { // To many or too few matches? We return the document unchanged - return p, nil + if *keepResolvedOnly { + return nil, nil + } else { + return p, nil + } } resolved := m[results[0]] if len(resolved) != 1 { - // Abbreviation mapping to different full names? Skip. - return p, nil + // Abbreviation mapping to different full names? Not sure how + // common, or why, but skip. + if *keepResolvedOnly { + return nil, nil + } else { + return p, nil + } } release.Extra.Skate.ResolvedContainerName = resolved[0] b, err := json.Marshal(release) @@ -73,6 +83,7 @@ func main() { return nil, err } if len(fields) == *column { + // In case this is the last field, we want our newline back. b = append(b, []byte("\n")...) } fields[*column-1] = b -- cgit v1.2.3