diff options
author | Martin Czygan <martin.czygan@gmail.com> | 2021-06-01 16:50:00 +0200 |
---|---|---|
committer | Martin Czygan <martin.czygan@gmail.com> | 2021-06-01 16:50:00 +0200 |
commit | 2f8438dd054fd055a360fd38df40920ce517bb3f (patch) | |
tree | a8102f61f8d5e6a85cbb032f59287e7bb3f63e47 /skate | |
parent | c973d52dbc55b238f1d608636a42ccae100950a1 (diff) | |
download | refcat-2f8438dd054fd055a360fd38df40920ce517bb3f.tar.gz refcat-2f8438dd054fd055a360fd38df40920ce517bb3f.zip |
add -R to keep only docs with resolved names
Diffstat (limited to 'skate')
-rw-r--r-- | skate/cmd/skate-resolve-journal-name/main.go | 29 |
1 files changed, 20 insertions, 9 deletions
diff --git a/skate/cmd/skate-resolve-journal-name/main.go b/skate/cmd/skate-resolve-journal-name/main.go index 2351730..d98f166 100644 --- a/skate/cmd/skate-resolve-journal-name/main.go +++ b/skate/cmd/skate-resolve-journal-name/main.go @@ -25,12 +25,13 @@ import ( ) var ( - numWorkers = flag.Int("w", runtime.NumCPU(), "number of workers") - batchSize = flag.Int("b", 100000, "batch size") - column = flag.Int("f", 2, "column to find the release schema document in (1-indexed)") - sep = flag.String("d", "\t", "delimiter to use") - bestEffort = flag.Bool("B", false, "only log errors, do not halt") - abbrevFile = flag.String("A", "", "path to abbreviate file") + numWorkers = flag.Int("w", runtime.NumCPU(), "number of workers") + batchSize = flag.Int("b", 100000, "batch size") + column = flag.Int("f", 2, "column to find the release schema document in (1-indexed)") + sep = flag.String("d", "\t", "delimiter to use") + bestEffort = flag.Bool("B", false, "only log errors, do not halt") + abbrevFile = flag.String("A", "", "path to abbreviate file") + keepResolvedOnly = flag.Bool("R", false, "keep only lines, where we could resolve an abbreviation") ) func main() { @@ -60,12 +61,21 @@ func main() { results := ms.Lookup(name, -1) if len(results) != 1 { // To many or too few matches? We return the document unchanged - return p, nil + if *keepResolvedOnly { + return nil, nil + } else { + return p, nil + } } resolved := m[results[0]] if len(resolved) != 1 { - // Abbreviation mapping to different full names? Skip. - return p, nil + // Abbreviation mapping to different full names? Not sure how + // common, or why, but skip. + if *keepResolvedOnly { + return nil, nil + } else { + return p, nil + } } release.Extra.Skate.ResolvedContainerName = resolved[0] b, err := json.Marshal(release) @@ -73,6 +83,7 @@ func main() { return nil, err } if len(fields) == *column { + // In case this is the last field, we want our newline back. b = append(b, []byte("\n")...) } fields[*column-1] = b |