about | summary | refs | log | tree | commit | diff | stats
path: root/skate/cmd
diff options
context:
space:
mode:
author Martin Czygan <martin.czygan@gmail.com> 2021-06-01 16:50:00 +0200
committer Martin Czygan <martin.czygan@gmail.com> 2021-06-01 16:50:00 +0200
commit 2f8438dd054fd055a360fd38df40920ce517bb3f (patch)
tree a8102f61f8d5e6a85cbb032f59287e7bb3f63e47 /skate/cmd
parent c973d52dbc55b238f1d608636a42ccae100950a1 (diff)
download refcat-2f8438dd054fd055a360fd38df40920ce517bb3f.tar.gz
refcat-2f8438dd054fd055a360fd38df40920ce517bb3f.zip
add -R to keep only docs with resolved names
Diffstat (limited to 'skate/cmd')
-rw-r--r-- skate/cmd/skate-resolve-journal-name/main.go 29
1 files changed, 20 insertions, 9 deletions
diff --git a/skate/cmd/skate-resolve-journal-name/main.go b/skate/cmd/skate-resolve-journal-name/main.go
index 2351730..d98f166 100644
--- a/skate/cmd/skate-resolve-journal-name/main.go
+++ b/skate/cmd/skate-resolve-journal-name/main.go
@@ -25,12 +25,13 @@ import (
)
var (
- numWorkers = flag.Int("w", runtime.NumCPU(), "number of workers")
- batchSize = flag.Int("b", 100000, "batch size")
- column = flag.Int("f", 2, "column to find the release schema document in (1-indexed)")
- sep = flag.String("d", "\t", "delimiter to use")
- bestEffort = flag.Bool("B", false, "only log errors, do not halt")
- abbrevFile = flag.String("A", "", "path to abbreviate file")
+ numWorkers = flag.Int("w", runtime.NumCPU(), "number of workers")
+ batchSize = flag.Int("b", 100000, "batch size")
+ column = flag.Int("f", 2, "column to find the release schema document in (1-indexed)")
+ sep = flag.String("d", "\t", "delimiter to use")
+ bestEffort = flag.Bool("B", false, "only log errors, do not halt")
+ abbrevFile = flag.String("A", "", "path to abbreviate file")
+ keepResolvedOnly = flag.Bool("R", false, "keep only lines, where we could resolve an abbreviation")
)
func main() {
@@ -60,12 +61,21 @@ func main() {
results := ms.Lookup(name, -1)
if len(results) != 1 {
// To many or too few matches? We return the document unchanged
- return p, nil
+ if *keepResolvedOnly {
+ return nil, nil
+ } else {
+ return p, nil
+ }
}
resolved := m[results[0]]
if len(resolved) != 1 {
- // Abbreviation mapping to different full names? Skip.
- return p, nil
+ // Abbreviation mapping to different full names? Not sure how
+ // common, or why, but skip.
+ if *keepResolvedOnly {
+ return nil, nil
+ } else {
+ return p, nil
+ }
}
release.Extra.Skate.ResolvedContainerName = resolved[0]
b, err := json.Marshal(release)
@@ -73,6 +83,7 @@ func main() {
return nil, err
}
if len(fields) == *column {
+ // In case this is the last field, we want our newline back.
b = append(b, []byte("\n")...)
}
fields[*column-1] = b