diff options
-rw-r--r-- | python/refcat/tasks.py | 3 | ||||
-rw-r--r-- | skate/verify.go | 9 | ||||
-rw-r--r-- | skate/xio/util.go | 2 |
3 files changed, 6 insertions, 8 deletions
diff --git a/python/refcat/tasks.py b/python/refcat/tasks.py
index 885dc58..a7a6834 100644
--- a/python/refcat/tasks.py
+++ b/python/refcat/tasks.py
@@ -938,8 +938,6 @@ class UnmatchedMapped(Refcat):
     """
     Map unmatched refs (converted to release schema on the fly) to container
     names to do approximate title matches with OL; 35m14.801s.
-
-    XXX: We want normalized container names.
     """
     def requires(self):
         return RefsWithoutIdentifiers()
@@ -963,7 +961,6 @@ class UnmatchedMapped(Refcat):
 class UnmatchedOpenLibraryMatchTable(Refcat):
     """
     Run matching and write tabular results to file. About 50M rows.
-
     """
     def requires(self):
         return {
diff --git a/skate/verify.go b/skate/verify.go
index 18b2f4e..842bedc 100644
--- a/skate/verify.go
+++ b/skate/verify.go
@@ -636,10 +636,11 @@ func doiPrefix(s string) string {
 
 // unifyDigits replaces all digit groups with a hopefully rare placeholder,
 // e.g. "<NUM>"; This is for discovering very similar, yet different
-// publications, where e.g. titles differ only by a single char representing a
-// year. Examples are yearly publications, e.g. "World Health Report 2020",
-// where any plain similarity score would yield a high number, yet publications
-// are obviously not the same.
+// publications, where e.g. titles differ only by a single or few chars
+// representing a year. Examples are yearly publications, e.g. "World Health
+// Report 2020", "World Health Report 2021", ... where any plain similarity
+// score would yield a high number, yet publications are obviously not the
+// same.
 func unifyDigits(s string) string {
 	return PatDigits.ReplaceAllString(s, "<NUM>")
 }
diff --git a/skate/xio/util.go b/skate/xio/util.go
index de3afb4..9967540 100644
--- a/skate/xio/util.go
+++ b/skate/xio/util.go
@@ -19,7 +19,7 @@ func OpenTwo(f1, f2 string) (g1, g2 *os.File, err error) {
 	return g1, g2, nil
 }
 
-// TabsToMapFile turns columns from a file into a map.
+// TabsToMapFile turns two columns from a tabular file into a map.
 func TabsToMapFile(filename, sep string, kCol, vCol int) (map[string]string, error) {
 	f, err := os.Open(filename)
 	if err != nil {