aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMartin Czygan <martin.czygan@gmail.com>2021-06-03 11:50:25 +0200
committerMartin Czygan <martin.czygan@gmail.com>2021-06-03 11:50:25 +0200
commit1d0a1f48bb8b3a9544a3abf2ed0bc8d3d6d0bf5a (patch)
tree9b3b1787800cadc520ec81acb37f6c7aa94ae49f
parentf70b8304afbe0edeb570ecde90f05cf1d36cfffa (diff)
downloadrefcat-1d0a1f48bb8b3a9544a3abf2ed0bc8d3d6d0bf5a.tar.gz
refcat-1d0a1f48bb8b3a9544a3abf2ed0bc8d3d6d0bf5a.zip
update docs
-rw-r--r--python/refcat/tasks.py3
-rw-r--r--skate/verify.go9
-rw-r--r--skate/xio/util.go2
3 files changed, 6 insertions, 8 deletions
diff --git a/python/refcat/tasks.py b/python/refcat/tasks.py
index 885dc58..a7a6834 100644
--- a/python/refcat/tasks.py
+++ b/python/refcat/tasks.py
@@ -938,8 +938,6 @@ class UnmatchedMapped(Refcat):
"""
Map unmatched refs (converted to release schema on the fly) to container
names to do approximate title matches with OL; 35m14.801s.
-
- XXX: We want normalized container names.
"""
def requires(self):
return RefsWithoutIdentifiers()
@@ -963,7 +961,6 @@ class UnmatchedMapped(Refcat):
class UnmatchedOpenLibraryMatchTable(Refcat):
"""
Run matching and write tabular results to file. About 50M rows.
-
"""
def requires(self):
return {
diff --git a/skate/verify.go b/skate/verify.go
index 18b2f4e..842bedc 100644
--- a/skate/verify.go
+++ b/skate/verify.go
@@ -636,10 +636,11 @@ func doiPrefix(s string) string {
// unifyDigits replaces all digit groups with a hopefully rare placeholder,
// e.g. "<NUM>"; This is for discovering very similar, yet different
-// publications, where e.g. titles differ only by a single char representing a
-// year. Examples are yearly publications, e.g. "World Health Report 2020",
-// where any plain similarity score would yield a high number, yet publications
-// are obviously not the same.
+// publications, where e.g. titles differ only by a single or few chars
+// representing a year. Examples are yearly publications, e.g. "World Health
+// Report 2020", "World Health Report 2021", ... where any plain similarity
+// score would yield a high number, yet publications are obviously not the
+// same.
func unifyDigits(s string) string {
	// Every match of PatDigits in s is substituted with this fixed marker.
	const placeholder = "<NUM>"
	return PatDigits.ReplaceAllString(s, placeholder)
}
diff --git a/skate/xio/util.go b/skate/xio/util.go
index de3afb4..9967540 100644
--- a/skate/xio/util.go
+++ b/skate/xio/util.go
@@ -19,7 +19,7 @@ func OpenTwo(f1, f2 string) (g1, g2 *os.File, err error) {
return g1, g2, nil
}
-// TabsToMapFile turns columns from a file into a map.
+// TabsToMapFile turns two columns from a tabular file into a map.
func TabsToMapFile(filename, sep string, kCol, vCol int) (map[string]string, error) {
f, err := os.Open(filename)
if err != nil {