about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- python/refcat/tasks.py 2
-rw-r--r-- skate/map.go 10
2 files changed, 5 insertions, 7 deletions
diff --git a/python/refcat/tasks.py b/python/refcat/tasks.py
index 807b6fd..c76e67c 100644
--- a/python/refcat/tasks.py
+++ b/python/refcat/tasks.py
@@ -804,7 +804,7 @@ class UnmatchedMapped(Refcat):
output = shellout("""
zstdcat -T0 {input} |
skate-conv -f ref |
- skate-map -m cns -skip-on-empty 1 |
+ skate-map -m ff -x container_name -skip-on-empty 1 |
LC_ALL=C sort -T {tmpdir} -S25% -k1,1 --parallel 4 |
zstd -T0 -c > {output}
""",
diff --git a/skate/map.go b/skate/map.go
index 1584acd..316b8ee 100644
--- a/skate/map.go
+++ b/skate/map.go
@@ -197,21 +197,21 @@ func MapperTitleSandcrawler(p []byte) (fields [][]byte, err error) {
return fields, nil
}
-// MapperContainerName extracts (container_title, doc).
+// MapperContainerName extracts (container_name, doc).
func MapperContainerName(p []byte) ([][]byte, error) {
var (
- doc ContainerNameDoc
+ doc PartialDoc
key []byte
)
if err := json.Unmarshal(p, &doc); err != nil {
return nil, err
} else {
- key = []byte(wsReplacer.Replace(strings.TrimSpace(doc.Biblio.ContainerName)))
+ key = []byte(wsReplacer.Replace(strings.TrimSpace(doc.ContainerName)))
}
return [][]byte{key, p}, nil
}
-// MapperContainerNameSandcrawler extracts (container_title, doc).
+// MapperContainerNameSandcrawler extracts (container_name, doc).
func MapperContainerNameSandcrawler(p []byte) (fields [][]byte, err error) {
if fields, err = MapperContainerName(p); err != nil {
return nil, err
@@ -242,7 +242,5 @@ func MapperURLFromRef(p []byte) (fields [][]byte, err error) {
// MapperPartial works on partial documents.
func MapperPartial(p []byte) (fields [][]byte, err error) {
- // TODO: slugify authors, how to compare two author strings? How do these
- // things look like?
return nil, nil
}