diff options
author | Martin Czygan <martin.czygan@gmail.com> | 2021-05-27 23:23:53 +0200 |
---|---|---|
committer | Martin Czygan <martin.czygan@gmail.com> | 2021-05-27 23:23:53 +0200 |
commit | 687e6d6a46aeb01c230e2183f465ae0ec074aff7 (patch) | |
tree | db687451f7095a51c97f15178e43d65162fd18f2 | |
parent | 56390b40ce4d5a7e4d0f17a935cf6a9c3d950be6 (diff) | |
download | refcat-687e6d6a46aeb01c230e2183f465ae0ec074aff7.tar.gz refcat-687e6d6a46aeb01c230e2183f465ae0ec074aff7.zip |
update notes
-rw-r--r-- | python/refcat/tasks.py | 2 | ||||
-rw-r--r-- | skate/map.go | 10 |
2 files changed, 5 insertions, 7 deletions
diff --git a/python/refcat/tasks.py b/python/refcat/tasks.py
index 807b6fd..c76e67c 100644
--- a/python/refcat/tasks.py
+++ b/python/refcat/tasks.py
@@ -804,7 +804,7 @@ class UnmatchedMapped(Refcat):
         output = shellout("""
             zstdcat -T0 {input} |
             skate-conv -f ref |
-            skate-map -m cns -skip-on-empty 1 |
+            skate-map -m ff -x container_name -skip-on-empty 1 |
             LC_ALL=C sort -T {tmpdir} -S25% -k1,1 --parallel 4 |
             zstd -T0 -c > {output}
         """,
diff --git a/skate/map.go b/skate/map.go
index 1584acd..316b8ee 100644
--- a/skate/map.go
+++ b/skate/map.go
@@ -197,21 +197,21 @@ func MapperTitleSandcrawler(p []byte) (fields [][]byte, err error) {
 	return fields, nil
 }
 
-// MapperContainerName extracts (container_title, doc).
+// MapperContainerName extracts (container_name, doc).
 func MapperContainerName(p []byte) ([][]byte, error) {
 	var (
-		doc ContainerNameDoc
+		doc PartialDoc
 		key []byte
 	)
 	if err := json.Unmarshal(p, &doc); err != nil {
 		return nil, err
 	} else {
-		key = []byte(wsReplacer.Replace(strings.TrimSpace(doc.Biblio.ContainerName)))
+		key = []byte(wsReplacer.Replace(strings.TrimSpace(doc.ContainerName)))
 	}
 	return [][]byte{key, p}, nil
 }
 
-// MapperContainerNameSandcrawler extracts (container_title, doc).
+// MapperContainerNameSandcrawler extracts (container_name, doc).
 func MapperContainerNameSandcrawler(p []byte) (fields [][]byte, err error) {
 	if fields, err = MapperContainerName(p); err != nil {
 		return nil, err
@@ -242,7 +242,5 @@ func MapperURLFromRef(p []byte) (fields [][]byte, err error) {
 
 // MapperPartial works on partial documents.
 func MapperPartial(p []byte) (fields [][]byte, err error) {
-	// TODO: slugify authors, how to compare two author strings? How do these
-	// things look like?
 	return nil, nil
 }