From 29c0e34c932f7d085152be6745c554786e1e786a Mon Sep 17 00:00:00 2001 From: Martin Czygan Date: Sat, 15 Jan 2022 01:16:33 +0100 Subject: skate: minor doc tweaks --- skate/README.md | 12 +++++++----- skate/map.go | 4 +--- 2 files changed, 8 insertions(+), 8 deletions(-) (limited to 'skate') diff --git a/skate/README.md b/skate/README.md index 40a863d..2a816f6 100644 --- a/skate/README.md +++ b/skate/README.md @@ -8,9 +8,11 @@ graph](https://en.wikipedia.org/wiki/Citation_graph). ## Background -Python was a bit too slow, even when parallelized (with GNU parallel), e.g. for -generating clusters of similar documents or to do verification. An option for -the future would be to resort to [Cython](https://cython.org/). Parts of +Python was a bit too slow, even when parallelized (with the great [GNU +parallel](https://www.gnu.org/software/parallel/)), e.g. for generating clusters +of similar documents or to do verification. An option for the future would be +to resort to [Cython](https://cython.org/) or +[pyspark](https://spark.apache.org/docs/latest/api/python/index.html). Parts of [fuzzycat](https://git.archive.org/webgroup/fuzzycat) has been ported into this project for performance (and we saw a 25x speedup for certain tasks). @@ -19,8 +21,8 @@ project for performance (and we saw a 25x speedup for certain tasks). ## Overview We follow a map-reduce style approach (on a single machine): We extract -specific keys from data. We group items (via sort) with the same *key* together -and apply some computation on these groups. +specific **keys** from data. We **group** items (via *sort*) with the same +*key* together and **apply** a function over these groups. Mapper is defined as function type, mapping a blob of data (e.g. a single JSON object) to a number of fields (e.g. key, value). 
diff --git a/skate/map.go b/skate/map.go index 730726d..b9fedad 100644 --- a/skate/map.go +++ b/skate/map.go @@ -381,9 +381,7 @@ func MapperOpenLibraryReleaseNormalizedISBN(p []byte) (fields [][]byte, err erro } func MapperCdxSummary(p []byte) (fields [][]byte, err error) { - var ( - cdx cdxSummary - ) + var cdx cdxSummary if err := json.Unmarshal(p, &cdx); err != nil { return nil, err } -- cgit v1.2.3