aboutsummaryrefslogtreecommitdiffstats
path: root/fuzzycat/cluster.py
diff options
context:
space:
mode:
authorMartin Czygan <martin.czygan@gmail.com>2020-11-07 00:33:56 +0100
committerMartin Czygan <martin.czygan@gmail.com>2020-11-07 00:33:56 +0100
commit9366af90058d14b1ca046ad89987ee8bade3c003 (patch)
tree396fdbdfd5c468834aafa0ffecd48fec23e22e36 /fuzzycat/cluster.py
parentbafb146d7872be4719aa3c4ab5dba45e571eae1a (diff)
downloadfuzzycat-9366af90058d14b1ca046ad89987ee8bade3c003.tar.gz
fuzzycat-9366af90058d14b1ca046ad89987ee8bade3c003.zip
wip: aux lists and dbs
Diffstat (limited to 'fuzzycat/cluster.py')
-rw-r--r--fuzzycat/cluster.py17
1 files changed, 9 insertions, 8 deletions
diff --git a/fuzzycat/cluster.py b/fuzzycat/cluster.py
index 755e94f..db20320 100644
--- a/fuzzycat/cluster.py
+++ b/fuzzycat/cluster.py
@@ -81,15 +81,16 @@ def release_key_title_authors_ngram(doc: KeyDoc) -> Tuple[str, str]:
"""
Derive a key from title and authors. Authors in contribs list:
- "contribs": [
- {
- "index": 0,
- "raw_name": "Meise Botanic Garden",
- "role": "author"
- }
- ],
-
+ "contribs": [
+ {
+ "index": 0,
+ "raw_name": "Meise Botanic Garden",
+ "role": "author"
+ }
+ ],
+ Tokenize title, remove stopwords, look up first three, look up last three,
+ plus authors.
"""
# SS: compare ngram sets?