diff options
author | Martin Czygan <martin.czygan@gmail.com> | 2020-11-07 00:33:56 +0100 |
---|---|---|
committer | Martin Czygan <martin.czygan@gmail.com> | 2020-11-07 00:33:56 +0100 |
commit | 9366af90058d14b1ca046ad89987ee8bade3c003 (patch) | |
tree | 396fdbdfd5c468834aafa0ffecd48fec23e22e36 /fuzzycat/cluster.py | |
parent | bafb146d7872be4719aa3c4ab5dba45e571eae1a (diff) | |
download | fuzzycat-9366af90058d14b1ca046ad89987ee8bade3c003.tar.gz fuzzycat-9366af90058d14b1ca046ad89987ee8bade3c003.zip |
wip: aux lists and dbs
Diffstat (limited to 'fuzzycat/cluster.py')
-rw-r--r-- | fuzzycat/cluster.py | 17 |
1 files changed, 9 insertions, 8 deletions
diff --git a/fuzzycat/cluster.py b/fuzzycat/cluster.py index 755e94f..db20320 100644 --- a/fuzzycat/cluster.py +++ b/fuzzycat/cluster.py @@ -81,15 +81,16 @@ def release_key_title_authors_ngram(doc: KeyDoc) -> Tuple[str, str]: """ Derive a key from title and authors. Authors in contribs list: - "contribs": [ - { - "index": 0, - "raw_name": "Meise Botanic Garden", - "role": "author" - } - ], - + "contribs": [ + { + "index": 0, + "raw_name": "Meise Botanic Garden", + "role": "author" + } + ], + Tokenize title, remove stopwords, lookup first three, lookup last three, + plus authors. """ # SS: compare ngram sets? |