author     Martin Czygan <martin.czygan@gmail.com>    2020-10-21 03:54:53 +0200
committer  Martin Czygan <martin.czygan@gmail.com>    2020-10-21 03:54:53 +0200
commit     c134c0974d0fc8b57a0d3329d389ac72120a01bb (patch)
tree       676ccc717e96dcca92e56e2a490c5c0d18240f45
parent     2cd5ec9f9c3c91dfe79c98f7d73112b88061d383 (diff)
download   fuzzycat-c134c0974d0fc8b57a0d3329d389ac72120a01bb.tar.gz
           fuzzycat-c134c0974d0fc8b57a0d3329d389ac72120a01bb.zip
cleanup
-rw-r--r--   fuzzycat/cleanups.py          13
-rw-r--r--   fuzzycat/fatcat/api_auth.py   45
-rw-r--r--   fuzzycat/fatcat/common.py    164
-rw-r--r--   fuzzycat/fatcat/entities.py   60
-rw-r--r--   fuzzycat/fatcat/main.py       22
-rw-r--r--   fuzzycat/fatcat/matching.py  273
-rw-r--r--   fuzzycat/issn.py             401
-rw-r--r--   fuzzycat/serials.py           43
-rw-r--r--   fuzzycat/utils.py            249
-rw-r--r--   setup.py                       2
-rw-r--r--   tests/test_matching.py         4
-rw-r--r--   tests/test_utils.py          128
12 files changed, 0 insertions, 1404 deletions
diff --git a/fuzzycat/cleanups.py b/fuzzycat/cleanups.py
deleted file mode 100644
index c2e021d..0000000
--- a/fuzzycat/cleanups.py
+++ /dev/null
@@ -1,13 +0,0 @@
-"""
-Various shared cleanup approaches.
-"""
-
-from fuzzycat.utils import (StringPipeline, normalize_ampersand, normalize_whitespace)
-
-# These transformations should not affect the name of a journal.
-basic = StringPipeline([
- str.lower,
- normalize_whitespace,
- normalize_ampersand,
- lambda v: v.rstrip("."),
-])
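Note on the removed cleanups.basic pipeline above: it lower-cases a name, collapses
runs of whitespace, rewrites " & " to " and ", and strips a trailing period. A minimal
usage sketch, assuming the module as it existed before this commit (StringPipeline is
callable, see the removed fuzzycat/utils.py further down):

    >>> from fuzzycat.cleanups import basic
    >>> basic("  The  Journal & Review. ")
    'the journal and review'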
diff --git a/fuzzycat/fatcat/api_auth.py b/fuzzycat/fatcat/api_auth.py
deleted file mode 100644
index 0bad5e9..0000000
--- a/fuzzycat/fatcat/api_auth.py
+++ /dev/null
@@ -1,45 +0,0 @@
-# coding: utf-8
-"""
-API helper, taken from fatcat_tools/api_auth.py
-"""
-
-import os
-import sys
-
-import fatcat_openapi_client
-
-
-def public_api(host_uri):
- """
- Note: unlike the authenticated variant, this helper might get called even
- if the API isn't going to be used, so it's important that it doesn't try to
- actually connect to the API host or something.
- """
- conf = fatcat_openapi_client.Configuration()
- conf.host = host_uri
- return fatcat_openapi_client.DefaultApi(fatcat_openapi_client.ApiClient(conf))
-
-
-def authenticated_api(host_uri, token=None):
- """
- Note: if this helper is called, it's implied that an actual API connection
- is needed, so it does try to connect and verify credentials.
- """
-
- conf = fatcat_openapi_client.Configuration()
- conf.host = host_uri
- if not token:
- token = os.environ['FATCAT_API_AUTH_TOKEN']
- if not token:
- sys.stderr.write(
- 'This client requires a fatcat API token (eg, in env var FATCAT_API_AUTH_TOKEN)\n')
- sys.exit(-1)
-
- conf.api_key["Authorization"] = token
- conf.api_key_prefix["Authorization"] = "Bearer"
- api = fatcat_openapi_client.DefaultApi(fatcat_openapi_client.ApiClient(conf))
-
- # verify up front that auth is working
- api.auth_check()
-
- return api
diff --git a/fuzzycat/fatcat/common.py b/fuzzycat/fatcat/common.py
deleted file mode 100644
index 7499ce4..0000000
--- a/fuzzycat/fatcat/common.py
+++ /dev/null
@@ -1,164 +0,0 @@
-# coding: utf-8
-"""
-Adapter for fatcat and fatcat entities.
-"""
-
-import collections
-from enum import Enum
-from typing import Dict, List, Type, Union
-
-from fatcat_openapi_client import (ApiException, ContainerEntity, DefaultApi, ReleaseEntity,
- ReleaseExtIds, WorkEntity)
-
-from fuzzycat.fatcat.api_auth import public_api
-from fuzzycat.fatcat.entities import entity_from_dict, entity_from_json
-
-
-class MatchStatus(Enum):
- """
- When matching two entities, use these levels to express match strength.
- When in doubt, use AMBIGIOUS. DIFFERENT should be used only when it is
- certain that items do not match.
- """
-
- EXACT = 0
- STRONG = 1
- WEAK = 2
- AMBIGIOUS = 3
- DIFFERENT = 4
-
-
-def compare_ext_ids(a: ReleaseExtIds, b: ReleaseExtIds) -> Dict[str, int]:
- """
- Returns a dictionary with number of existing, matching and differing
- identifier between entity a and b. TODO(martin): It might be helpful to
- have some mapping service, that would relate qid to doi, or a mag to a
- jstor id, if this information is known.
- """
- counter = collections.Counter({"a": 0, "b": 0, "both": 0, "hits": 0, "misses": 0})
- attrs = (
- "doi",
- "wikidata_qid",
- "isbn13",
- "pmid",
- "pmcid",
- "core",
- "arxiv",
- "jstor",
- "ark",
- "mag",
- )
- for attr in attrs:
- v = getattr(a, attr)
- w = getattr(b, attr)
- if v:
- counter["a"] += 1
- if w:
- counter["b"] += 1
- if not v or not w:
- continue
- counter["both"] += 1
- if v == w:
- counter["hits"] += 1
- else:
- counter["misses"] += 1
- return counter
-
-
-def fetch_container_list(
- ids: List[str],
- api: DefaultApi = None,
-) -> List[ContainerEntity]:
- """
- Fetch a list of containers from the API.
- """
- if api is None:
- api = public_api("https://api.fatcat.wiki/v0")
- result = []
- for id in ids:
- try:
- ce = api.get_container(id)
- result.append(ce)
- except ApiException as exc:
- if exc.status == 404:
- print("[err] failed to fetch container: {}".format(id), file=sys.stderr)
- continue
- raise
- return result
-
-
-def fetch_release_list(
- ids: List[str],
- api: DefaultApi = None,
-) -> List[ReleaseEntity]:
- """
- Returns a list of entities. Some entities might be missing. Return all that
- are accessible.
- """
- if api is None:
- api = public_api("https://api.fatcat.wiki/v0")
- result = []
- for id in ids:
- try:
- re = api.get_release(id, hide="refs,abstracts", expand="container")
- result.append(re)
- except ApiException as exc:
- if exc.status == 404:
- print("[err] failed to fetch release: {}".format(id), file=sys.stderr)
- continue
- raise
- return result
-
-
-def entity_comparable_attrs(
- a: Union[ContainerEntity, ReleaseEntity],
- b: Union[ContainerEntity, ReleaseEntity],
- entity_type: Union[Type[ContainerEntity], Type[ReleaseEntity]],
-) -> List[str]:
- """
- Return a list of top-level attributes, which are defined on both entities
- (i.e. we could actually compare them).
- """
- attrs = entity_type.attribute_map.keys()
- comparable_attrs = []
- for attr in attrs:
- if getattr(a, attr) is None:
- continue
- if getattr(b, attr) is None:
- continue
- comparable_attrs.append(attr)
- return comparable_attrs
-
-
-def response_to_entity_list(response, size=5, entity_type=ReleaseEntity, api=None):
- """
- Convert an elasticsearch result to a list of entities. Accepts both a
- dictionary and an elasticsearch_dsl.response.Response.
-
- We take the ids from elasticsearch and retrieve entities via API.
- """
- if isinstance(response, dict):
- ids = [hit["_source"]["ident"] for hit in response["hits"]["hits"]][:size]
- elif isinstance(response, elasticsearch_dsl.response.Response):
- ids = [hit.to_dict().get("ident") for hit in response]
-
- if entity_type == ReleaseEntity:
- return fetch_release_list(ids, api=api)
- if entity_type == ContainerEntity:
- return fetch_container_list(ids, api=api)
-
- raise ValueError("invalid entity type: {}".format(entity_type))
-
-
-def exact_release_match(a: ReleaseEntity, b: ReleaseEntity) -> bool:
- """
- Currently, entities implement comparison through object dictionaries.
- """
- return a == b
-
-
-def exact_work_match(a: WorkEntity, b: WorkEntity) -> bool:
- """
- Currently, entities implement comparison through object dictionaries.
- """
- return a == b
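Note on the removed compare_ext_ids above: it returns a Counter over present, shared
and differing external identifiers, which verify_release_match (in the removed
matching.py below) reduces to "hits" and "misses". A small sketch, assuming the
pre-commit module and a hypothetical DOI/PMID pair:

    >>> from fatcat_openapi_client import ReleaseExtIds
    >>> from fuzzycat.fatcat.common import compare_ext_ids
    >>> a = ReleaseExtIds(doi="10.1234/example", pmid="12345")
    >>> b = ReleaseExtIds(doi="10.1234/example")
    >>> compare_ext_ids(a, b)
    Counter({'a': 2, 'b': 1, 'both': 1, 'hits': 1, 'misses': 0})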
diff --git a/fuzzycat/fatcat/entities.py b/fuzzycat/fatcat/entities.py
deleted file mode 100644
index 351c2b8..0000000
--- a/fuzzycat/fatcat/entities.py
+++ /dev/null
@@ -1,60 +0,0 @@
-# coding: utf-8
-"""
-This is taken from fatcat_tools/transforms/entities.
-"""
-
-import collections
-import json
-
-import toml
-from fatcat_openapi_client import ApiClient
-
-
-def entity_to_dict(entity, api_client=None) -> dict:
- """
- Hack to take advantage of the code-generated serialization code.
-
- Initializing/destroying ApiClient objects is surprisingly expensive
- (because it involves a threadpool), so we allow passing an existing
- instance. If you already have a full-on API connection `api`, you can
- access the ApiClient object as `api.api_client`. This is such a speed-up
- that this argument may become mandatory.
- """
- if not api_client:
- api_client = ApiClient()
- return api_client.sanitize_for_serialization(entity)
-
-
-def entity_from_json(json_str: str, entity_type, api_client=None):
- """
- Hack to take advantage of the code-generated deserialization code
-
- See note on `entity_to_dict()` about api_client argument.
- """
- if not api_client:
- api_client = ApiClient()
- thing = collections.namedtuple('Thing', ['data'])
- thing.data = json_str
- return api_client.deserialize(thing, entity_type)
-
-
-def entity_from_dict(obj: dict, entity_type, api_client=None):
- json_str = json.dumps(obj)
- return entity_from_json(json_str, entity_type, api_client=api_client)
-
-
-def entity_to_toml(entity, api_client=None, pop_fields=None) -> str:
- """
- pop_fields parameter can be used to strip out some fields from the resulting
- TOML. Eg, for fields which should not be edited, like the ident.
- """
- obj = entity_to_dict(entity, api_client=api_client)
- pop_fields = pop_fields or []
- for k in pop_fields:
- obj.pop(k, None)
- return toml.dumps(obj)
-
-
-def entity_from_toml(toml_str: str, entity_type, api_client=None):
- obj = toml.loads(toml_str)
- return entity_from_dict(obj, entity_type, api_client=api_client)
diff --git a/fuzzycat/fatcat/main.py b/fuzzycat/fatcat/main.py
deleted file mode 100644
index 07e4ad4..0000000
--- a/fuzzycat/fatcat/main.py
+++ /dev/null
@@ -1,22 +0,0 @@
-# coding: utf-8
-"""
-Command line entry point for ad-hoc testing.
-"""
-
-import argparse
-
-from fatcat_openapi_client import ReleaseEntity, ReleaseExtIds
-
-from fuzzycat.fatcat.matching import match_release_fuzzy
-
-
-def main():
- parser = argparse.ArgumentParser()
- parser.add_argument("-R", "--release", help="match release", action="store_true")
- parser.add_argument("-t", "--title", help="title")
-
- args = parser.parse_args()
-
- if args.release and args.title:
- re = ReleaseEntity(title=args.title, ext_ids=ReleaseExtIds())
- print(match_release_fuzzy(re, es="https://search.fatcat.wiki"))
diff --git a/fuzzycat/fatcat/matching.py b/fuzzycat/fatcat/matching.py
deleted file mode 100644
index 04ec275..0000000
--- a/fuzzycat/fatcat/matching.py
+++ /dev/null
@@ -1,273 +0,0 @@
-# coding: utf-8
-"""
-Public API for fuzzy matches for fatcat.
-
-Match methods return candidates, verify methods return a match status.
-
- match_container_fuzzy -> List[ContainerEntity]
- match_release_fuzzy -> List[ReleaseEntity]
-
- verify_serial_name -> MatchStatus
- verify_container_name -> MatchStatus
- verify_container_fuzzy -> MatchStatus
- verify_release_fuzzy -> MatchStatus
-
-Candidate generation will use external data from search and hence is expensive. Verification is fast.
-"""
-
-from typing import List, Optional, Set, Union
-
-import elasticsearch
-from fatcat_openapi_client import (ApiException, ContainerEntity, DefaultApi, ReleaseEntity,
- ReleaseExtIds, WorkEntity)
-from fatcat_openapi_client.api.default_api import DefaultApi
-
-from fuzzycat import cleanups
-from fuzzycat.fatcat.common import (MatchStatus, compare_ext_ids, response_to_entity_list)
-from fuzzycat.serials import serialsdb
-
-
-def match_container_fuzzy(container: ContainerEntity,
- size: int = 5,
- es: Optional[Union[str, elasticsearch.client.Elasticsearch]] = None,
- api: Optional[DefaultApi] = None) -> List[ContainerEntity]:
- """
- Given a container entity, which can be (very) partial, return a list of
- candidate matches. Elasticsearch can be a hostport or the low level client
- object.
-
- Random data point: with 20 parallel workers calling match_container_fuzzy,
- we get around 40 req/s.
- """
- assert isinstance(container, ContainerEntity)
-
- if size is None or size == 0:
- size = 10000 # or any large number
-
- if isinstance(es, str):
- es = elasticsearch.Elasticsearch([es])
- if es is None:
- es = elasticsearch.Elasticsearch()
-
- # If we find any match by ISSN-L, we return only those.
- if container.issnl:
- s = (elasticsearch_dsl.Search(using=es, index="fatcat_container").query(
- "term", issns=container.issnl).extra(size=size))
- resp = s.execute()
- if len(resp) > 0:
- return response_to_entity_list(resp, entity_type=ContainerEntity, api=api)
-
- # Do we have an exact QID match?
- if container.wikidata_qid:
- s = (elasticsearch_dsl.Search(using=es, index="fatcat_container").query(
- "term", wikidata_qid=container.wikidata_qid).extra(size=size))
- resp = s.execute()
- if len(resp) > 0:
- return response_to_entity_list(resp, entity_type=ContainerEntity, api=api)
-
- # Start with exact name match.
- #
- # curl -s https://search.fatcat.wiki/fatcat_container/_mapping | jq .
- #
- # "name": {
- # "type": "text",
- # "copy_to": [
- # "biblio"
- # ],
- # "analyzer": "textIcu",
- # "search_analyzer": "textIcuSearch"
- # },
- #
- body = {
- "query": {
- "match": {
- "name": {
- "query": container.name,
- "operator": "AND"
- }
- }
- },
- "size": size,
- }
- resp = es.search(body=body, index="fatcat_container")
- if resp["hits"]["total"] > 0:
- return response_to_entity_list(resp, entity_type=ContainerEntity, api=api)
-
- # Get fuzzy.
- # https://www.elastic.co/guide/en/elasticsearch/reference/current/common-options.html#fuzziness
- body = {
- "query": {
- "match": {
- "name": {
- "query": container.name,
- "operator": "AND",
- "fuzziness": "AUTO",
- }
- }
- },
- "size": size,
- }
- resp = es.search(body=body, index="fatcat_container")
- if resp["hits"]["total"] > 0:
- return response_to_entity_list(resp, entity_type=ContainerEntity, api=api)
-
- return []
-
-
-def match_release_fuzzy(release: ReleaseEntity,
- size: int = 5,
- es: Optional[Union[str, elasticsearch.client.Elasticsearch]] = None,
- api: Optional[DefaultApi] = None) -> List[ReleaseEntity]:
- """
- Given a release entity, return a number of similar release entities from
- fatcat using Elasticsearch.
- """
- assert isinstance(release, ReleaseEntity)
-
- if size is None or size == 0:
- size = 10000 # or any large number
-
- if isinstance(es, str):
- es = elasticsearch.Elasticsearch([es])
- if es is None:
- es = elasticsearch.Elasticsearch()
-
- # Try to match by external identifier.
- ext_ids = release.ext_ids
- attrs = {
- "doi": "doi",
- "wikidata_qid": "wikidata_qid",
- "isbn13": "isbn13",
- "pmid": "pmid",
- "pmcid": "pmcid",
- "core": "code_id",
- "arxiv": "arxiv_id",
- "jstor": "jstor_id",
- "ark": "ark_id",
- "mag": "mag_id",
- }
- for attr, es_field in attrs.items():
- value = getattr(ext_ids, attr)
- if not value:
- continue
- s = (elasticsearch_dsl.Search(using=es,
- index="fatcat_release").query("term", **{
- es_field: value
- }).extra(size=size))
- resp = s.execute()
- if len(resp) > 0:
- return response_to_entity_list(resp, entity_type=ReleaseEntity, api=api)
-
- body = {
- "query": {
- "match": {
- "title": {
- "query": release.title,
- "operator": "AND"
- }
- }
- },
- "size": size,
- }
- resp = es.search(body=body, index="fatcat_release")
- if resp["hits"]["total"] > 0:
- return response_to_entity_list(resp, entity_type=ReleaseEntity, api=api)
-
- # Get fuzzy.
- # https://www.elastic.co/guide/en/elasticsearch/reference/current/common-options.html#fuzziness
- body = {
- "query": {
- "match": {
- "title": {
- "query": release.title,
- "operator": "AND",
- "fuzziness": "AUTO",
- }
- }
- },
- "size": size,
- }
- resp = es.search(body=body, index="fatcat_release")
- if resp["hits"]["total"] > 0:
- return response_to_entity_list(resp, entity_type=ReleaseEntity, api=api)
-
- return []
-
-
-def verify_serial_name(a: str, b: str) -> MatchStatus:
- """
- Serial name verification. Serial names are a subset of container names.
- There are about 2M serials.
- """
- def verify(a: Set[str], b: Set[str]) -> MatchStatus:
-
- # If any name yields multiple ISSN-L, we cannot decide.
- if len(a) > 1:
- return MatchStatus.AMBIGIOUS
- if len(b) > 1:
- return MatchStatus.AMBIGIOUS
-
- # If both names point to the same ISSN-L, it is an exact match.
- if len(a) > 0 and len(a) == len(b):
- if len(a & b) == len(a):
- return MatchStatus.EXACT
- else:
- return MatchStatus.DIFFERENT
-
- # Multiple names possible, but there is overlap.
- if len(a & b) > 0:
- return MatchStatus.STRONG
-
- return MatchStatus.AMBIGIOUS
-
- # First, try values as given.
- issnls_for_a = serialsdb.get(a, set())
- issnls_for_b = serialsdb.get(b, set())
-
- status = verify(issnls_for_a, issnls_for_b)
- if status != MatchStatus.AMBIGIOUS:
- return status
-
- # Try to match slightly cleaned up values.
- issnls_for_a = serialsdb.get(a, set(), cleanup_pipeline=cleanups.basic)
- issnls_for_b = serialsdb.get(b, set(), cleanup_pipeline=cleanups.basic)
-
- return verify(issnls_for_a, issnls_for_b)
-
-
-def verify_container_name(a: str, b: str) -> MatchStatus:
- status = verify_serial_name(a, b)
- if status != MatchStatus.AMBIGIOUS:
- return status
-
- # TODO: add additional verification, string match and common patterns.
-
-
-def verify_container_match(a: ContainerEntity, b: ContainerEntity) -> MatchStatus:
- pass
-
-
-def verify_release_match(a: ReleaseEntity, b: ReleaseEntity) -> MatchStatus:
- assert isinstance(a, ReleaseEntity)
- assert isinstance(b, ReleaseEntity)
-
- if a == b:
- return MatchStatus.EXACT
-
- a_ext_ids, b_ext_ids = a.ext_ids, b.ext_ids
- # Compare ext ids, result is a counter, we are interested in "hits" and
- # "misses", only.
- cmp_result = compare_ext_ids(a_ext_ids, b_ext_ids)
-
- # Assume that if more ids match than mismatch, it is a good signal, e.g. if
- # only a DOI is defined and they match, it is an exact match.
- if cmp_result["hits"] > 0 and cmp_result["misses"] == 0:
- return MatchStatus.EXACT
- if cmp_result["hits"] > cmp_result["misses"]:
- return MatchStatus.STRONG
- if cmp_result["hits"] == 0 and cmp_result["misses"] > 0:
- return MatchStatus.DIFFERENT
- if cmp_result["hits"] < cmp_result["misses"]:
- return MatchStatus.AMBIGIOUS
-
- # TODO: do title verification, apply string cleanups, etc.
diff --git a/fuzzycat/issn.py b/fuzzycat/issn.py
deleted file mode 100644
index aa6b78a..0000000
--- a/fuzzycat/issn.py
+++ /dev/null
@@ -1,401 +0,0 @@
-"""
-Munge the ISSN data so we get some container name test data out of it.
-
- ...
- "issn": "0000-0019",
- "mainTitle": "<U+0098>The <U+009C>publishers weekly.",
- "name": [
- "<U+0098>The <U+009C>publishers weekly.",
- "Publishers weekly"
- ],
-
-Public data from ISSN via:
-https://portal.issn.org/resource/ISSN/0874-2308?format=json, and issnlister
-(https://github.com/miku/issnlister) to aggregate.
-
-The dataset contains naming variants in "name".
-
-Q1: How many of these variants would our matching algorithm detect?
-
-For that, we need a dataset that generates pairs (a, b) from all names (the
-mainTitle is just one of the name).
-
-Example JSON LD response from ISSN:
-
-{
- "@context": {
- "format": {
- "@id": "http://purl.org/dc/elements/1.1/format",
- "@type": "@id"
- },
- "identifiedBy": {
- "@id": "http://id.loc.gov/ontologies/bibframe/identifiedBy",
- "@type": "@id"
- },
- "identifier": {
- "@id": "http://purl.org/dc/elements/1.1/identifier"
- },
- "isPartOf": {
- "@id": "http://schema.org/isPartOf",
- "@type": "@id"
- },
- "issn": {
- "@id": "http://purl.org/ontology/bibo/issn"
- },
- "label": {
- "@id": "http://www.w3.org/2000/01/rdf-schema#label"
- },
- "location": {
- "@id": "http://schema.org/location",
- "@type": "@id"
- },
- "mainEntity": {
- "@id": "http://schema.org/mainEntity",
- "@type": "@id"
- },
- "modified": {
- "@id": "http://purl.org/dc/terms/modified",
- "@type": "http://www.w3.org/2001/XMLSchema#dateTime"
- },
- "name": {
- "@id": "http://schema.org/name"
- },
- "publication": {
- "@id": "http://schema.org/publication",
- "@type": "@id"
- },
- "status": {
- "@id": "http://id.loc.gov/ontologies/bibframe/status",
- "@type": "@id"
- },
- "title": {
- "@id": "http://id.loc.gov/ontologies/bibframe/title",
- "@type": "@id"
- },
- "type": {
- "@id": "http://purl.org/dc/terms/type",
- "@type": "@id"
- },
- "value": {
- "@id": "http://www.w3.org/1999/02/22-rdf-syntax-ns#value"
- },
- "wasAttributedTo": {
- "@id": "http://www.w3.org/ns/prov#wasAttributedTo",
- "@type": "@id"
- }
- },
- "@graph": [
- {
- "@id": "http://id.loc.gov/vocabulary/countries/pl",
- "label": "Poland"
- },
- {
- "@id": "organization/ISSNCenter#57",
- "@type": "http://schema.org/Organization"
- },
- {
- "@id": "resource/ISSN-L/0001-4125",
- "identifiedBy": "resource/ISSN/0001-4125#ISSN-L"
- },
- {
- "@id": "resource/ISSN/0001-4125",
- "@type": [
- "http://schema.org/Periodical",
- "http://id.loc.gov/ontologies/bibframe/Instance",
- "http://id.loc.gov/ontologies/bibframe/Work"
- ],
- "format": "vocabularies/medium#Print",
- "http://schema.org/issn": "0001-4125",
- "identifiedBy": [
- "resource/ISSN/0001-4125#ISSN-L",
- "resource/ISSN/0001-4125#KeyTitle",
- "resource/ISSN/0001-4125#ISSN"
- ],
- "identifier": "0001-4125",
- "isPartOf": "resource/ISSN-L/0001-4125",
- "issn": "0001-4125",
- "name": "Bulletin de l'Académie Polonaise des Sciences. Série des Sciences Techniques",
- "publication": "resource/ISSN/0001-4125#ReferencePublicationEvent",
- "title": "resource/ISSN/0001-4125#KeyTitle",
- "type": "http://marc21rdf.info/terms/formofmaterial#a"
- },
- {
- "@id": "resource/ISSN/0001-4125#ISSN",
- "@type": "http://id.loc.gov/ontologies/bibframe/Issn",
- "status": "vocabularies/IdentifierStatus#Valid",
- "value": "0001-4125"
- },
- {
- "@id": "resource/ISSN/0001-4125#ISSN-L",
- "@type": "http://id.loc.gov/ontologies/bibframe/IssnL",
- "status": "vocabularies/IdentifierStatus#Valid",
- "value": "0001-4125"
- },
- {
- "@id": "resource/ISSN/0001-4125#KeyTitle",
- "@type": [
- "http://id.loc.gov/ontologies/bibframe/Identifier",
- "http://id.loc.gov/ontologies/bibframe/KeyTitle"
- ],
- "value": "Bulletin de l'Académie Polonaise des Sciences. Série des Sciences Techniques"
- },
- {
- "@id": "resource/ISSN/0001-4125#Record",
- "@type": "http://schema.org/CreativeWork",
- "mainEntity": "resource/ISSN/0001-4125",
- "modified": "20051223105700.0",
- "status": "vocabularies/RecordStatus#Register",
- "wasAttributedTo": "organization/ISSNCenter#57"
- },
- {
- "@id": "resource/ISSN/0001-4125#ReferencePublicationEvent",
- "@type": "http://schema.org/PublicationEvent",
- "location": "http://id.loc.gov/vocabulary/countries/pl"
- }
- ]
-}
-
-"""
-
-import argparse
-import collections
-import itertools
-import json
-import os
-import re
-import shelve
-import sys
-from typing import Any, Callable, Dict, Generator, Iterable, List, Tuple, Union
-
-from simhash import Simhash
-
-from fuzzycat import cleanups
-from fuzzycat.utils import SetEncoder
-
-
-def listify(v: Union[str, List[str]]) -> List[str]:
- """
- Sensibly create a list.
- """
- if v is None:
- return []
- if isinstance(v, str):
- return [v]
- return v
-
-
-def jsonld_minimal(v: Dict[str, Any]) -> Dict[str, Any]:
- """
- Turn a JSON from issn.org into a smaller dict with a few core fields. Will
- fail, if no ISSN-L is found in the input.
-
- {
- "issnl": "0001-4125",
- "material": [],
- "issns": [
- "0001-4125"
- ],
- "urls": [],
- "names": [
- "Bulletin de l'Académie Polonaise des Sciences. Série des Sciences Techniques"
- ]
- }
-
- """
- items = v.get("@graph")
- if not items:
- return {}
- doc = {}
- for item in items:
- # "@id": "resource/ISSN-L/0001-4125"
- # "@id": "resource/ISSN/0001-4125"
- # ...
- id = item.get("@id")
- if not id:
- continue
-
- # ISSN-L
- match = re.match(r"^resource/ISSN-L/([0-9]{4,4}-[0-9]{3,3}[0-9xX])$", id)
- if match:
- doc["issnl"] = match.group(1)
- continue
-
- # The "main" issn entry.
- match = re.match(r"^resource/ISSN/([0-9]{4,4}-[0-9]{3,3}[0-9xX])$", id)
- if match:
- # if we do not have ISSN-L yet, check "exampleOfWork",
- # "resource/ISSN/2658-0705"
- if not "issnl" in doc:
- match = re.match(r"^resource/ISSN-L/([0-9]{4,4}-[0-9]{3,3}[0-9xX])$",
- item.get("exampleOfWork", ""))
- if match:
- doc["issnl"] = match.group(1)
-
- # note material
- doc["material"] = listify(item.get("material", []))
-
- # collect ids
- issns = set([match.group(1)])
- if item.get("identifier"):
- issns.add(item.get("identifier"))
- if item.get("issn"):
- issns.add(item.get("issn"))
- doc["issns"] = issns
- # add urls
- doc["urls"] = listify(item.get("url", []))
- # add names, variants
- names = listify(item.get("name")) + listify(item.get("alternateName"))
- doc["names"] = list(set(names))
-
- # add related issn
- for v in listify(item.get("isFormatOf", [])):
- match = re.match(r"^resource/ISSN/([0-9]{4,4}-[0-9]{3,3}[0-9xX])$", v)
- if match:
- doc["issns"].add(match.group(1))
-
- if "issnl" not in doc:
- raise ValueError("entry without issnl: {}".format(item))
-
- return doc
-
-
-def de_jsonld(lines: Iterable):
- """
- Batch convert jsonld to minimal JSON and write to stdout.
- """
- for line in lines:
- line = line.strip()
- try:
- doc = jsonld_minimal(json.loads(line))
- except json.decoder.JSONDecodeError as exc:
- print("failed to parse json: {}, data: {}".format(exc, line), file=sys.stderr)
- continue
- else:
- print(json.dumps(doc, cls=SetEncoder))
-
-
-def generate_name_pairs(lines: Iterable,
- cleanup_pipeline: Callable[[str], str] = None,
- keep_original: bool = True) -> Generator[Tuple[str, str, str], None, None]:
- """
- Given JSON lines, yield a tuple (issnl, a, b) of test data. Will skip on
- errors. Proto unit test data.
-
- Example output:
-
- 0013-211X Eendracht-bode (Tholen) Eendracht-bode.
- 0012-7388 Dynamic maturity Dynamic maturity.
- 0012-6055 Drehpunkt. Drehpunkt (Basel. 1968)
-
- Basically, these would be free test cases, since we would like to report
- "match" on most of these.
-
- That can be useful to detect various scripts referring to the same journal.
-
- 0040-2249 Tehnika kino i televideniâ. Tehnika kino i televideniâ
- 0040-2249 Tehnika kino i televideniâ. Техника кино и телевидения
- 0040-2249 Tehnika kino i televideniâ. Техника кино и телевидения.
- 0040-2249 Tehnika kino i televideniâ Техника кино и телевидения
- 0040-2249 Tehnika kino i televideniâ Техника кино и телевидения.
- 0040-2249 Техника кино и телевидения Техника кино и телевидения.
-
- If cleanup_pipeline is given, additionally yield the cleaned up pair (and,
- unless keep_original is False, the original pair as well).
- """
- for line in lines:
- line = line.strip()
- try:
- doc = jsonld_minimal(json.loads(line))
- except json.decoder.JSONDecodeError as exc:
- print("failed to parse json: {}, data: {}".format(exc, line), file=sys.stderr)
- continue
- for a, b in itertools.combinations(doc.get("names", []), 2):
- if cleanup_pipeline is None or (cleanup_pipeline is not None and keep_original):
- yield (doc["issnl"], a, b)
- if cleanup_pipeline:
- a = cleanup_pipeline(a)
- b = cleanup_pipeline(b)
- yield (doc["issnl"], a, b)
-
-
-def generate_name_issn_mapping(lines: Iterable, cleanup_pipeline: Callable[[str], str] = None):
- """
- Given JSON lines, generate a dictionary mapping names to sets of ISSNs. Names
- might be reused.
- """
- mapping = collections.defaultdict(set)
- for issnl, a, b in generate_name_pairs(lines, cleanup_pipeline=cleanup_pipeline):
- mapping[a].add(issnl)
- mapping[b].add(issnl)
- return mapping
-
-
-def generate_shelve(lines: Iterable, output: str, cleanup_pipeline: Callable[[str], str] = None):
- """
- Generate a persistent key value store from name issn mappings. 5015523
- entries, 1.1G take about 5min.
- """
- with shelve.open(output) as db:
- mapping = generate_name_issn_mapping(lines, cleanup_pipeline=cleanup_pipeline)
- for name, issnls in mapping.items():
- db[name] = issnls
- print("wrote {} keys to {}".format(len(db), output), file=sys.stderr)
-
-
-def generate_simhash(lines: Iterable):
- """
- Print TSV with simhash values.
-
- Match and non-match count.
-
- 1069447 1
- 927120 0
- """
- for issnl, a, b in generate_name_pairs(lines):
- ha = Simhash(a).value
- hb = Simhash(b).value
- row = (issnl, 0 if ha == hb else 1, ha, hb)
- print("\t".join([str(v) for v in row]))
-
-
-def main():
- parser = argparse.ArgumentParser()
- parser.add_argument("file",
- default=sys.stdin,
- type=argparse.FileType("r"),
- help="public data from issn, one JSON object per line")
- parser.add_argument("--make-pairs",
- action="store_true",
- help="generate TSV and write to stdout")
- parser.add_argument("--make-mapping",
- action="store_true",
- help="generate JSON mapping from name to list of ISSN")
- parser.add_argument("--make-shelve",
- action="store_true",
- help="generate trie mapping from name to list of ISSN")
- parser.add_argument("--make-simhash", action="store_true", help="print out simhash value")
- parser.add_argument("-o",
- "--output",
- type=str,
- default="output.file",
- help="write output to file")
- parser.add_argument("-c", "--cleanup", type=str, default=None, help="cleanup pipeline name")
- parser.add_argument("--de-jsonld", action="store_true", help="break up the jsonld")
-
- args = parser.parse_args()
-
- # Add additional cleanup routines here.
- cleanup = dict(basic=cleanups.basic).get(args.cleanup)
-
- if args.make_mapping:
- print(
- json.dumps(generate_name_issn_mapping(args.file, cleanup_pipeline=cleanup),
- cls=SetEncoder))
- if args.make_pairs:
- for issn, a, b in generate_name_pairs(args.file, cleanup_pipeline=cleanup):
- print("{}\t{}\t{}".format(issn, a, b))
- if args.de_jsonld:
- de_jsonld(args.file)
- if args.make_shelve:
- generate_shelve(args.file, output=args.output, cleanup_pipeline=cleanup)
- if args.make_simhash:
- generate_simhash(args.file)
diff --git a/fuzzycat/serials.py b/fuzzycat/serials.py
deleted file mode 100644
index 2f1782d..0000000
--- a/fuzzycat/serials.py
+++ /dev/null
@@ -1,43 +0,0 @@
-# coding: utf-8
-"""
-Serial name matching. Includes names from issn database.
-"""
-
-import os
-import shelve
-
-__all__ = ["serialsdb"]
-
-
-class SerialsDatabase:
- """
- Allows lookup of serial names, using a database of real serial names.
-
- >>> from serials import serialsdb
- >>> serialsdb.get("Philosophica")
- {'1857-9272', '2232-299X', '2232-3007', '2232-3015'}
-
- """
- def __init__(self, path=None):
- """
- Note that shelve appends "db" to the name automatically. TODO: make this
- auto-download into a cache directory.
- """
- if path is None:
- path = os.path.join(os.path.expanduser("~"), ".cache/fuzzycat/names")
- self.db = shelve.open(path, flag='r')
-
- def __getitem__(self, v):
- return self.db[v]
-
- def get(self, v, default=None, cleanup_pipeline=None):
- if not cleanup_pipeline:
- return self.db.get(v, default=default)
- return self.db.get(cleanup_pipeline(v), default=default)
-
- def close(self):
- self.db.close()
-
-
-# A singleton.
-serialsdb = SerialsDatabase()
diff --git a/fuzzycat/utils.py b/fuzzycat/utils.py
deleted file mode 100644
index 9d2a2f7..0000000
--- a/fuzzycat/utils.py
+++ /dev/null
@@ -1,249 +0,0 @@
-# coding: utf-8
-
-import collections
-import itertools
-import json
-import re
-import string
-from typing import Any, Callable, DefaultDict, Dict, List, Optional, Sequence
-"""
-A couple of utilities, may be split up into separate modules.
-"""
-
-
-class SetEncoder(json.JSONEncoder):
- """
- Helper to encode python sets into JSON lists.
- So you can write something like this:
- json.dumps({"things": set([1, 2, 3])}, cls=SetEncoder)
- """
- def default(self, obj):
- """
- Decorate call to standard implementation.
- """
- if isinstance(obj, set):
- return list(obj)
- return json.JSONEncoder.default(self, obj)
-
-
-class StringPipeline:
- """
- Minimalistic grouping of functions applied on an input string to produce
- some cleaned or normalized output. Pipeline functions are Func[[str], str].
-
- >>> cleanups = StringPipeline([
- ... str.lower,
- ... remove_html_tags,
- ... normalize_whitespace,
- ... normalize_ampersand,
- ... ])
- >>> cleanups("<a>Input & Output</a>")
- input and output
-
- """
- def __init__(self, fs: List[Callable[[str], str]]):
- self.fs = fs
-
- def __call__(self, s: str) -> str:
- return self.run(s)
-
- def run(self, s: str) -> str:
- """
- Apply all functions and return the result. Deprecated: just call the object.
- """
- for f in self.fs:
- s = f(s)
- return s
-
-
-class StringAnnotator:
- """
- Experimental, rationale: In some way, feature engineering; we want to
- derive metrics and numbers from the string, and do this consistently and compactly.
- E.g. once we have dozens of "speaking" characteristics, a case-based method
- might become more readable.
-
- if s.is_single_token and s.some_ratio > 0.4:
- return MatchStatus.AMBIGIOUS
-
- Could also subclass string and pluck more methods on it (might be even
- reusable).
-
- ....
-
- Given a string, derive a couple of metrics, based on functions. The
- annotation is a dict, mapping an annotation key to a value of any type.
-
- >>> metrics = StringAnnotator([
- ... has_html_tags,
- ... has_only_printable_characters,
- ... is_single_token,
- ... length,
- ... has_year_in_parentheses,
- ... ])
- >>> metrics.run("Journal of Pataphysics 2038-2032")
- {"value": "Journal of Pataphysics 2038-2032", "is_single_token": False, ... }
-
- TODO(martin):
-
- * SimpleNamespace, dotdict, Dataclass.
- * string_utils.py or similar
- * maybe adopt SpaCy or similar
- """
- def __init__(self, fs: List[Callable[[str], Dict[str, Any]]]):
- self.fs = fs
-
- def run(self, s: str) -> Dict[str, Any]:
- annotations: DefaultDict[str, Any] = collections.defaultdict(dict)
- for f in self.fs:
- result = f(s)
- annotations.update(result)
- return annotations
-
-
-def normalize_whitespace(s: str) -> str:
- """
- Remove trailing spaces and normalize whitespace.
- """
- return re.sub(r"\s{2,}", " ", s.strip())
-
-
-def normalize_ampersand(s: str) -> str:
- """
- Normalize ampersand to and.
- """
- return s.replace(" & ", " and ")
-
-
-def letter_to_non_letter_ratio(s: str) -> float:
- """
- Non letters are defined by printable w/o letters.
- """
- if len(s) == 0:
- return 0.0
- non_letters = set(string.printable) - set(string.ascii_letters)
- non_letter_count = sum(c in non_letters for c in s)
- return non_letter_count / len(s)
-
-
-def alphanumeric_ratio(s: str) -> float:
- """
- Ratio of letters, digits and whitespace to total string length.
- """
- if len(s) == 0:
- return 0.0
- alphanumeric = set(string.ascii_letters) | set(string.digits) | set([" "])
- alphanumeric_count = sum(c in alphanumeric for c in s)
- return alphanumeric_count / len(s)
-
-
-def alphanumeric_only(s: str) -> str:
- """
- Remove all non-alphanumeric content from string.
- """
- alphanumeric = set(string.ascii_letters) | set(string.digits) | set([" "])
- return "".join((c for c in s if c in alphanumeric))
-
-
-def parenthesized_year(s: str) -> Optional[str]:
- """
- Return the year only, if it is in parentheses, e.g. Hello (2020).
- """
- match = re.search(r"[\(\[]\s*([12][\d]{3})\s*[\]\)]", s)
- if match:
- return match.group(1)
- return None
-
-
-def has_non_letters_ratio(s: str, threshold: float = 0.4) -> bool:
- """
- Check the ratio of non-letters in a string, e.g. for things like "A.R.G.H"
- """
- if len(s) == 0:
- return False
- return (sum(c not in string.ascii_letters for c in s) / len(s)) > threshold
-
-
-def is_single_word_printable(s: str) -> bool:
- """
- True, if s is a single token of printable characters.
- """
- return all(c in string.printable for c in s) and s.split() == 1
-
-
-def extract_wikidata_qids(s: str) -> List[str]:
- """
- Given a string, extract all qids.
- """
- return re.findall(r"Q[0-9]{1,10}", s)
-
-
-def extract_issns(s: str) -> List[str]:
- """
- Given a string return a list of valid ISSN.
- """
- pattern = r"[0-9]{4,4}-[0-9]{3,3}[0-9xX]"
- return [v for v in re.findall(pattern, s) if is_valid_issn(v)]
-
-
-def longest_common_prefix(a: Sequence, b: Sequence) -> Sequence:
- """
- Return the longest common prefix of a and b. The length of the return value
- is at most min(len(a), len(b)).
- """
- a, b = sorted((a, b), key=len)
- for i, (u, v) in enumerate(zip(a, b)):
- if u != v:
- return a[:i]
- return a
-
-
-def common_prefix_length_ratio(a: Sequence, b: Sequence) -> float:
- """
- Return a float between 0.0 and 1.0 expressing the ratio between the length
- of the common shared prefix to the length of the longest sequence.
- """
- maxlen = max(len(a), len(b))
- if maxlen == 0:
- return 0.0
- return len(longest_common_prefix(a, b)) / maxlen
-
-
-def hamming_distance(s: str, t: str) -> int:
- """
- Return hamming distance of s and t.
- """
- return sum((u != v for u, v in itertools.zip_longest(s, t)))
-
-
-def calculate_issn_checkdigit(s: str) -> str:
- """
- Given a string of length 7, return the ISSN check value (digit or X) as
- string.
- """
- if len(s) != 7:
- raise ValueError("seven digits required")
- ss = sum((int(digit) * f for digit, f in zip(s, range(8, 1, -1))))
- _, mod = divmod(ss, 11)
- checkdigit = 0 if mod == 0 else 11 - mod
- result = "X" if checkdigit == 10 else "{}".format(checkdigit)
- return result
-
-
-def is_valid_issn(issn: str) -> bool:
- """
- Return True, if the ISSN is valid. This does not mean it is registered.
- """
- if "-" in issn:
- issn = issn.replace("-", "")
- if len(issn) != 8:
- raise ValueError("invalid issn length: {}".format(issn))
- checkdigit = calculate_issn_checkdigit(issn[:7])
- return issn[7] == "{}".format(checkdigit)
-
-
-def keys_with_values(d: Dict) -> List[Any]:
- """
- Return all keys of a dictionary which have non-falsy values.
- """
- return [k for k, v in d.items() if v]
diff --git a/setup.py b/setup.py
index 33629e6..2e06672 100644
--- a/setup.py
+++ b/setup.py
@@ -23,8 +23,6 @@ with open("README.md", "r") as fh:
python_requires=">=3.6",
zip_safe=False,
entry_points={"console_scripts": [
- "fuzzycat=fuzzycat.fatcat.main:main",
- "fuzzycat-issn=fuzzycat.issn:main",
"fuzzycat-cluster=fuzzycat.cluster:main",
],},
install_requires=[
diff --git a/tests/test_matching.py b/tests/test_matching.py
deleted file mode 100644
index 6ae393b..0000000
--- a/tests/test_matching.py
+++ /dev/null
@@ -1,4 +0,0 @@
-# coding: utf-8
-"""
-Test cases for fuzzy matching.
-"""
diff --git a/tests/test_utils.py b/tests/test_utils.py
deleted file mode 100644
index cc7fae0..0000000
--- a/tests/test_utils.py
+++ /dev/null
@@ -1,128 +0,0 @@
-# coding: utf-8
-
-from typing import List, NamedTuple
-
-import pytest
-
-from fuzzycat.utils import *
-
-
-def test_extract_issns():
- Case = NamedTuple("Case", [("s", str), ("result", List[str])])
- cases = (
- Case("", []),
- Case("Hello 1234", []),
- Case("Hello 1084-5100 World", ["1084-5100"]),
- Case("Hello 10845100 World", []),
- Case("Hello 1084-5100 1084-5100 World", ["1084-5100", "1084-5100"]),
- Case("2323-573X 2169-1886 Journal", ["2323-573X", "2169-1886"]),
- )
- for c in cases:
- result = extract_issns(c.s)
- assert result == c.result
-
-
-def test_longest_common_prefix():
- Case = NamedTuple("Case", [("a", str), ("b", str), ("result", str)])
- cases = (
- Case("", "", ""),
- Case("a", "", ""),
- Case("ab", "a", "a"),
- Case("123", "123", "123"),
- )
- for c in cases:
- result = longest_common_prefix(c.a, c.b)
- assert result == c.result
-
-
-def test_common_prefix_length_ratio():
- Case = NamedTuple("Case", [("a", str), ("b", str), ("result", float)])
- cases = (
- Case("", "", 0.0),
- Case("a", "", 0.0),
- Case("Hello World!", "ello", 0.0),
- Case("ab", "a", 0.5),
- Case("123", "123", 1.0),
- Case("1234", "123", 0.75),
- )
- for c in cases:
- result = common_prefix_length_ratio(c.a, c.b)
- assert result == c.result
-
-
-def test_hamming_distance():
- Case = NamedTuple("Case", [("a", str), ("b", str), ("result", int)])
- cases = (
- Case("", "", 0),
- Case("a", "a", 0),
- Case("a", "ab", 1),
- Case("abc", "cba", 2),
- Case("1234", "", 4),
- )
- for c in cases:
- result = hamming_distance(c.a, c.b)
- assert result == c.result
-
-
-def test_is_valid_issn():
- cases = {
- "value_error": ("", "1234", "123456", "111122223333", "XXXXXXXX"),
- "valid": (
- "0710-4081",
- "0011-7625",
- "2268-5901",
- "1809-0710",
- "1533-7561",
- "07104081",
- "00117625",
- "22685901",
- "18090710",
- "15337561",
- ),
- "invalid": (
- "0710-4080",
- "0011-7626",
- "2268-5902",
- "1809-0709",
- "1533-7560",
- "07104080",
- "00117626",
- "22685902",
- "18090709",
- "15337560",
- ),
- }
- for ve in cases["value_error"]:
- with pytest.raises(ValueError):
- is_valid_issn(ve)
- for v in cases["valid"]:
- assert is_valid_issn(v) == True
- for v in cases["invalid"]:
- assert is_valid_issn(v) == False
-
-
-def test_keys_with_values():
- Case = NamedTuple("Case", [("d", Dict), ("result", List[Any])])
- cases = (
- Case({}, []),
- Case({"a": "v"}, ["a"]),
- Case({
- "a": "",
- "b": "v"
- }, ["b"]),
- Case({
- "a": None,
- "b": "v"
- }, ["b"]),
- Case({
- "a": [],
- "b": "v"
- }, ["b"]),
- Case({
- "a": 0,
- "b": "v"
- }, ["b"]),
- )
- for case in cases:
- result = keys_with_values(case.d)
- assert result == case.result