3 files changed, 62 insertions, 30 deletions
diff --git a/Makefile b/Makefile
index a6159f4..0f4a054 100644
--- a/Makefile
+++ b/Makefile
@@ -23,13 +23,6 @@ fmt: ## Apply import sorting and yapf source formatting on all files
 dist: ## Create source distribution and wheel
 	python setup.py sdist bdist_wheel
 
-# https://engineering.fb.com/2018/07/13/data-infrastructure/xars-a-more-efficient-open-source-system-for-self-contained-executables/
-#
-# Required a build from source https://github.com/vasi/squashfuse, to get the squashfuse_ll (low level) executable.
-.PHONY: xar
-xar: ## Create a XAR standalone package (https://github.com/facebookincubator/xar, https://github.com/vasi/squashfuse)
-	python setup.py bdist_xar
-
 .PHONY: cov
 cov: ## Run coverage report
 	pipenv run pytest --cov=fuzzycat fuzzycat/*.py tests/ # --cov-report annotate:cov_annotate --cov-report html
@@ -74,18 +67,3 @@ upload: dist ## Upload to pypi
 	# For automatic package deployments, also see: .gitlab-ci.yml.
 	twine upload $(TWINE_OPTS) dist/*
 
-# ==== data related targets
-#
-# data/release_export_expanded.json.gz: ## Download release export
-# 	mkdir -p data
-# 	wget -c https://archive.org/download/$(FATCAT_BULK_EXPORT_ITEM)/release_export_expanded.json.gz -O $@
-#
-# data/container_export.json.gz: ## Download container export
-# 	mkdir -p data
-# 	wget -c https://archive.org/download/$(FATCAT_BULK_EXPORT_ITEM)/container_export.json.gz -O $@
-#
-# data/name_to_issn.json: data/issn.ndj ## Create a name to ISSN mapping (needs an ISSN JSON dump)
-# 	fuzzycat-issn --make-mapping $^ > $@
-#
-# names.db: data/issn.ndj
-#	fuzzycat-issn --make-shelve -c basic -o names $^
diff --git a/pytest.ini b/pytest.ini
new file mode 100644
index 0000000..401d96f
--- /dev/null
+++ b/pytest.ini
@@ -0,0 +1,2 @@
+[pytest]
+log_cli = True
diff --git a/tests/test_matching.py b/tests/test_matching.py
index 56999e6..997a9e6 100644
--- a/tests/test_matching.py
+++ b/tests/test_matching.py
@@ -1,3 +1,18 @@
+# Tests for fuzzycat.matching; live tests are skipped when the search server is unreachable.
+import logging
+import warnings
+
+import elasticsearch
+import pytest
+import requests
+from dynaconf import Dynaconf
+from fatcat_openapi_client import ReleaseEntity
+
+from fuzzycat.entities import entity_from_dict
+from fuzzycat.matching import anything_to_entity, match_release_fuzzy
+
+warnings.filterwarnings("ignore") # InsecureRequestWarning: Unverified HTTPS request is being made to host ...
+
 from fuzzycat.matching import anything_to_entity, match_release_fuzzy
 from fuzzycat.config import settings
 from fatcat_openapi_client import ReleaseEntity
@@ -8,22 +23,59 @@ import logging
 logger = logging.getLogger('test_matching')
 logger.setLevel(logging.DEBUG)
 
+# ad-hoc override search server with: FUZZYCAT_FATCAT_SEARCH_URL=localhost:9200 pytest ...
 FATCAT_SEARCH_URL = settings.get("FATCAT_SEARCH_URL", "https://search.fatcat.wiki:443")
 
 
+def is_not_reachable(url, timeout=3):  # negation of is_reachable, for use in skipif conditions
+    return not is_reachable(url, timeout=timeout)  # forward the caller's timeout instead of dropping it
+
+def is_reachable(url, timeout=3):
+    """
+    Return True if a GET request to URL completes with a non-error (< 400) status, False otherwise.
+    """
+    try:
+        return requests.get(url, verify=False, timeout=timeout).ok  # TLS certificate verification is disabled
+    except Exception:
+        return False  # any exception raised by the request counts as "not reachable"
+
 @pytest.fixture
 def es_client():
     return elasticsearch.Elasticsearch([FATCAT_SEARCH_URL])
 
 
-@pytest.mark.skip(reason="we cannot use POST on es, which client uses: https://git.io/JLssk")
-def test_match_release_fuzzy(es_client):
-    cases = (("wtv64ahbdzgwnan7rllwr3nurm", 2), )
-    for case, count in cases:
-        entity = anything_to_entity(case, ReleaseEntity)
-        logger.info(entity.title)
+@pytest.mark.skipif(is_not_reachable(FATCAT_SEARCH_URL),
+                    reason="{} not reachable, use e.g. FUZZYCAT_FATCAT_SEARCH_URL=localhost:9200 to override".format(FATCAT_SEARCH_URL))
+def test_match_release_fuzzy(es_client, caplog):
+    cases = (
+        ("wtv64ahbdzgwnan7rllwr3nurm", 1),
+        ("eqcgtpav3na5jh56o5vjsvb4ei", 1),
+    )
+    for i, (ident, count) in enumerate(cases):
+        entity = anything_to_entity(ident, ReleaseEntity)
+
+        result = match_release_fuzzy(entity, es=es_client)
+        logger.info("[{}] given {}, found {}".format(i, entity.title, len(result)))
+        assert len(result) == count
 
+    # Partial data.
+    cases = (
+        ({
+            "title": "digital libraries",
+            "ext_ids": {}
+        }, 5),
+        ({
+            "title": "The Future of Digital Scholarship",
+            "contribs": [{
+                "raw_name": "Costantino Thanos"
+            }],
+            "ext_ids": {}
+        }, 5),
+    )
+    for i, (doc, count) in enumerate(cases):
+        entity = entity_from_dict(doc, ReleaseEntity)
         result = match_release_fuzzy(entity, es=es_client)
-        logger.info("given: {}".format(entity.title))
-        logger.info("found: {}".format(len(result)))
+        with caplog.at_level(logging.INFO):
+            logging.info("[{}] given {}, found {}, {}".format(i, entity.title, len(result),
+                                                              [v.title for v in result]))
         assert len(result) == count