aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--.gitignore3
-rw-r--r--Makefile29
-rw-r--r--Pipfile12
-rw-r--r--Pipfile.lock25
-rw-r--r--README.md20
-rw-r--r--fuzzycat/__init__.py0
-rw-r--r--fuzzycat/main.py2
-rw-r--r--setup.py26
8 files changed, 116 insertions, 1 deletions
diff --git a/.gitignore b/.gitignore
index b6e4761..f26a420 100644
--- a/.gitignore
+++ b/.gitignore
@@ -127,3 +127,6 @@ dmypy.json
# Pyre type checker
.pyre/
+
+# Data dir
+/data
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..7d73f56
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,29 @@
+SHELL := /bin/bash
+
+# Rules carry a trailing double-hash description; the help target greps
+# these descriptions out of the makefile and prints them next to the target.
+
+.PHONY: deps
+deps: ## Install dependencies from setup.py into pipenv
+	pipenv install -e .
+
+.PHONY: help
+help: ## Print info about all commands
+	@echo "Commands:"
+	@echo
+	@grep -E '^[/.a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; {printf " \033[01;32m%-40s\033[0m %s\n", $$1, $$2}'
+
+# The export is downloaded into a temporary file and renamed only after wget
+# succeeded. An interrupted transfer therefore never leaves a truncated file
+# under the target name, which make would otherwise consider up to date.
+# Because of -c, a later run resumes the temporary file.
+data/release_export_expanded.json.gz: ## Download release export
+	mkdir -p $(@D)
+	wget -c https://archive.org/download/fatcat_bulk_exports_2020-08-05/release_export_expanded.json.gz -O $@.tmp
+	mv -f $@.tmp $@
+
+# "{} +" hands the files to black in batches instead of starting one black
+# process per file.
+.PHONY: black
+black: ## Format all Python files
+	find . -name "*.py" -exec black {} +
+
+.PHONY: dist
+dist: ## Create source distribution
+	python setup.py sdist
+
+.PHONY: clean
+clean: ## Clean all artifacts
+	rm -rf dist
+	rm -rf fuzzycat.egg-info/
+
diff --git a/Pipfile b/Pipfile
new file mode 100644
index 0000000..0032452
--- /dev/null
+++ b/Pipfile
@@ -0,0 +1,12 @@
+[[source]]
+name = "pypi"
+url = "https://pypi.org/simple"
+verify_ssl = true
+
+[dev-packages]
+
+[packages]
+fuzzycat = {editable = true, path = "."}
+
+[requires]
+python_version = "3.7"
diff --git a/Pipfile.lock b/Pipfile.lock
new file mode 100644
index 0000000..f490740
--- /dev/null
+++ b/Pipfile.lock
@@ -0,0 +1,25 @@
+{
+ "_meta": {
+ "hash": {
+ "sha256": "96b4f20b11a61900b475d1e0762724b9f788d22f577f5bcb223b3e997216a62c"
+ },
+ "pipfile-spec": 6,
+ "requires": {
+ "python_version": "3.7"
+ },
+ "sources": [
+ {
+ "name": "pypi",
+ "url": "https://pypi.org/simple",
+ "verify_ssl": true
+ }
+ ]
+ },
+ "default": {
+ "fuzzycat": {
+ "editable": true,
+ "path": "."
+ }
+ },
+ "develop": {}
+}
diff --git a/README.md b/README.md
index 7d6e5cb..3a543a3 100644
--- a/README.md
+++ b/README.md
@@ -1,2 +1,20 @@
# fcfuzzy
-Fuzzy matching publications for fatcat.
+
+Fuzzy matching publications for [fatcat](https://fatcat.wiki).
+
+## Motivation
+
+Most of the results on sites like [Google
+Scholar](https://scholar.google.com/scholar?q=fuzzy+matching) group
+publications into clusters. Each cluster represents one publication, abstracted
+from its concrete representation as a link to a PDF.
+
+We call the abstract publication a *work* and a concrete instance a *release*.
+The goal is to group releases under works and to implement a versions feature.
+
+This repository contains both generic matching code and fatcat-specific code
+that uses the fatcat OpenAPI client.
+
+## Dataset
+
+Release metadata from: [https://archive.org/details/fatcat_bulk_exports_2020-08-05](https://archive.org/details/fatcat_bulk_exports_2020-08-05).
diff --git a/fuzzycat/__init__.py b/fuzzycat/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/fuzzycat/__init__.py
diff --git a/fuzzycat/main.py b/fuzzycat/main.py
new file mode 100644
index 0000000..2b88169
--- /dev/null
+++ b/fuzzycat/main.py
@@ -0,0 +1,2 @@
+def main():
+    """Print a fixed greeting to standard output."""
+    greeting = "hello fuzzycat"
+    print(greeting)
diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000..9e42793
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,26 @@
+import setuptools
+
+# The README doubles as the long description of the package. The encoding is
+# passed explicitly so that reading the file does not depend on the locale of
+# the machine running the build.
+with open("README.md", "r", encoding="utf-8") as fh:
+    long_description = fh.read()
+
+# The setup call lives outside the with block, so the README handle is already
+# closed when setuptools starts its work.
+setuptools.setup(
+    name="fuzzycat",
+    version="0.1.0",
+    author="Martin Czygan",
+    author_email="martin@archive.org",
+    description="Fuzzy matching utilities for scholarly metadata",
+    long_description=long_description,
+    long_description_content_type="text/markdown",
+    url="https://github.com/miku/fuzzycat",
+    packages=setuptools.find_packages(),
+    classifiers=[
+        "Programming Language :: Python :: 3",
+        "License :: OSI Approved :: MIT License",
+        "Operating System :: OS Independent",
+    ],
+    python_requires=">=3.6",
+    zip_safe=False,
+    # Installs a "fuzzycat" command that calls fuzzycat.main:main.
+    entry_points={"console_scripts": ["fuzzycat=fuzzycat.main:main",],},
+    install_requires=[],
+    # Tooling that is only pulled in through the "dev" extra.
+    extras_require={"dev": ["black>=19"],},
+)