From 4001a5c053bb3bb2fec8761f6fc9331fbf2e2f83 Mon Sep 17 00:00:00 2001
From: Martin Czygan
Date: Wed, 12 Aug 2020 11:09:44 +0200
Subject: basic scaffolding

---
 .gitignore           |  3 +++
 Makefile             | 29 +++++++++++++++++++++++++++++
 Pipfile              | 12 ++++++++++++
 Pipfile.lock         | 25 +++++++++++++++++++++++++
 README.md            | 20 +++++++++++++++++++-
 fuzzycat/__init__.py |  0
 fuzzycat/main.py     |  2 ++
 setup.py             | 26 ++++++++++++++++++++++++++
 8 files changed, 116 insertions(+), 1 deletion(-)
 create mode 100644 Makefile
 create mode 100644 Pipfile
 create mode 100644 Pipfile.lock
 create mode 100644 fuzzycat/__init__.py
 create mode 100644 fuzzycat/main.py
 create mode 100644 setup.py

diff --git a/.gitignore b/.gitignore
index b6e4761..f26a420 100644
--- a/.gitignore
+++ b/.gitignore
@@ -127,3 +127,6 @@ dmypy.json
 
 # Pyre type checker
 .pyre/
+
+# Data dir
+/data
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..7d73f56
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,29 @@
+SHELL := /bin/bash
+
+.PHONY: deps
+deps: ## Install dependencies from setup.py into pipenv
+	pipenv install '-e .'
+
+.PHONY: help
+help: ## Print info about all commands
+	@echo "Commands:"
+	@echo
+	@grep -E '^[/.a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; {printf " \033[01;32m%-40s\033[0m %s\n", $$1, $$2}'
+
+data/release_export_expanded.json.gz: ## Download release export
+	mkdir -p data
+	wget -c https://archive.org/download/fatcat_bulk_exports_2020-08-05/release_export_expanded.json.gz -O $@
+
+.PHONY: black
+black: ## Format all Python files
+	find . -name "*.py" -exec black {} \;
+
+.PHONY: dist
+dist: ## Create source distribution
+	python setup.py sdist
+
+.PHONY: clean
+clean: ## Clean all artifacts
+	rm -rf dist
+	rm -rf fuzzycat.egg-info/
+
diff --git a/Pipfile b/Pipfile
new file mode 100644
index 0000000..0032452
--- /dev/null
+++ b/Pipfile
@@ -0,0 +1,12 @@
+[[source]]
+name = "pypi"
+url = "https://pypi.org/simple"
+verify_ssl = true
+
+[dev-packages]
+
+[packages]
+fuzzycat = {editable = true, path = "."}
+
+[requires]
+python_version = "3.7"
diff --git a/Pipfile.lock b/Pipfile.lock
new file mode 100644
index 0000000..f490740
--- /dev/null
+++ b/Pipfile.lock
@@ -0,0 +1,25 @@
+{
+    "_meta": {
+        "hash": {
+            "sha256": "96b4f20b11a61900b475d1e0762724b9f788d22f577f5bcb223b3e997216a62c"
+        },
+        "pipfile-spec": 6,
+        "requires": {
+            "python_version": "3.7"
+        },
+        "sources": [
+            {
+                "name": "pypi",
+                "url": "https://pypi.org/simple",
+                "verify_ssl": true
+            }
+        ]
+    },
+    "default": {
+        "fuzzycat": {
+            "editable": true,
+            "path": "."
+        }
+    },
+    "develop": {}
+}
diff --git a/README.md b/README.md
index 7d6e5cb..3a543a3 100644
--- a/README.md
+++ b/README.md
@@ -1,2 +1,20 @@
 # fcfuzzy
-Fuzzy matching publications for fatcat.
+
+Fuzzy matching publications for [fatcat](https://fatcat.wiki).
+
+## Motivation
+
+Most of the results on sites like [Google
+Scholar](https://scholar.google.com/scholar?q=fuzzy+matching) group
+publications into clusters. Each cluster represents one publication, abstracted
+from its concrete representation as a link to a PDF.
+
+We call the abstract publication *work* and the concrete instance a *release*.
+The goal is to group releases under works and to implement a versions feature.
+
+This repository contains both generic code for matching as well as
+fatcat-specific code using the fatcat OpenAPI client.
+
+## Dataset
+
+Release metadata from: [https://archive.org/details/fatcat_bulk_exports_2020-08-05](https://archive.org/details/fatcat_bulk_exports_2020-08-05).
diff --git a/fuzzycat/__init__.py b/fuzzycat/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/fuzzycat/main.py b/fuzzycat/main.py
new file mode 100644
index 0000000..2b88169
--- /dev/null
+++ b/fuzzycat/main.py
@@ -0,0 +1,2 @@
+def main():
+    print("hello fuzzycat")
diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000..9e42793
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,26 @@
+import setuptools
+
+with open("README.md", "r", encoding="utf-8") as fh:  # not the locale default
+    long_description = fh.read()
+
+setuptools.setup(  # runs after the README handle has been closed
+    name="fuzzycat",
+    version="0.1.0",
+    author="Martin Czygan",
+    author_email="martin@archive.org",
+    description="Fuzzy matching utilities for scholarly metadata",
+    long_description=long_description,
+    long_description_content_type="text/markdown",
+    url="https://github.com/miku/fuzzycat",
+    packages=setuptools.find_packages(),
+    classifiers=[
+        "Programming Language :: Python :: 3",
+        "License :: OSI Approved :: MIT License",
+        "Operating System :: OS Independent",
+    ],
+    python_requires=">=3.6",
+    zip_safe=False,
+    entry_points={"console_scripts": ["fuzzycat=fuzzycat.main:main",],},
+    install_requires=[],
+    extras_require={"dev": ["black>=19"],},
+)
-- 
cgit v1.2.3