From 7ebda2e051b51e49544ab75673b19ec5f27d9d45 Mon Sep 17 00:00:00 2001
From: Bryan Newbold
Date: Mon, 12 Nov 2018 23:37:28 -0800
Subject: more python module refactoring

---
 python/fatcat_export.py                         |   2 +-
 python/fatcat_import.py                         |  10 +--
 python/fatcat_tools/changelog_workers.py        |   2 +-
 python/fatcat_tools/crossref_importer.py        |   2 +-
 python/fatcat_tools/elastic_workers.py          |   4 +-
 python/fatcat_tools/entity_helpers.py           | 100 ------------------------
 python/fatcat_tools/grobid_metadata_importer.py |   2 +-
 python/fatcat_tools/issn_importer.py            |   2 +-
 python/fatcat_tools/matched_importer.py         |   2 +-
 python/fatcat_tools/orcid_importer.py           |   2 +-
 python/fatcat_tools/transforms.py               | 100 ++++++++++++++++++++++++
 python/fatcat_worker.py                         |   4 +-
 python/tests/crossref.py                        |   2 +-
 python/tests/entity_helpers.py                  |  15 ----
 python/tests/fixtures.py                        |   8 +-
 python/tests/grobid_metadata_importer.py        |  56 -------------
 python/tests/grobid_metadata_importer_test.py   |  56 +++++++++++++
 python/tests/importer.py                        |   2 +-
 python/tests/issn.py                            |   2 +-
 python/tests/matched_importer.py                |  32 --------
 python/tests/matched_importer_test.py           |  32 ++++++++
 python/tests/orcid.py                           |   2 +-
 python/tests/routes.py                          |   1 -
 python/tests/transform_tests.py                 |  15 ++++
 24 files changed, 227 insertions(+), 228 deletions(-)
 delete mode 100644 python/fatcat_tools/entity_helpers.py
 create mode 100644 python/fatcat_tools/transforms.py
 delete mode 100644 python/tests/entity_helpers.py
 delete mode 100644 python/tests/grobid_metadata_importer.py
 create mode 100644 python/tests/grobid_metadata_importer_test.py
 delete mode 100644 python/tests/matched_importer.py
 create mode 100644 python/tests/matched_importer_test.py
 create mode 100644 python/tests/transform_tests.py

diff --git a/python/fatcat_export.py b/python/fatcat_export.py
index 6c4502af..eadf69ab 100755
--- a/python/fatcat_export.py
+++ b/python/fatcat_export.py
@@ -5,7 +5,7 @@ import json
 import argparse
 import fatcat_client
 from fatcat_client.rest import ApiException
-from fatcat.fcid import uuid2fcid
+from fatcat_tools.fcid import uuid2fcid
 
 def run_export_releases(args):
     conf = fatcat_client.Configuration()
diff --git a/python/fatcat_import.py b/python/fatcat_import.py
index c5610344..6c66d98a 100755
--- a/python/fatcat_import.py
+++ b/python/fatcat_import.py
@@ -2,11 +2,11 @@
 
 import sys
 import argparse
-from fatcat.crossref_importer import FatcatCrossrefImporter
-from fatcat.orcid_importer import FatcatOrcidImporter
-from fatcat.issn_importer import FatcatIssnImporter
-from fatcat.matched_importer import FatcatMatchedImporter
-from fatcat.grobid_metadata_importer import FatcatGrobidMetadataImporter
+from fatcat_tools.crossref_importer import FatcatCrossrefImporter
+from fatcat_tools.orcid_importer import FatcatOrcidImporter
+from fatcat_tools.issn_importer import FatcatIssnImporter
+from fatcat_tools.matched_importer import FatcatMatchedImporter
+from fatcat_tools.grobid_metadata_importer import FatcatGrobidMetadataImporter
 
 def run_import_crossref(args):
     fci = FatcatCrossrefImporter(args.host_url, args.issn_map_file,
diff --git a/python/fatcat_tools/changelog_workers.py b/python/fatcat_tools/changelog_workers.py
index e341ea32..223d4478 100644
--- a/python/fatcat_tools/changelog_workers.py
+++ b/python/fatcat_tools/changelog_workers.py
@@ -2,7 +2,7 @@
 import json
 import time
 from itertools import islice
-from fatcat.worker_common import FatcatWorker
+from fatcat_tools.worker_common import FatcatWorker
 from pykafka.common import OffsetType
 
 
diff --git a/python/fatcat_tools/crossref_importer.py b/python/fatcat_tools/crossref_importer.py
index 37005965..6a5ad824 100644
--- a/python/fatcat_tools/crossref_importer.py
+++ b/python/fatcat_tools/crossref_importer.py
@@ -5,7 +5,7 @@ import sqlite3
 import datetime
 import itertools
 import fatcat_client
-from fatcat.importer_common import FatcatImporter
+from fatcat_tools.importer_common import FatcatImporter
 
 
 class FatcatCrossrefImporter(FatcatImporter):
diff --git a/python/fatcat_tools/elastic_workers.py b/python/fatcat_tools/elastic_workers.py
index 3d2e9c39..eac8d6b0 100644
--- a/python/fatcat_tools/elastic_workers.py
+++ b/python/fatcat_tools/elastic_workers.py
@@ -2,9 +2,9 @@
 import json
 import time
 import requests
-from fatcat.worker_common import FatcatWorker
+from fatcat_tools.worker_common import FatcatWorker
 from fatcat_client.models import ReleaseEntity
-from fatcat.entity_helpers import *
+from fatcat_tools.transforms import *
 from pykafka.common import OffsetType
 
 
diff --git a/python/fatcat_tools/entity_helpers.py b/python/fatcat_tools/entity_helpers.py
deleted file mode 100644
index c454536b..00000000
--- a/python/fatcat_tools/entity_helpers.py
+++ /dev/null
@@ -1,100 +0,0 @@
-
-import collections
-from fatcat_client.models import ReleaseEntity
-from fatcat_client.api_client import ApiClient
-
-def entity_to_json(entity):
-    ac = ApiClient()
-    return ac.sanitize_for_serialization(entity)
-
-def entity_from_json(json_str, entity_type):
-    """
-    Hack to take advantage of the code-generated deserialization code
-    """
-    ac = ApiClient()
-    thing = collections.namedtuple('Thing', ['data'])
-    thing.data = json_str
-    return ac.deserialize(thing, entity_type)
-
-def release_elastic_dict(release):
-    """
-    Converts from an entity model/schema to elasticsearch oriented schema.
-
-    Returns: dict
-    """
-
-    if release.state != 'active':
-        raise ValueError("Entity is not 'active'")
-
-    # First, the easy ones (direct copy)
-    t = dict(
-        ident = release.ident,
-        revision = release.revision,
-        title = release.title,
-        release_type = release.release_type,
-        release_status = release.release_status,
-        language = release.language,
-        doi = release.doi,
-        pmid = release.pmid,
-        pmcid = release.pmcid,
-        isbn13 = release.isbn13,
-        core_id = release.core_id,
-        wikidata_qid = release.wikidata_qid
-    )
-
-    if release.release_date:
-        # TODO: resolve why this can be either a string or datetime
-        if type(release.release_date) == str:
-            t['release_date'] = release.release_date
-        else:
-            t['release_date'] = release.release_date.strftime('%F')
-
-    container = release.container
-    container_is_kept = False
-    if container:
-        t['publisher'] = container.publisher
-        t['container_name'] = container.name
-        t['container_issnl'] = container.issnl
-        container_extra = container.extra
-        if container_extra:
-            t['container_is_oa'] = container_extra.get('is_oa')
-            container_is_kept = container_extra.get('is_kept', False)
-            t['container_is_longtail_oa'] = container_extra.get('is_longtail_oa')
-    else:
-        t['publisher'] = release.publisher
-
-    files = release.files or []
-    t['file_count'] = len(files)
-    in_wa = False
-    in_ia = False
-    t['file_pdf_url'] = None
-    for f in files:
-        is_pdf = 'pdf' in f.get('mimetype', '')
-        for url in f.get('urls', []):
-            if url.get('rel', '') == 'webarchive':
-                in_wa = True
-            if '//web.archive.org/' in url['url'] or '//archive.org/' in url['url']:
-                in_ia = True
-                if is_pdf:
-                    t['file_pdf_url'] = url['url']
-            if not t['file_pdf_url'] and is_pdf:
-                t['file_pdf_url'] = url['url']
-    t['file_in_webarchive'] = in_wa
-    t['file_in_ia'] = in_ia
-
-    extra = release.extra or dict()
-    if extra:
-        t['in_shadow'] = extra.get('in_shadow')
-        if extra.get('grobid') and extra['grobid'].get('is_longtail_oa'):
-            t['container_is_longtail_oa'] = True
-    t['any_abstract'] = bool(release.abstracts)
-    t['is_kept'] = container_is_kept or extra.get('is_kept', False)
-
-    t['ref_count'] = len(release.refs or [])
-    t['contrib_count'] = len(release.contribs or [])
-    contrib_names = []
-    for c in (release.contribs or []):
-        if c.raw_name:
-            contrib_names.append(c.raw_name)
-    t['contrib_names'] = contrib_names
-    return t
diff --git a/python/fatcat_tools/grobid_metadata_importer.py b/python/fatcat_tools/grobid_metadata_importer.py
index 95cc285e..effa0d94 100755
--- a/python/fatcat_tools/grobid_metadata_importer.py
+++ b/python/fatcat_tools/grobid_metadata_importer.py
@@ -5,7 +5,7 @@ import json
 import base64
 import datetime
 import fatcat_client
-from fatcat.importer_common import FatcatImporter
+from fatcat_tools.importer_common import FatcatImporter
 
 MAX_ABSTRACT_BYTES=4096
 
diff --git a/python/fatcat_tools/issn_importer.py b/python/fatcat_tools/issn_importer.py
index c9ef50b5..e3ed7382 100644
--- a/python/fatcat_tools/issn_importer.py
+++ b/python/fatcat_tools/issn_importer.py
@@ -3,7 +3,7 @@ import sys
 import json
 import itertools
 import fatcat_client
-from fatcat.importer_common import FatcatImporter
+from fatcat_tools.importer_common import FatcatImporter
 
 # CSV format (generated from git.archive.org/webgroup/oa-journal-analysis):
 # ISSN-L,in_doaj,in_road,in_norwegian,in_crossref,title,publisher,url,lang,ISSN-print,ISSN-electronic,doi_count,has_doi,is_oa,is_kept,publisher_size,url_live,url_live_status,url_live_final_status,url_live_final_url,url_live_status_simple,url_live_final_status_simple,url_domain,gwb_pdf_count
diff --git a/python/fatcat_tools/matched_importer.py b/python/fatcat_tools/matched_importer.py
index 7f55369b..627ab6f1 100644
--- a/python/fatcat_tools/matched_importer.py
+++ b/python/fatcat_tools/matched_importer.py
@@ -4,7 +4,7 @@ import json
 import sqlite3
 import itertools
 import fatcat_client
-from fatcat.importer_common import FatcatImporter
+from fatcat_tools.importer_common import FatcatImporter
 
 #row = row.split('\t')
 #assert len(row) == 2
diff --git a/python/fatcat_tools/orcid_importer.py b/python/fatcat_tools/orcid_importer.py
index e1f5943c..f2366c66 100644
--- a/python/fatcat_tools/orcid_importer.py
+++ b/python/fatcat_tools/orcid_importer.py
@@ -3,7 +3,7 @@ import sys
 import json
 import itertools
 import fatcat_client
-from fatcat.importer_common import FatcatImporter
+from fatcat_tools.importer_common import FatcatImporter
 
 def value_or_none(e):
     if type(e) == dict:
diff --git a/python/fatcat_tools/transforms.py b/python/fatcat_tools/transforms.py
new file mode 100644
index 00000000..c454536b
--- /dev/null
+++ b/python/fatcat_tools/transforms.py
@@ -0,0 +1,100 @@
+
+import collections
+from fatcat_client.models import ReleaseEntity
+from fatcat_client.api_client import ApiClient
+
+def entity_to_json(entity):
+    ac = ApiClient()
+    return ac.sanitize_for_serialization(entity)
+
+def entity_from_json(json_str, entity_type):
+    """
+    Hack to take advantage of the code-generated deserialization code
+    """
+    ac = ApiClient()
+    thing = collections.namedtuple('Thing', ['data'])
+    thing.data = json_str
+    return ac.deserialize(thing, entity_type)
+
+def release_elastic_dict(release):
+    """
+    Converts from an entity model/schema to elasticsearch oriented schema.
+
+    Returns: dict
+    """
+
+    if release.state != 'active':
+        raise ValueError("Entity is not 'active'")
+
+    # First, the easy ones (direct copy)
+    t = dict(
+        ident = release.ident,
+        revision = release.revision,
+        title = release.title,
+        release_type = release.release_type,
+        release_status = release.release_status,
+        language = release.language,
+        doi = release.doi,
+        pmid = release.pmid,
+        pmcid = release.pmcid,
+        isbn13 = release.isbn13,
+        core_id = release.core_id,
+        wikidata_qid = release.wikidata_qid
+    )
+
+    if release.release_date:
+        # TODO: resolve why this can be either a string or datetime
+        if type(release.release_date) == str:
+            t['release_date'] = release.release_date
+        else:
+            t['release_date'] = release.release_date.strftime('%F')
+
+    container = release.container
+    container_is_kept = False
+    if container:
+        t['publisher'] = container.publisher
+        t['container_name'] = container.name
+        t['container_issnl'] = container.issnl
+        container_extra = container.extra
+        if container_extra:
+            t['container_is_oa'] = container_extra.get('is_oa')
+            container_is_kept = container_extra.get('is_kept', False)
+            t['container_is_longtail_oa'] = container_extra.get('is_longtail_oa')
+    else:
+        t['publisher'] = release.publisher
+
+    files = release.files or []
+    t['file_count'] = len(files)
+    in_wa = False
+    in_ia = False
+    t['file_pdf_url'] = None
+    for f in files:
+        is_pdf = 'pdf' in f.get('mimetype', '')
+        for url in f.get('urls', []):
+            if url.get('rel', '') == 'webarchive':
+                in_wa = True
+            if '//web.archive.org/' in url['url'] or '//archive.org/' in url['url']:
+                in_ia = True
+                if is_pdf:
+                    t['file_pdf_url'] = url['url']
+            if not t['file_pdf_url'] and is_pdf:
+                t['file_pdf_url'] = url['url']
+    t['file_in_webarchive'] = in_wa
+    t['file_in_ia'] = in_ia
+
+    extra = release.extra or dict()
+    if extra:
+        t['in_shadow'] = extra.get('in_shadow')
+        if extra.get('grobid') and extra['grobid'].get('is_longtail_oa'):
+            t['container_is_longtail_oa'] = True
+    t['any_abstract'] = bool(release.abstracts)
+    t['is_kept'] = container_is_kept or extra.get('is_kept', False)
+
+    t['ref_count'] = len(release.refs or [])
+    t['contrib_count'] = len(release.contribs or [])
+    contrib_names = []
+    for c in (release.contribs or []):
+        if c.raw_name:
+            contrib_names.append(c.raw_name)
+    t['contrib_names'] = contrib_names
+    return t
diff --git a/python/fatcat_worker.py b/python/fatcat_worker.py
index 50ff0fb7..5d3731f6 100755
--- a/python/fatcat_worker.py
+++ b/python/fatcat_worker.py
@@ -2,8 +2,8 @@
 
 import sys
 import argparse
-from fatcat.changelog_workers import FatcatChangelogWorker, FatcatEntityUpdatesWorker
-from fatcat.elastic_workers import FatcatElasticReleaseWorker
+from fatcat_tools.changelog_workers import FatcatChangelogWorker, FatcatEntityUpdatesWorker
+from fatcat_tools.elastic_workers import FatcatElasticReleaseWorker
 
 def run_changelog_worker(args):
     topic = "fatcat-{}.changelog".format(args.env)
diff --git a/python/tests/crossref.py b/python/tests/crossref.py
index 59be9886..04ac5e8e 100644
--- a/python/tests/crossref.py
+++ b/python/tests/crossref.py
@@ -1,7 +1,7 @@
 
 import json
 import pytest
-from fatcat.crossref_importer import FatcatCrossrefImporter
+from fatcat_tools.crossref_importer import FatcatCrossrefImporter
 
 
 @pytest.fixture(scope="function")
diff --git a/python/tests/entity_helpers.py b/python/tests/entity_helpers.py
deleted file mode 100644
index dd6fa00a..00000000
--- a/python/tests/entity_helpers.py
+++ /dev/null
@@ -1,15 +0,0 @@
-
-import json
-import pytest
-from fatcat.crossref_importer import FatcatCrossrefImporter
-from fatcat.entity_helpers import *
-
-from crossref import crossref_importer
-
-def test_elastic_convert(crossref_importer):
-    with open('tests/files/crossref-works.single.json', 'r') as f:
-        # not a single line
-        raw = json.loads(f.read())
-    (r, c) = crossref_importer.parse_crossref_dict(raw)
-    r.state = 'active'
-    release_elastic_dict(r)
diff --git a/python/tests/fixtures.py b/python/tests/fixtures.py
index 6ff05495..6f68cf5c 100644
--- a/python/tests/fixtures.py
+++ b/python/tests/fixtures.py
@@ -4,14 +4,14 @@
 import time
 import json
 import signal
 import pytest
-import fatcat
+import fatcat_web
 
 @pytest.fixture
 def full_app():
-    fatcat.app.testing = True
-    fatcat.app.debug = False
-    return fatcat.app
+    fatcat_web.app.testing = True
+    fatcat_web.app.debug = False
+    return fatcat_web.app
 
 @pytest.fixture
 def app(full_app):
diff --git a/python/tests/grobid_metadata_importer.py b/python/tests/grobid_metadata_importer.py
deleted file mode 100644
index 2c8565aa..00000000
--- a/python/tests/grobid_metadata_importer.py
+++ /dev/null
@@ -1,56 +0,0 @@
-
-import os
-import json
-import base64
-import pytest
-from fatcat.grobid_metadata_importer import FatcatGrobidMetadataImporter
-
-"""
-WARNING: these tests are currently very fragile because they have database
-side-effects. Should probably be disabled or re-written.
-"""
-
-@pytest.fixture(scope="function")
-def grobid_metadata_importer():
-    yield FatcatGrobidMetadataImporter("http://localhost:9411/v0")
-
-# TODO: use API to check that entities actually created...
-#def test_grobid_metadata_importer_batch(grobid_metadata_importer):
-#    with open('tests/files/example_grobid_metadata_lines.tsv', 'r') as f:
-#        grobid_metadata_importer.process_batch(f)
-
-def test_grobid_metadata_parse(grobid_metadata_importer):
-    with open('tests/files/example_grobid_metadata_lines.tsv', 'r') as f:
-        raw = json.loads(f.readline().split('\t')[4])
-        re = grobid_metadata_importer.parse_grobid_json(raw)
-    assert re
-    assert re.title == "PEMBELAJARAN FISIKA DENGAN PENDEKATAN KETERAMPILAN PROSES MELALUI METODE DEMONSTRASI MENGGUNAKAN MEDIA RIIL DAN MEDIA VIRTUIL DITINJAU DARI MOTIVASI DAN GAYA BERFIKIR SISWA"
-    assert len(re.contribs) == 5
-    print(re.contribs)
-    assert re.contribs[0].raw_name == "Wahyu Ary"
-    assert re.publisher == None
-    assert re.extra.get('container_name') == None
-    assert len(re.refs) == 27
-
-def test_file_metadata_parse(grobid_metadata_importer):
-    with open('tests/files/example_grobid_metadata_lines.tsv', 'r') as f:
-        f.readline()
-        raw = f.readline().split('\t')
-    # randomize sha1 so tests are repeatable
-    random_sha1 = "sha1:{}".format(base64.b32encode(os.urandom(20)).decode('utf-8').upper())
-    fe = grobid_metadata_importer.parse_file_metadata(
-        random_sha1, json.loads(raw[1]), raw[2], int(raw[3]))
-    assert fe
-    #assert fe.sha1 == "d4a841744719518bf8bdd5d91576ccedc55efbb5" # "sha1:2SUEC5CHDFIYX6F52XMRK5WM5XCV565V"
-    assert fe.md5 == None
-    assert fe.mimetype == "application/pdf"
-    assert fe.size == 142710
-    assert fe.urls[1].url.startswith("http://via.library.depaul.edu")
-    assert fe.urls[1].rel == "web"
-    assert fe.urls[0].url.startswith("https://web.archive.org/")
-    assert fe.urls[0].rel == "webarchive"
-    assert len(fe.releases) == 0
-
-def test_grobid_metadata_importer(grobid_metadata_importer):
-    with open('tests/files/example_grobid_metadata_lines.tsv', 'r') as f:
-        grobid_metadata_importer.process_source(f)
diff --git a/python/tests/grobid_metadata_importer_test.py b/python/tests/grobid_metadata_importer_test.py
new file mode 100644
index 00000000..502ca74a
--- /dev/null
+++ b/python/tests/grobid_metadata_importer_test.py
@@ -0,0 +1,56 @@
+
+import os
+import json
+import base64
+import pytest
+from fatcat_tools.grobid_metadata_importer import FatcatGrobidMetadataImporter
+
+"""
+WARNING: these tests are currently very fragile because they have database
+side-effects. Should probably be disabled or re-written.
+"""
+
+@pytest.fixture(scope="function")
+def grobid_metadata_importer():
+    yield FatcatGrobidMetadataImporter("http://localhost:9411/v0")
+
+# TODO: use API to check that entities actually created...
+#def test_grobid_metadata_importer_batch(grobid_metadata_importer):
+#    with open('tests/files/example_grobid_metadata_lines.tsv', 'r') as f:
+#        grobid_metadata_importer.process_batch(f)
+
+def test_grobid_metadata_parse(grobid_metadata_importer):
+    with open('tests/files/example_grobid_metadata_lines.tsv', 'r') as f:
+        raw = json.loads(f.readline().split('\t')[4])
+        re = grobid_metadata_importer.parse_grobid_json(raw)
+    assert re
+    assert re.title == "PEMBELAJARAN FISIKA DENGAN PENDEKATAN KETERAMPILAN PROSES MELALUI METODE DEMONSTRASI MENGGUNAKAN MEDIA RIIL DAN MEDIA VIRTUIL DITINJAU DARI MOTIVASI DAN GAYA BERFIKIR SISWA"
+    assert len(re.contribs) == 5
+    print(re.contribs)
+    assert re.contribs[0].raw_name == "Wahyu Ary"
+    assert re.publisher == None
+    assert re.extra.get('container_name') == None
+    assert len(re.refs) == 27
+
+def test_file_metadata_parse(grobid_metadata_importer):
+    with open('tests/files/example_grobid_metadata_lines.tsv', 'r') as f:
+        f.readline()
+        raw = f.readline().split('\t')
+    # randomize sha1 so tests are repeatable
+    random_sha1 = "sha1:{}".format(base64.b32encode(os.urandom(20)).decode('utf-8').upper())
+    fe = grobid_metadata_importer.parse_file_metadata(
+        random_sha1, json.loads(raw[1]), raw[2], int(raw[3]))
+    assert fe
+    #assert fe.sha1 == "d4a841744719518bf8bdd5d91576ccedc55efbb5" # "sha1:2SUEC5CHDFIYX6F52XMRK5WM5XCV565V"
+    assert fe.md5 == None
+    assert fe.mimetype == "application/pdf"
+    assert fe.size == 142710
+    assert fe.urls[1].url.startswith("http://via.library.depaul.edu")
+    assert fe.urls[1].rel == "web"
+    assert fe.urls[0].url.startswith("https://web.archive.org/")
+    assert fe.urls[0].rel == "webarchive"
+    assert len(fe.releases) == 0
+
+def test_grobid_metadata_importer(grobid_metadata_importer):
+    with open('tests/files/example_grobid_metadata_lines.tsv', 'r') as f:
+        grobid_metadata_importer.process_source(f)
diff --git a/python/tests/importer.py b/python/tests/importer.py
index 22af37ed..0de86635 100644
--- a/python/tests/importer.py
+++ b/python/tests/importer.py
@@ -1,7 +1,7 @@
 
 import pytest
 
-from fatcat.importer_common import FatcatImporter
+from fatcat_tools.importer_common import FatcatImporter
 
 
 def test_issnl_mapping_lookup():
diff --git a/python/tests/issn.py b/python/tests/issn.py
index fff112f7..76c8aecb 100644
--- a/python/tests/issn.py
+++ b/python/tests/issn.py
@@ -1,6 +1,6 @@
 
 import pytest
-from fatcat.issn_importer import FatcatIssnImporter
+from fatcat_tools.issn_importer import FatcatIssnImporter
 
 
 @pytest.fixture(scope="function")
diff --git a/python/tests/matched_importer.py b/python/tests/matched_importer.py
deleted file mode 100644
index 9cc6aa32..00000000
--- a/python/tests/matched_importer.py
+++ /dev/null
@@ -1,32 +0,0 @@
-
-import json
-import pytest
-from fatcat.matched_importer import FatcatMatchedImporter
-
-
-@pytest.fixture(scope="function")
-def matched_importer():
-    yield FatcatMatchedImporter("http://localhost:9411/v0")
FatcatMatchedImporter("http://localhost:9411/v0") - -# TODO: use API to check that entities actually created... -def test_matched_importer_batch(matched_importer): - with open('tests/files/example_matched.json', 'r') as f: - matched_importer.process_batch(f) - -def test_matched_importer(matched_importer): - with open('tests/files/example_matched.json', 'r') as f: - matched_importer.process_source(f) - -def test_matched_dict_parse(matched_importer): - with open('tests/files/example_matched.json', 'r') as f: - raw = json.loads(f.readline()) - f = matched_importer.parse_matched_dict(raw) - assert f.sha1 == "00242a192acc258bdfdb151943419437f440c313" - assert f.md5 == "f4de91152c7ab9fdc2a128f962faebff" - assert f.mimetype == "application/pdf" - assert f.size == 255629 - assert f.urls[1].url.startswith("http://journals.plos.org") - assert f.urls[1].rel == "web" - assert f.urls[0].url.startswith("https://web.archive.org/") - assert f.urls[0].rel == "webarchive" - assert len(f.releases) == 1 diff --git a/python/tests/matched_importer_test.py b/python/tests/matched_importer_test.py new file mode 100644 index 00000000..4042eabb --- /dev/null +++ b/python/tests/matched_importer_test.py @@ -0,0 +1,32 @@ + +import json +import pytest +from fatcat_tools.matched_importer import FatcatMatchedImporter + + +@pytest.fixture(scope="function") +def matched_importer(): + yield FatcatMatchedImporter("http://localhost:9411/v0") + +# TODO: use API to check that entities actually created... +def test_matched_importer_batch(matched_importer): + with open('tests/files/example_matched.json', 'r') as f: + matched_importer.process_batch(f) + +def test_matched_importer(matched_importer): + with open('tests/files/example_matched.json', 'r') as f: + matched_importer.process_source(f) + +def test_matched_dict_parse(matched_importer): + with open('tests/files/example_matched.json', 'r') as f: + raw = json.loads(f.readline()) + f = matched_importer.parse_matched_dict(raw) + assert f.sha1 == "00242a192acc258bdfdb151943419437f440c313" + assert f.md5 == "f4de91152c7ab9fdc2a128f962faebff" + assert f.mimetype == "application/pdf" + assert f.size == 255629 + assert f.urls[1].url.startswith("http://journals.plos.org") + assert f.urls[1].rel == "web" + assert f.urls[0].url.startswith("https://web.archive.org/") + assert f.urls[0].rel == "webarchive" + assert len(f.releases) == 1 diff --git a/python/tests/orcid.py b/python/tests/orcid.py index ae3d0d0b..f8228cc0 100644 --- a/python/tests/orcid.py +++ b/python/tests/orcid.py @@ -1,7 +1,7 @@ import json import pytest -from fatcat.orcid_importer import FatcatOrcidImporter +from fatcat_tools.orcid_importer import FatcatOrcidImporter @pytest.fixture(scope="function") diff --git a/python/tests/routes.py b/python/tests/routes.py index 8607e7c0..2e208d22 100644 --- a/python/tests/routes.py +++ b/python/tests/routes.py @@ -2,7 +2,6 @@ import json import tempfile import pytest -import fatcat from fatcat_client.rest import ApiException from fixtures import * diff --git a/python/tests/transform_tests.py b/python/tests/transform_tests.py new file mode 100644 index 00000000..669c2526 --- /dev/null +++ b/python/tests/transform_tests.py @@ -0,0 +1,15 @@ + +import json +import pytest +from fatcat_tools.crossref_importer import FatcatCrossrefImporter +from fatcat_tools.transforms import * + +from crossref import crossref_importer + +def test_elastic_convert(crossref_importer): + with open('tests/files/crossref-works.single.json', 'r') as f: + # not a single line + raw = json.loads(f.read()) + (r, c) = 
+    r.state = 'active'
+    release_elastic_dict(r)
--
cgit v1.2.3
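
For context, a minimal sketch of how the helpers relocated into fatcat_tools/transforms.py fit together after this refactor. The module path, function names, and ReleaseEntity model are taken from the patch above; the entity field values are made-up illustration data, and this is an assumption-laden usage sketch rather than code from the commit:

# Sketch only: round-trip a release through the (de)serialization helpers,
# then flatten it into the elasticsearch-oriented schema.
import json

from fatcat_client.models import ReleaseEntity
from fatcat_tools.transforms import (
    entity_to_json,
    entity_from_json,
    release_elastic_dict,
)

# Illustrative values; any real entity would come from the fatcat API
release = ReleaseEntity(title="Example Release", release_type="article-journal")
release.state = 'active'  # release_elastic_dict() raises on non-active entities

# entity_to_json() returns a plain dict via the code-generated sanitizer...
release_dict = entity_to_json(release)

# ...while entity_from_json() expects a JSON string, which it feeds back
# through the generated deserializer using the namedtuple 'Thing' hack
release2 = entity_from_json(json.dumps(release_dict), ReleaseEntity)

# Flatten into the dict shape that the elasticsearch worker indexes
es_doc = release_elastic_dict(release2)
assert es_doc['title'] == "Example Release"
assert es_doc['file_count'] == 0

This is the same pattern the patched elastic_workers.py relies on: entities arrive as JSON over Kafka, are rehydrated with entity_from_json(), and are indexed as the dict produced by release_elastic_dict().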