From c01a4ba190cc18a8186545ebb55c35864676b538 Mon Sep 17 00:00:00 2001
From: Bryan Newbold
Date: Tue, 14 May 2019 13:42:57 -0700
Subject: python impl

---
 python/fatcat_tools/importers/arabesque.py       | 4 ++--
 python/fatcat_tools/importers/cdl_dash_dat.py    | 8 ++++----
 python/fatcat_tools/importers/crossref.py        | 4 ++--
 python/fatcat_tools/importers/grobid_metadata.py | 8 ++++----
 python/fatcat_tools/importers/matched.py         | 4 ++--
 python/fatcat_tools/importers/wayback_static.py  | 4 ++--
 6 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/python/fatcat_tools/importers/arabesque.py b/python/fatcat_tools/importers/arabesque.py
index 7838a7ff..bf9600dc 100644
--- a/python/fatcat_tools/importers/arabesque.py
+++ b/python/fatcat_tools/importers/arabesque.py
@@ -128,7 +128,7 @@ class ArabesqueMatchImporter(EntityImporter):
                 row['final_url'])
             urls = [url, ("webarchive", wayback)]
 
-        urls = [fatcat_client.FileEntityUrls(rel=rel, url=url) for (rel, url) in urls]
+        urls = [fatcat_client.FileUrl(rel=rel, url=url) for (rel, url) in urls]
 
         if len(urls) > SANE_MAX_URLS:
             self.counts['skip-too-many-url'] += 1
@@ -177,7 +177,7 @@ class ArabesqueMatchImporter(EntityImporter):
 
         # merge the existing into this one and update
         existing.urls = list(set([(u.rel, u.url) for u in fe.urls + existing.urls]))
-        existing.urls = [fatcat_client.FileEntityUrls(rel=rel, url=url) for (rel, url) in existing.urls]
+        existing.urls = [fatcat_client.FileUrl(rel=rel, url=url) for (rel, url) in existing.urls]
         if len(existing.urls) > SANE_MAX_URLS:
             self.counts['skip-update-too-many-url'] += 1
             return None
diff --git a/python/fatcat_tools/importers/cdl_dash_dat.py b/python/fatcat_tools/importers/cdl_dash_dat.py
index c1ea075d..9c2bdefc 100755
--- a/python/fatcat_tools/importers/cdl_dash_dat.py
+++ b/python/fatcat_tools/importers/cdl_dash_dat.py
@@ -39,14 +39,14 @@ def single_file(prefix, path):
         if guess:
             mime = guess
 
-    fsm = FilesetEntityManifest(
+    fsf = FilesetFile(
         path=path,
         size=size_bytes,
         md5=hashes[0].hexdigest(),
         sha1=hashes[1].hexdigest(),
         sha256=hashes[2].hexdigest(),
         extra=dict(mimetype=mime))
-    return fsm
+    return fsf
 
 def make_manifest(base_dir):
     manifest = []
@@ -76,7 +76,7 @@ def cdl_dash_release(meta, extra=None):
     abstracts = []
     for desc in meta['descriptions']:
         if desc['type'] == "abstract":
-            abstracts.append(ReleaseEntityAbstracts(
+            abstracts.append(ReleaseAbstract(
                 mimetype="text/html",
                 content=clean(desc['value'])))
     #print(abstracts)
@@ -95,7 +95,7 @@ def cdl_dash_release(meta, extra=None):
         ))
 
     r = ReleaseEntity(
-        ext_ids=ReleaseEntityExtIds(
+        ext_ids=ReleaseExtIds(
             doi=doi,
             ark=ark_id,
         ),
diff --git a/python/fatcat_tools/importers/crossref.py b/python/fatcat_tools/importers/crossref.py
index c875010c..443879e7 100644
--- a/python/fatcat_tools/importers/crossref.py
+++ b/python/fatcat_tools/importers/crossref.py
@@ -309,7 +309,7 @@ class CrossrefImporter(EntityImporter):
         abstracts = []
         abstract = clean(obj.get('abstract'))
         if abstract and len(abstract) > 10:
-            abstracts.append(fatcat_client.ReleaseEntityAbstracts(
+            abstracts.append(fatcat_client.ReleaseAbstract(
                 mimetype="application/xml+jats",
                 content=abstract))
 
@@ -424,7 +424,7 @@ class CrossrefImporter(EntityImporter):
             release_date=release_date,
             release_year=release_year,
             publisher=publisher,
-            ext_ids=fatcat_client.ReleaseEntityExtIds(
+            ext_ids=fatcat_client.ReleaseExtIds(
                 doi=obj['DOI'].lower(),
                 pmid=extids['pmid'],
                 pmcid=extids['pmcid'],
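The hunks so far are mechanical renames of generated fatcat_client model classes: FileEntityUrls -> FileUrl, FilesetEntityManifest -> FilesetFile, ReleaseEntityAbstracts -> ReleaseAbstract, ReleaseEntityExtIds -> ReleaseExtIds. Call sites are otherwise unchanged. For orientation, a rough sketch of what an importer builds under the new names, mirroring the constructor calls visible above; the title, DOI, and abstract text are placeholder values, not taken from this patch:

import fatcat_client

# Sketch only: placeholder field values, constructor shapes as in the
# hunks above (ReleaseEntity wrapping ReleaseExtIds and ReleaseAbstract).
release = fatcat_client.ReleaseEntity(
    title="Example Release",
    ext_ids=fatcat_client.ReleaseExtIds(
        doi="10.1234/example",  # crossref.py lower-cases DOIs before storing
    ),
    abstracts=[
        fatcat_client.ReleaseAbstract(
            mimetype="text/plain",
            content="A short plain-text abstract."),
    ],
)

The remaining files below follow the same rename pattern for the file and webcapture models.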
diff --git a/python/fatcat_tools/importers/grobid_metadata.py b/python/fatcat_tools/importers/grobid_metadata.py
index 9e99bc0a..573346a1 100644
--- a/python/fatcat_tools/importers/grobid_metadata.py
+++ b/python/fatcat_tools/importers/grobid_metadata.py
@@ -82,7 +82,7 @@ class GrobidMetadataImporter(EntityImporter):
 
         abstract = obj.get('abstract')
         if abstract and len(abstract) < MAX_ABSTRACT_BYTES and len(abstract) > 10:
-            abobj = fatcat_client.ReleaseEntityAbstracts(
+            abobj = fatcat_client.ReleaseAbstract(
                 mimetype="text/plain",
                 content=clean(obj.get('abstract')))
             abstracts = [abobj]
@@ -158,7 +158,7 @@ class GrobidMetadataImporter(EntityImporter):
             volume=clean(obj['journal'].get('volume')),
             issue=clean(obj['journal'].get('issue')),
             abstracts=abstracts,
-            ext_ids=fatcat_client.ReleaseEntityExtIds(),
+            ext_ids=fatcat_client.ReleaseExtIds(),
             extra=extra)
         return re
 
@@ -181,10 +181,10 @@ class GrobidMetadataImporter(EntityImporter):
             cdx['dt'],
             original)
         fe.urls.append(
-            fatcat_client.FileEntityUrls(url=wayback, rel="webarchive"))
+            fatcat_client.FileUrl(url=wayback, rel="webarchive"))
 
         original_url = make_rel_url(original, default_link_rel=self.default_link_rel)
         if original_url is not None:
-            fe.urls.append(fatcat_client.FileEntityUrls(rel=original_url[0], url=original_url[1]))
+            fe.urls.append(fatcat_client.FileUrl(rel=original_url[0], url=original_url[1]))
 
         return fe
diff --git a/python/fatcat_tools/importers/matched.py b/python/fatcat_tools/importers/matched.py
index 0afd00e3..f3f2e141 100644
--- a/python/fatcat_tools/importers/matched.py
+++ b/python/fatcat_tools/importers/matched.py
@@ -88,7 +88,7 @@ class MatchedImporter(EntityImporter):
             url = make_rel_url(original, default_link_rel=self.default_link_rel)
             if url != None:
                 urls.add(url)
-        urls = [fatcat_client.FileEntityUrls(rel=rel, url=url) for (rel, url) in urls]
+        urls = [fatcat_client.FileUrl(rel=rel, url=url) for (rel, url) in urls]
         if len(urls) == 0:
             self.counts['skip-no-urls'] += 1
             return None
@@ -131,7 +131,7 @@ class MatchedImporter(EntityImporter):
 
         # merge the existing into this one and update
         existing.urls = list(set([(u.rel, u.url) for u in fe.urls + existing.urls]))
-        existing.urls = [fatcat_client.FileEntityUrls(rel=rel, url=url) for (rel, url) in existing.urls]
+        existing.urls = [fatcat_client.FileUrl(rel=rel, url=url) for (rel, url) in existing.urls]
         if len(existing.urls) > SANE_MAX_URLS:
             self.counts['skip-update-too-many-url'] += 1
             return None
diff --git a/python/fatcat_tools/importers/wayback_static.py b/python/fatcat_tools/importers/wayback_static.py
index b4b1111e..73779b95 100755
--- a/python/fatcat_tools/importers/wayback_static.py
+++ b/python/fatcat_tools/importers/wayback_static.py
@@ -104,7 +104,7 @@ def lookup_cdx(embed_url, verify_hashes=True, cdx_output=None):
             cdx_output.write(hit + "\n")
         cdx = hit.split(' ')
         cdx = [x if (x and x != '-') else None for x in cdx]
-        webcapture_cdx = WebcaptureEntityCdx(
+        webcapture_cdx = WebcaptureCdxLine(
             surt=cdx[0],
             timestamp=parse_wbm_timestamp(cdx[1]).isoformat() + "Z",
             url=cdx[2],
@@ -166,7 +166,7 @@ def static_wayback_webcapture(wayback_url, cdx_output=None):
     for url in embeds:
         cdx_obj = lookup_cdx(url, cdx_output=cdx_output)
         cdx_list.append(cdx_obj)
-    archive_urls = [WebcaptureEntityArchiveUrls(
+    archive_urls = [WebcaptureUrl(
         rel="wayback",
         url="https://web.archive.org/web/",
     )]
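The file-entity hunks in arabesque.py and matched.py share an update idiom worth noting alongside the rename: when merging an incoming file entity into an existing one, the URL list is round-tripped through (rel, url) tuples so set() can deduplicate, then rebuilt as FileUrl objects. A compact sketch of that step, assuming fatcat_client model objects with .rel and .url attributes as shown above (merge_file_urls is a hypothetical helper name, not from this patch):

import fatcat_client

def merge_file_urls(fe, existing):
    # Deduplicate by round-tripping through (rel, url) tuples rather
    # than comparing model objects directly, then rebuild FileUrl
    # objects from the unique pairs, the same idiom as the merge
    # hunks above.
    pairs = set((u.rel, u.url) for u in fe.urls + existing.urls)
    existing.urls = [fatcat_client.FileUrl(rel=rel, url=url) for (rel, url) in pairs]
    return existing

After the merge, both importers re-check len(existing.urls) against SANE_MAX_URLS, since deduplication can still leave more distinct URLs than the importer is willing to store.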