author     Bryan Newbold <bnewbold@robocracy.org>  2019-05-14 13:42:57 -0700
committer  Bryan Newbold <bnewbold@robocracy.org>  2019-05-14 13:43:07 -0700
commit     c01a4ba190cc18a8186545ebb55c35864676b538 (patch)
tree       6ec59ce457bcaa50932f04c44fd781a53ae33c52 /python/fatcat_tools/importers
parent     3ee54189b665d37933b6108fb85b28bb823dac4a (diff)
python impl
Diffstat (limited to 'python/fatcat_tools/importers')
-rw-r--r--  python/fatcat_tools/importers/arabesque.py        |  4
-rwxr-xr-x  python/fatcat_tools/importers/cdl_dash_dat.py     |  8
-rw-r--r--  python/fatcat_tools/importers/crossref.py         |  4
-rw-r--r--  python/fatcat_tools/importers/grobid_metadata.py  |  8
-rw-r--r--  python/fatcat_tools/importers/matched.py          |  4
-rwxr-xr-x  python/fatcat_tools/importers/wayback_static.py   |  4
6 files changed, 16 insertions(+), 16 deletions(-)
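
All six files get the same mechanical rename of generated fatcat_client model classes: FileEntityUrls -> FileUrl, FilesetEntityManifest -> FilesetFile, ReleaseEntityAbstracts -> ReleaseAbstract, ReleaseEntityExtIds -> ReleaseExtIds, WebcaptureEntityCdx -> WebcaptureCdxLine, and WebcaptureEntityArchiveUrls -> WebcaptureUrl. A minimal sketch of the new-style constructors as the importers call them after this patch (class and field names are taken from the diff below; the concrete values are illustrative, not from the codebase):

import fatcat_client

# was fatcat_client.FileEntityUrls
url = fatcat_client.FileUrl(rel="webarchive", url="https://web.archive.org/web/")

# was fatcat_client.ReleaseEntityAbstracts
abstract = fatcat_client.ReleaseAbstract(mimetype="text/plain", content="An example abstract.")

# was fatcat_client.ReleaseEntityExtIds
ext_ids = fatcat_client.ReleaseExtIds(doi="10.1234/example")
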
diff --git a/python/fatcat_tools/importers/arabesque.py b/python/fatcat_tools/importers/arabesque.py
index 7838a7ff..bf9600dc 100644
--- a/python/fatcat_tools/importers/arabesque.py
+++ b/python/fatcat_tools/importers/arabesque.py
@@ -128,7 +128,7 @@ class ArabesqueMatchImporter(EntityImporter):
row['final_url'])
urls = [url, ("webarchive", wayback)]
- urls = [fatcat_client.FileEntityUrls(rel=rel, url=url) for (rel, url) in urls]
+ urls = [fatcat_client.FileUrl(rel=rel, url=url) for (rel, url) in urls]
if len(urls) > SANE_MAX_URLS:
self.counts['skip-too-many-url'] += 1
@@ -177,7 +177,7 @@ class ArabesqueMatchImporter(EntityImporter):
# merge the existing into this one and update
existing.urls = list(set([(u.rel, u.url) for u in fe.urls + existing.urls]))
- existing.urls = [fatcat_client.FileEntityUrls(rel=rel, url=url) for (rel, url) in existing.urls]
+ existing.urls = [fatcat_client.FileUrl(rel=rel, url=url) for (rel, url) in existing.urls]
if len(existing.urls) > SANE_MAX_URLS:
self.counts['skip-update-too-many-url'] += 1
return None
diff --git a/python/fatcat_tools/importers/cdl_dash_dat.py b/python/fatcat_tools/importers/cdl_dash_dat.py
index c1ea075d..9c2bdefc 100755
--- a/python/fatcat_tools/importers/cdl_dash_dat.py
+++ b/python/fatcat_tools/importers/cdl_dash_dat.py
@@ -39,14 +39,14 @@ def single_file(prefix, path):
if guess:
mime = guess
- fsm = FilesetEntityManifest(
+ fsf = FilesetFile(
path=path,
size=size_bytes,
md5=hashes[0].hexdigest(),
sha1=hashes[1].hexdigest(),
sha256=hashes[2].hexdigest(),
extra=dict(mimetype=mime))
- return fsm
+ return fsf
def make_manifest(base_dir):
manifest = []
@@ -76,7 +76,7 @@ def cdl_dash_release(meta, extra=None):
abstracts = []
for desc in meta['descriptions']:
if desc['type'] == "abstract":
- abstracts.append(ReleaseEntityAbstracts(
+ abstracts.append(ReleaseAbstract(
mimetype="text/html",
content=clean(desc['value'])))
#print(abstracts)
@@ -95,7 +95,7 @@ def cdl_dash_release(meta, extra=None):
))
r = ReleaseEntity(
- ext_ids=ReleaseEntityExtIds(
+ ext_ids=ReleaseExtIds(
doi=doi,
ark=ark_id,
),
diff --git a/python/fatcat_tools/importers/crossref.py b/python/fatcat_tools/importers/crossref.py
index c875010c..443879e7 100644
--- a/python/fatcat_tools/importers/crossref.py
+++ b/python/fatcat_tools/importers/crossref.py
@@ -309,7 +309,7 @@ class CrossrefImporter(EntityImporter):
abstracts = []
abstract = clean(obj.get('abstract'))
if abstract and len(abstract) > 10:
- abstracts.append(fatcat_client.ReleaseEntityAbstracts(
+ abstracts.append(fatcat_client.ReleaseAbstract(
mimetype="application/xml+jats",
content=abstract))
@@ -424,7 +424,7 @@ class CrossrefImporter(EntityImporter):
release_date=release_date,
release_year=release_year,
publisher=publisher,
- ext_ids=fatcat_client.ReleaseEntityExtIds(
+ ext_ids=fatcat_client.ReleaseExtIds(
doi=obj['DOI'].lower(),
pmid=extids['pmid'],
pmcid=extids['pmcid'],
diff --git a/python/fatcat_tools/importers/grobid_metadata.py b/python/fatcat_tools/importers/grobid_metadata.py
index 9e99bc0a..573346a1 100644
--- a/python/fatcat_tools/importers/grobid_metadata.py
+++ b/python/fatcat_tools/importers/grobid_metadata.py
@@ -82,7 +82,7 @@ class GrobidMetadataImporter(EntityImporter):
abstract = obj.get('abstract')
if abstract and len(abstract) < MAX_ABSTRACT_BYTES and len(abstract) > 10:
- abobj = fatcat_client.ReleaseEntityAbstracts(
+ abobj = fatcat_client.ReleaseAbstract(
mimetype="text/plain",
content=clean(obj.get('abstract')))
abstracts = [abobj]
@@ -158,7 +158,7 @@ class GrobidMetadataImporter(EntityImporter):
volume=clean(obj['journal'].get('volume')),
issue=clean(obj['journal'].get('issue')),
abstracts=abstracts,
- ext_ids=fatcat_client.ReleaseEntityExtIds(),
+ ext_ids=fatcat_client.ReleaseExtIds(),
extra=extra)
return re
@@ -181,10 +181,10 @@ class GrobidMetadataImporter(EntityImporter):
cdx['dt'],
original)
fe.urls.append(
- fatcat_client.FileEntityUrls(url=wayback, rel="webarchive"))
+ fatcat_client.FileUrl(url=wayback, rel="webarchive"))
original_url = make_rel_url(original, default_link_rel=self.default_link_rel)
if original_url is not None:
- fe.urls.append(fatcat_client.FileEntityUrls(rel=original_url[0], url=original_url[1]))
+ fe.urls.append(fatcat_client.FileUrl(rel=original_url[0], url=original_url[1]))
return fe
diff --git a/python/fatcat_tools/importers/matched.py b/python/fatcat_tools/importers/matched.py
index 0afd00e3..f3f2e141 100644
--- a/python/fatcat_tools/importers/matched.py
+++ b/python/fatcat_tools/importers/matched.py
@@ -88,7 +88,7 @@ class MatchedImporter(EntityImporter):
url = make_rel_url(original, default_link_rel=self.default_link_rel)
if url != None:
urls.add(url)
- urls = [fatcat_client.FileEntityUrls(rel=rel, url=url) for (rel, url) in urls]
+ urls = [fatcat_client.FileUrl(rel=rel, url=url) for (rel, url) in urls]
if len(urls) == 0:
self.counts['skip-no-urls'] += 1
return None
@@ -131,7 +131,7 @@ class MatchedImporter(EntityImporter):
# merge the existing into this one and update
existing.urls = list(set([(u.rel, u.url) for u in fe.urls + existing.urls]))
- existing.urls = [fatcat_client.FileEntityUrls(rel=rel, url=url) for (rel, url) in existing.urls]
+ existing.urls = [fatcat_client.FileUrl(rel=rel, url=url) for (rel, url) in existing.urls]
if len(existing.urls) > SANE_MAX_URLS:
self.counts['skip-update-too-many-url'] += 1
return None
diff --git a/python/fatcat_tools/importers/wayback_static.py b/python/fatcat_tools/importers/wayback_static.py
index b4b1111e..73779b95 100755
--- a/python/fatcat_tools/importers/wayback_static.py
+++ b/python/fatcat_tools/importers/wayback_static.py
@@ -104,7 +104,7 @@ def lookup_cdx(embed_url, verify_hashes=True, cdx_output=None):
cdx_output.write(hit + "\n")
cdx = hit.split(' ')
cdx = [x if (x and x != '-') else None for x in cdx]
- webcapture_cdx = WebcaptureEntityCdx(
+ webcapture_cdx = WebcaptureCdxLine(
surt=cdx[0],
timestamp=parse_wbm_timestamp(cdx[1]).isoformat() + "Z",
url=cdx[2],
@@ -166,7 +166,7 @@ def static_wayback_webcapture(wayback_url, cdx_output=None):
for url in embeds:
cdx_obj = lookup_cdx(url, cdx_output=cdx_output)
cdx_list.append(cdx_obj)
- archive_urls = [WebcaptureEntityArchiveUrls(
+ archive_urls = [WebcaptureUrl(
rel="wayback",
url="https://web.archive.org/web/",
)]
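
The webcapture classes follow the same pattern; a hedged sketch of how wayback_static.py assembles them after this patch (field names as in the hunks above, values illustrative, and the import form assumed rather than shown in the diff):

from fatcat_client import WebcaptureCdxLine, WebcaptureUrl

# was WebcaptureEntityCdx: one parsed CDX line per captured resource
webcapture_cdx = WebcaptureCdxLine(
    surt="org,example)/",
    timestamp="2019-05-14T00:00:00Z",
    url="http://example.org/")

# was WebcaptureEntityArchiveUrls: where the captures can be replayed from
archive_urls = [WebcaptureUrl(rel="wayback", url="https://web.archive.org/web/")]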