| author | Bryan Newbold <bnewbold@robocracy.org> | 2019-05-14 13:42:57 -0700 |
|---|---|---|
| committer | Bryan Newbold <bnewbold@robocracy.org> | 2019-05-14 13:43:07 -0700 |
| commit | c01a4ba190cc18a8186545ebb55c35864676b538 (patch) | |
| tree | 6ec59ce457bcaa50932f04c44fd781a53ae33c52 /python/fatcat_tools/importers | |
| parent | 3ee54189b665d37933b6108fb85b28bb823dac4a (diff) | |
| download | fatcat-c01a4ba190cc18a8186545ebb55c35864676b538.tar.gz, fatcat-c01a4ba190cc18a8186545ebb55c35864676b538.zip | |
python impl
Diffstat (limited to 'python/fatcat_tools/importers')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | python/fatcat_tools/importers/arabesque.py | 4 |
| -rwxr-xr-x | python/fatcat_tools/importers/cdl_dash_dat.py | 8 |
| -rw-r--r-- | python/fatcat_tools/importers/crossref.py | 4 |
| -rw-r--r-- | python/fatcat_tools/importers/grobid_metadata.py | 8 |
| -rw-r--r-- | python/fatcat_tools/importers/matched.py | 4 |
| -rwxr-xr-x | python/fatcat_tools/importers/wayback_static.py | 4 |
6 files changed, 16 insertions, 16 deletions
```diff
diff --git a/python/fatcat_tools/importers/arabesque.py b/python/fatcat_tools/importers/arabesque.py
index 7838a7ff..bf9600dc 100644
--- a/python/fatcat_tools/importers/arabesque.py
+++ b/python/fatcat_tools/importers/arabesque.py
@@ -128,7 +128,7 @@ class ArabesqueMatchImporter(EntityImporter):
             row['final_url'])
         urls = [url, ("webarchive", wayback)]
-        urls = [fatcat_client.FileEntityUrls(rel=rel, url=url) for (rel, url) in urls]
+        urls = [fatcat_client.FileUrl(rel=rel, url=url) for (rel, url) in urls]
 
         if len(urls) > SANE_MAX_URLS:
             self.counts['skip-too-many-url'] += 1
@@ -177,7 +177,7 @@ class ArabesqueMatchImporter(EntityImporter):
 
         # merge the existing into this one and update
         existing.urls = list(set([(u.rel, u.url) for u in fe.urls + existing.urls]))
-        existing.urls = [fatcat_client.FileEntityUrls(rel=rel, url=url) for (rel, url) in existing.urls]
+        existing.urls = [fatcat_client.FileUrl(rel=rel, url=url) for (rel, url) in existing.urls]
         if len(existing.urls) > SANE_MAX_URLS:
             self.counts['skip-update-too-many-url'] += 1
             return None
diff --git a/python/fatcat_tools/importers/cdl_dash_dat.py b/python/fatcat_tools/importers/cdl_dash_dat.py
index c1ea075d..9c2bdefc 100755
--- a/python/fatcat_tools/importers/cdl_dash_dat.py
+++ b/python/fatcat_tools/importers/cdl_dash_dat.py
@@ -39,14 +39,14 @@ def single_file(prefix, path):
         if guess:
             mime = guess
 
-    fsm = FilesetEntityManifest(
+    fsf = FilesetFile(
         path=path,
         size=size_bytes,
         md5=hashes[0].hexdigest(),
         sha1=hashes[1].hexdigest(),
         sha256=hashes[2].hexdigest(),
         extra=dict(mimetype=mime))
-    return fsm
+    return fsf
 
 def make_manifest(base_dir):
     manifest = []
@@ -76,7 +76,7 @@ def cdl_dash_release(meta, extra=None):
     abstracts = []
     for desc in meta['descriptions']:
         if desc['type'] == "abstract":
-            abstracts.append(ReleaseEntityAbstracts(
+            abstracts.append(ReleaseAbstract(
                 mimetype="text/html",
                 content=clean(desc['value'])))
             #print(abstracts)
@@ -95,7 +95,7 @@ def cdl_dash_release(meta, extra=None):
         ))
 
     r = ReleaseEntity(
-        ext_ids=ReleaseEntityExtIds(
+        ext_ids=ReleaseExtIds(
             doi=doi,
             ark=ark_id,
         ),
diff --git a/python/fatcat_tools/importers/crossref.py b/python/fatcat_tools/importers/crossref.py
index c875010c..443879e7 100644
--- a/python/fatcat_tools/importers/crossref.py
+++ b/python/fatcat_tools/importers/crossref.py
@@ -309,7 +309,7 @@ class CrossrefImporter(EntityImporter):
         abstracts = []
         abstract = clean(obj.get('abstract'))
         if abstract and len(abstract) > 10:
-            abstracts.append(fatcat_client.ReleaseEntityAbstracts(
+            abstracts.append(fatcat_client.ReleaseAbstract(
                 mimetype="application/xml+jats",
                 content=abstract))
@@ -424,7 +424,7 @@ class CrossrefImporter(EntityImporter):
             release_date=release_date,
             release_year=release_year,
             publisher=publisher,
-            ext_ids=fatcat_client.ReleaseEntityExtIds(
+            ext_ids=fatcat_client.ReleaseExtIds(
                 doi=obj['DOI'].lower(),
                 pmid=extids['pmid'],
                 pmcid=extids['pmcid'],
diff --git a/python/fatcat_tools/importers/grobid_metadata.py b/python/fatcat_tools/importers/grobid_metadata.py
index 9e99bc0a..573346a1 100644
--- a/python/fatcat_tools/importers/grobid_metadata.py
+++ b/python/fatcat_tools/importers/grobid_metadata.py
@@ -82,7 +82,7 @@ class GrobidMetadataImporter(EntityImporter):
 
         abstract = obj.get('abstract')
         if abstract and len(abstract) < MAX_ABSTRACT_BYTES and len(abstract) > 10:
-            abobj = fatcat_client.ReleaseEntityAbstracts(
+            abobj = fatcat_client.ReleaseAbstract(
                 mimetype="text/plain",
                 content=clean(obj.get('abstract')))
             abstracts = [abobj]
@@ -158,7 +158,7 @@ class GrobidMetadataImporter(EntityImporter):
             volume=clean(obj['journal'].get('volume')),
             issue=clean(obj['journal'].get('issue')),
             abstracts=abstracts,
-            ext_ids=fatcat_client.ReleaseEntityExtIds(),
+            ext_ids=fatcat_client.ReleaseExtIds(),
             extra=extra)
         return re
@@ -181,10 +181,10 @@ class GrobidMetadataImporter(EntityImporter):
             cdx['dt'],
             original)
         fe.urls.append(
-            fatcat_client.FileEntityUrls(url=wayback, rel="webarchive"))
+            fatcat_client.FileUrl(url=wayback, rel="webarchive"))
         original_url = make_rel_url(original, default_link_rel=self.default_link_rel)
         if original_url is not None:
-            fe.urls.append(fatcat_client.FileEntityUrls(rel=original_url[0], url=original_url[1]))
+            fe.urls.append(fatcat_client.FileUrl(rel=original_url[0], url=original_url[1]))
         return fe
diff --git a/python/fatcat_tools/importers/matched.py b/python/fatcat_tools/importers/matched.py
index 0afd00e3..f3f2e141 100644
--- a/python/fatcat_tools/importers/matched.py
+++ b/python/fatcat_tools/importers/matched.py
@@ -88,7 +88,7 @@ class MatchedImporter(EntityImporter):
             url = make_rel_url(original, default_link_rel=self.default_link_rel)
             if url != None:
                 urls.add(url)
-        urls = [fatcat_client.FileEntityUrls(rel=rel, url=url) for (rel, url) in urls]
+        urls = [fatcat_client.FileUrl(rel=rel, url=url) for (rel, url) in urls]
         if len(urls) == 0:
             self.counts['skip-no-urls'] += 1
             return None
@@ -131,7 +131,7 @@ class MatchedImporter(EntityImporter):
 
         # merge the existing into this one and update
         existing.urls = list(set([(u.rel, u.url) for u in fe.urls + existing.urls]))
-        existing.urls = [fatcat_client.FileEntityUrls(rel=rel, url=url) for (rel, url) in existing.urls]
+        existing.urls = [fatcat_client.FileUrl(rel=rel, url=url) for (rel, url) in existing.urls]
         if len(existing.urls) > SANE_MAX_URLS:
             self.counts['skip-update-too-many-url'] += 1
             return None
diff --git a/python/fatcat_tools/importers/wayback_static.py b/python/fatcat_tools/importers/wayback_static.py
index b4b1111e..73779b95 100755
--- a/python/fatcat_tools/importers/wayback_static.py
+++ b/python/fatcat_tools/importers/wayback_static.py
@@ -104,7 +104,7 @@ def lookup_cdx(embed_url, verify_hashes=True, cdx_output=None):
             cdx_output.write(hit + "\n")
         cdx = hit.split(' ')
         cdx = [x if (x and x != '-') else None for x in cdx]
-        webcapture_cdx = WebcaptureEntityCdx(
+        webcapture_cdx = WebcaptureCdxLine(
             surt=cdx[0],
             timestamp=parse_wbm_timestamp(cdx[1]).isoformat() + "Z",
             url=cdx[2],
@@ -166,7 +166,7 @@ def static_wayback_webcapture(wayback_url, cdx_output=None):
     for url in embeds:
         cdx_obj = lookup_cdx(url, cdx_output=cdx_output)
         cdx_list.append(cdx_obj)
-    archive_urls = [WebcaptureEntityArchiveUrls(
+    archive_urls = [WebcaptureUrl(
         rel="wayback",
         url="https://web.archive.org/web/",
     )]
```
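Taken together, this commit switches the importers to the renamed fatcat_client model classes: FileEntityUrls → FileUrl, FilesetEntityManifest → FilesetFile, ReleaseEntityAbstracts → ReleaseAbstract, ReleaseEntityExtIds → ReleaseExtIds, WebcaptureEntityCdx → WebcaptureCdxLine, and WebcaptureEntityArchiveUrls → WebcaptureUrl. Below is a minimal sketch of the new call patterns, based only on the constructor usage visible in the hunks above; the literal URLs, title, DOI, and abstract text are illustrative assumptions, not values from the commit.

```python
# Illustrative sketch of the renamed fatcat_client model classes as used by
# these importers. All literal values below are made-up examples (assumptions).
import fatcat_client

# FileEntityUrls -> FileUrl: file URLs are built from (rel, url) pairs
urls = [
    ("web", "https://example.org/paper.pdf"),
    ("webarchive", "https://web.archive.org/web/20190514000000/https://example.org/paper.pdf"),
]
file_urls = [fatcat_client.FileUrl(rel=rel, url=url) for (rel, url) in urls]

# ReleaseEntityAbstracts -> ReleaseAbstract, ReleaseEntityExtIds -> ReleaseExtIds
abstract = fatcat_client.ReleaseAbstract(
    mimetype="text/plain",
    content="An example abstract.",  # assumed content
)
release = fatcat_client.ReleaseEntity(
    title="Example Release",                                      # assumed title
    abstracts=[abstract],
    ext_ids=fatcat_client.ReleaseExtIds(doi="10.1234/example"),   # assumed DOI
)
```

The FilesetFile, WebcaptureCdxLine, and WebcaptureUrl constructors follow the same keyword-argument pattern shown in the cdl_dash_dat.py and wayback_static.py hunks above.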
