diff options
| -rw-r--r-- | fatcat_scholar/schema.py | 17 | ||||
| -rw-r--r-- | fatcat_scholar/transform.py | 3 | 
2 files changed, 16 insertions, 4 deletions
| diff --git a/fatcat_scholar/schema.py b/fatcat_scholar/schema.py index bec81ab..3d74ca4 100644 --- a/fatcat_scholar/schema.py +++ b/fatcat_scholar/schema.py @@ -86,7 +86,9 @@ class ScholarBiblio(BaseModel):      wikidata_qid: Optional[str]      arxiv_id: Optional[str]      jstor_id: Optional[str] -    mag_id: Optional[str] +    doaj_id: Optional[str] +    dblp_id: Optional[str] +    oai_id: Optional[str]      license_slug: Optional[str]      publisher: Optional[str] @@ -112,6 +114,7 @@ class ScholarFulltext(BaseModel):      file_ident: Optional[str]      file_sha1: Optional[str]      file_mimetype: Optional[str] +    size_bytes: Optional[int]      thumbnail_url: Optional[str]      access_url: Optional[str]      access_type: Optional[AccessType] @@ -136,7 +139,9 @@ class ScholarRelease(BaseModel):      wikidata_qid: Optional[str]      arxiv_id: Optional[str]      jstor_id: Optional[str] -    mag_id: Optional[str] +    doaj_id: Optional[str] +    dblp_id: Optional[str] +    oai_id: Optional[str]      license_slug: Optional[str]      container_name: Optional[str] @@ -486,7 +491,9 @@ def es_biblio_from_release(release: ReleaseEntity) -> ScholarBiblio:          wikidata_qid=release.ext_ids.wikidata_qid,          arxiv_id=release.ext_ids.arxiv,          jstor_id=release.ext_ids.jstor, -        mag_id=release.ext_ids.mag, +        doaj_id=release.ext_ids.doaj, +        dblp_id=release.ext_ids.dblp, +        oai_id=release.ext_ids.oai,          license_slug=release.license_slug,          publisher=publisher,          container_name=clean_str(container_name), @@ -548,7 +555,9 @@ def es_release_from_release(release: ReleaseEntity) -> ScholarRelease:          wikidata_qid=release.ext_ids.wikidata_qid,          arxiv_id=release.ext_ids.arxiv,          jstor_id=release.ext_ids.jstor, -        mag_id=release.ext_ids.mag, +        doaj_id=release.ext_ids.doaj, +        dblp_id=release.ext_ids.dblp, +        oai_id=release.ext_ids.oai,          license_slug=release.license_slug,          container_name=container_name,          container_ident=container_ident, diff --git a/fatcat_scholar/transform.py b/fatcat_scholar/transform.py index 41ed417..b65197f 100644 --- a/fatcat_scholar/transform.py +++ b/fatcat_scholar/transform.py @@ -26,6 +26,7 @@ def es_fulltext_from_sim(sim: Dict[str, Any]) -> Optional[ScholarFulltext]:          # file_ident=None,          # file_sha1=None,          # file_mimetype=None, +        # size_bytes=None,          thumbnail_url=f"https://archive.org/serve/{issue_item}/__ia_thumb.jpg",          access_url=f"https://archive.org/details/{issue_item}/page/{first_page}",          access_type=AccessType.ia_sim, @@ -199,6 +200,7 @@ def _add_file_release_meta(      fulltext.file_ident = fe.ident      fulltext.file_sha1 = fe.sha1      fulltext.file_mimetype = fe.mimetype +    fulltext.size_bytes = fe.size      fulltext.access_url = best_url      fulltext.access_type = best_url_type      if pdf_meta is not None and pdf_meta["pdf_meta"].get("has_page0_thumbnail"): @@ -255,6 +257,7 @@ def es_fulltext_from_html(          # webcapture_ident=wc.ident,          file_sha1=html_fulltext.get("html_meta", {}).get("sha1hex"),          file_mimetype="text/html", +        # size_bytess          access_url=wc.archive_urls[0].url,          access_type=AccessType.wayback,      ) | 
