about | summary | refs | log | tree | commit | diff | stats
path: root/fatcat_scholar
diff options
context:
space:
mode:
Diffstat (limited to 'fatcat_scholar')
-rw-r--r-- fatcat_scholar/schema.py 17
-rw-r--r-- fatcat_scholar/transform.py 3
2 files changed, 16 insertions, 4 deletions
diff --git a/fatcat_scholar/schema.py b/fatcat_scholar/schema.py
index bec81ab..3d74ca4 100644
--- a/fatcat_scholar/schema.py
+++ b/fatcat_scholar/schema.py
@@ -86,7 +86,9 @@ class ScholarBiblio(BaseModel):
wikidata_qid: Optional[str]
arxiv_id: Optional[str]
jstor_id: Optional[str]
- mag_id: Optional[str]
+ doaj_id: Optional[str]
+ dblp_id: Optional[str]
+ oai_id: Optional[str]
license_slug: Optional[str]
publisher: Optional[str]
@@ -112,6 +114,7 @@ class ScholarFulltext(BaseModel):
file_ident: Optional[str]
file_sha1: Optional[str]
file_mimetype: Optional[str]
+ size_bytes: Optional[int]
thumbnail_url: Optional[str]
access_url: Optional[str]
access_type: Optional[AccessType]
@@ -136,7 +139,9 @@ class ScholarRelease(BaseModel):
wikidata_qid: Optional[str]
arxiv_id: Optional[str]
jstor_id: Optional[str]
- mag_id: Optional[str]
+ doaj_id: Optional[str]
+ dblp_id: Optional[str]
+ oai_id: Optional[str]
license_slug: Optional[str]
container_name: Optional[str]
@@ -486,7 +491,9 @@ def es_biblio_from_release(release: ReleaseEntity) -> ScholarBiblio:
wikidata_qid=release.ext_ids.wikidata_qid,
arxiv_id=release.ext_ids.arxiv,
jstor_id=release.ext_ids.jstor,
- mag_id=release.ext_ids.mag,
+ doaj_id=release.ext_ids.doaj,
+ dblp_id=release.ext_ids.dblp,
+ oai_id=release.ext_ids.oai,
license_slug=release.license_slug,
publisher=publisher,
container_name=clean_str(container_name),
@@ -548,7 +555,9 @@ def es_release_from_release(release: ReleaseEntity) -> ScholarRelease:
wikidata_qid=release.ext_ids.wikidata_qid,
arxiv_id=release.ext_ids.arxiv,
jstor_id=release.ext_ids.jstor,
- mag_id=release.ext_ids.mag,
+ doaj_id=release.ext_ids.doaj,
+ dblp_id=release.ext_ids.dblp,
+ oai_id=release.ext_ids.oai,
license_slug=release.license_slug,
container_name=container_name,
container_ident=container_ident,
diff --git a/fatcat_scholar/transform.py b/fatcat_scholar/transform.py
index 41ed417..b65197f 100644
--- a/fatcat_scholar/transform.py
+++ b/fatcat_scholar/transform.py
@@ -26,6 +26,7 @@ def es_fulltext_from_sim(sim: Dict[str, Any]) -> Optional[ScholarFulltext]:
# file_ident=None,
# file_sha1=None,
# file_mimetype=None,
+ # size_bytes=None,
thumbnail_url=f"https://archive.org/serve/{issue_item}/__ia_thumb.jpg",
access_url=f"https://archive.org/details/{issue_item}/page/{first_page}",
access_type=AccessType.ia_sim,
@@ -199,6 +200,7 @@ def _add_file_release_meta(
fulltext.file_ident = fe.ident
fulltext.file_sha1 = fe.sha1
fulltext.file_mimetype = fe.mimetype
+ fulltext.size_bytes = fe.size
fulltext.access_url = best_url
fulltext.access_type = best_url_type
if pdf_meta is not None and pdf_meta["pdf_meta"].get("has_page0_thumbnail"):
@@ -255,6 +257,7 @@ def es_fulltext_from_html(
# webcapture_ident=wc.ident,
file_sha1=html_fulltext.get("html_meta", {}).get("sha1hex"),
file_mimetype="text/html",
+ # size_bytes
access_url=wc.archive_urls[0].url,
access_type=AccessType.wayback,
)