diff options
author | Bryan Newbold <bnewbold@archive.org> | 2021-06-01 01:04:16 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2021-06-02 00:38:51 -0700 |
commit | 01a1978d5b9667df4ae71a7934512e6c4e3bf9a8 (patch) | |
tree | d9d0e87e0c1bdb842696018174db4f432bab20c9 /fatcat_scholar/schema.py | |
parent | 86b29ed5fca70fc0c52443acf6a5ec1a398ed3f6 (diff) | |
download | fatcat-scholar-01a1978d5b9667df4ae71a7934512e6c4e3bf9a8.tar.gz fatcat-scholar-01a1978d5b9667df4ae71a7934512e6c4e3bf9a8.zip |
schema: add 'crossref' to bundle schema, and add from_json() helper
The from_json() refactor was an earlier TODO; it reduces duplication when
updating fields on this class.
Diffstat (limited to 'fatcat_scholar/schema.py')
-rw-r--r-- | fatcat_scholar/schema.py | 21 |
1 files changed, 20 insertions, 1 deletions
```diff
diff --git a/fatcat_scholar/schema.py b/fatcat_scholar/schema.py
index 9912a97..b8a1923 100644
--- a/fatcat_scholar/schema.py
+++ b/fatcat_scholar/schema.py
@@ -5,6 +5,7 @@ auto-conversion of datetime objects.
 """
 
 import re
+import json
 import datetime
 from enum import Enum
 from typing import Optional, List, Any, Dict
@@ -18,7 +19,7 @@ from pydantic import BaseModel
 # pytype: enable=import-error
 
 from fatcat_openapi_client import ReleaseEntity, ReleaseContrib
-from fatcat_scholar.api_entities import entity_to_dict
+from fatcat_scholar.api_entities import entity_to_dict, entity_from_json
 from fatcat_scholar.biblio_hacks import doi_link_domain
 
 
@@ -31,6 +32,7 @@ class IntermediateBundle(BaseModel):
     doc_type: DocType
     releases: List[ReleaseEntity]
     biblio_release_ident: Optional[str]
+    crossref: Optional[Dict[str, Any]]
     grobid_fulltext: Optional[Dict[str, Any]]
     pdftotext_fulltext: Optional[Dict[str, Any]]
     pdf_meta: Optional[Dict[str, Any]]
@@ -45,6 +47,23 @@ class IntermediateBundle(BaseModel):
             datetime.datetime: lambda dt: dt.isoformat(),
         }
 
+    @classmethod
+    def from_json(cls, obj: Dict[Any, Any]) -> "IntermediateBundle":
+        return IntermediateBundle(
+            doc_type=DocType(obj.get("doc_type")),
+            releases=[
+                entity_from_json(json.dumps(re), ReleaseEntity)
+                for re in obj.get("releases", [])
+            ],
+            biblio_release_ident=obj.get("biblio_release_ident"),
+            crossref=obj.get("crossref"),
+            grobid_fulltext=obj.get("grobid_fulltext"),
+            pdftotext_fulltext=obj.get("pdftotext_fulltext"),
+            pdf_meta=obj.get("pdf_meta"),
+            sim_fulltext=obj.get("sim_fulltext"),
+            html_fulltext=obj.get("html_fulltext"),
+        )
+
 
 class AccessType(str, Enum):
     ia_sim = "ia_sim"
```