summary refs log tree commit diff stats
path: root/fatcat_scholar/transform.py
diff options
context:
space:
mode:
Diffstat (limited to 'fatcat_scholar/transform.py')
-rw-r--r-- fatcat_scholar/transform.py 22
1 files changed, 20 insertions, 2 deletions
diff --git a/fatcat_scholar/transform.py b/fatcat_scholar/transform.py
index db631cf..3213b8d 100644
--- a/fatcat_scholar/transform.py
+++ b/fatcat_scholar/transform.py
@@ -1,5 +1,6 @@
import argparse
import datetime
+import json
import sys
import xml.etree.ElementTree
import xml.etree.ElementTree as ET
@@ -8,11 +9,28 @@ from typing import Any, Dict, List, Optional, Sequence
import sentry_sdk
from fatcat_openapi_client import FileEntity, ReleaseEntity, WebcaptureEntity
-from fatcat_scholar.api_entities import *
from fatcat_scholar.config import GIT_REVISION, settings
from fatcat_scholar.grobid2json import teixml2json
from fatcat_scholar.identifiers import clean_doi, clean_pmcid
-from fatcat_scholar.schema import *
+from fatcat_scholar.schema import (
+ AccessType,
+ DocType,
+ IntermediateBundle,
+ RefBiblio,
+ RefStructured,
+ ScholarAbstract,
+ ScholarAccess,
+ ScholarBiblio,
+ ScholarDoc,
+ ScholarFulltext,
+ ScholarSim,
+ clean_small_int,
+ clean_url_conservative,
+ es_abstracts_from_grobid,
+ es_abstracts_from_release,
+ es_biblio_from_release,
+ es_release_from_release,
+)
MAX_BODY_CHARS = 512 * 1024