refactor transforms into sub-dir

author: Bryan Newbold <bnewbold@robocracy.org> 2019-03-11 16:38:51 -0700
committer: Bryan Newbold <bnewbold@robocracy.org> 2019-03-11 16:38:51 -0700
commit: 655f7060eb5b5e711a8a892cb1085639c4aa8fd2 (patch)
tree: ffa1139e0c56b6510ec71d1aa8cc426423449f11 /python/fatcat_tools
parent: c937447f894cfde54628fecf3fa71127cb769f0c (diff)
download: fatcat-655f7060eb5b5e711a8a892cb1085639c4aa8fd2.tar.gz
fatcat-655f7060eb5b5e711a8a892cb1085639c4aa8fd2.zip
5 files changed, 206 insertions, 193 deletions
diff --git a/python/fatcat_tools/__init__.py b/python/fatcat_tools/__init__.py
index c72ccd47..f2798f0b 100644
--- a/python/fatcat_tools/__init__.py
+++ b/python/fatcat_tools/__init__.py
@@ -1,6 +1,4 @@
 
 from .api_auth import authenticated_api, public_api
 from .fcid import fcid2uuid, uuid2fcid
-from .transforms import entity_to_dict, entity_from_json, \
-    release_to_elasticsearch, container_to_elasticsearch, \
-    changelog_to_elasticsearch, release_to_csl
+from .transforms import *
diff --git a/python/fatcat_tools/transforms/__init__.py b/python/fatcat_tools/transforms/__init__.py
new file mode 100644
index 00000000..4950433b
--- /dev/null
+++ b/python/fatcat_tools/transforms/__init__.py
@@ -0,0 +1,4 @@
+
+from .entities import entity_to_dict, entity_from_json
+from .elasticsearch import release_to_elasticsearch, container_to_elasticsearch, changelog_to_elasticsearch
+from .csl import release_to_csl
diff --git a/python/fatcat_tools/transforms/csl.py b/python/fatcat_tools/transforms/csl.py
new file mode 100644
index 00000000..f9615b26
--- /dev/null
+++ b/python/fatcat_tools/transforms/csl.py
@@ -0,0 +1,170 @@
+
+
+import collections
+from fatcat_client import ApiClient
+
+
+def contribs_by_role(contribs, role):
+    ret = [c.copy() for c in contribs if c['role'] == role]
+    [c.pop('role') for c in ret]
+    # XXX:
+    [c.pop('literal') for c in ret]
+    if not ret:
+        return None
+    else:
+        return ret
+
+
+def release_to_csl(entity):
+    """
+    Returns a python dict which can be passed to json.dumps() to get CSL-JSON
+    (aka citeproc-JSON, aka Citation Style Language JSON)
+
+    This function will likely become an API method/endpoint
+
+    Follows, but not enforced by: https://github.com/citation-style-language/schema/blob/master/csl-data.json
+    """
+    contribs = []
+    for contrib in (entity.contribs or []):
+        if contrib.creator:
+            # TODO: should we actually be pulling creator metadata? or just
+            # using release-local raw metadata?
+            c = dict(
+                family=contrib.creator.surname,
+                given=contrib.creator.given_name,
+                #dropping-particle
+                #non-dropping-particle
+                #suffix
+                #comma-suffix
+                #static-ordering
+                literal=contrib.raw_name, # or display_name?
+                #parse-names,
+                role=contrib.role,
+            )
+        else:
+            c = dict(
+                # XXX: possible inclusion of full name metadata in release_contrib
+                family=contrib.raw_name.split()[-1],
+                literal=contrib.raw_name,
+                role=contrib.role,
+            )
+        for k in list(c.keys()):
+            if not c[k]:
+                c.pop(k)
+        contribs.append(c)
+    abstract = None
+    if entity.abstracts:
+        abstract = entity.abstracts[0].content
+
+    issued_date = None
+    if entity.release_date:
+        issued_date = {"date-parts": [[
+            entity.release_date.year,
+            entity.release_date.month,
+            entity.release_date.day,
+        ]]}
+    elif entity.release_year:
+        issued_date = {"date-parts": [[entity.release_year]]}
+
+    csl = dict(
+        #id,
+        #categories
+        type=entity.release_type or "article", # XXX: can't be blank
+        language=entity.language,
+        #journalAbbreviation
+        #shortTitle
+        ## see below for all contrib roles
+        #accessed
+        #container
+        #event-date
+        issued=issued_date,
+        #original-date
+        #submitted
+        abstract=abstract,
+        #annote
+        #archive
+        #archive_location
+        #archive-place
+        #authority
+        #call-number
+        #chapter-number
+        #citation-number
+        #citation-label
+        #collection-number
+        #collection-title
+        container_title=entity.container and entity.container.name,
+        #container-title-short
+        #dimensions
+        DOI=entity.doi,
+        #edition
+        #event
+        #event-place
+        #first-reference-note-number
+        #genre
+        ISBN=entity.isbn13,
+        ISSN=entity.container and entity.container.issnl,
+        issue=entity.issue,
+        #jurisdiction
+        #keyword
+        #locator
+        #medium
+        #note
+        #number
+        #number-of-pages
+        #number-of-volumes
+        #original-publisher
+        #original-publisher-place
+        #original-title
+        # XXX: page=entity.pages,
+        page_first=entity.pages.split('-')[0],
+        PMCID=entity.pmcid,
+        PMID=entity.pmid,
+        publisher=(entity.container and entity.container.publisher) or entity.publisher,
+        #publisher-place
+        #references
+        #reviewed-title
+        #scale
+        #section
+        #source
+        #status
+        title=entity.title,
+        #title-short
+        #URL
+        #version
+        volume=entity.volume,
+        #year-suffix
+    )
+    for role in ['author', 'collection-editor', 'composer', 'container-author',
+            'director', 'editor', 'editorial-director', 'interviewer',
+            'illustrator', 'original-author', 'recipient', 'reviewed-author',
+            'translator']:
+        cbr = contribs_by_role(contribs, role)
+        if cbr:
+            csl[role] = cbr
+    # underscore-to-dash: rename keys that could not be spelled as python kwargs
+    csl['container-title'] = csl.pop('container_title')
+    csl['page-first'] = csl.pop('page_first')
+    empty_keys = [k for k,v in csl.items() if not v]
+    for k in empty_keys:
+        csl.pop(k)
+    return csl
+
+
+def refs_to_csl(entity):
+    ret = []
+    for ref in entity.refs:
+        if ref.release_id and False:
+            # TODO: fetch full entity from API and convert with release_to_csl
+            raise NotImplementedError
+        else:
+            issued_date = None
+            if ref.year:
+                issued_date = [[ref.year]]
+            csl = dict(
+                title=ref.title,
+                issued=issued_date,
+            )
+        csl['id'] = ref.key or ref.index, # zero- or one-indexed?
+        ret.append(csl)
+    return ret
+
diff --git a/python/fatcat_tools/transforms.py b/python/fatcat_tools/transforms/elasticsearch.py
index f49b5ac9..0c2c5e46 100644
--- a/python/fatcat_tools/transforms.py
+++ b/python/fatcat_tools/transforms/elasticsearch.py
@@ -3,31 +3,6 @@
 import collections
 from fatcat_client import ApiClient
 
-def entity_to_dict(entity, api_client=None):
-    """
-    Hack to take advantage of the code-generated serialization code.
-
-    Initializing/destroying ApiClient objects is surprisingly expensive
-    (because it involves a threadpool), so we allow passing an existing
-    instance. If you already have a full-on API connection `api`, you can
-    access the ApiClient object as `api.api_client`. This is such a speed-up
-    that this argument may become mandatory.
-    """
-    if not api_client:
-        api_client = ApiClient()
-    return api_client.sanitize_for_serialization(entity)
-
-def entity_from_json(json_str, entity_type, api_client=None):
-    """
-    Hack to take advantage of the code-generated deserialization code
-
-    See not on `entity_to_dict()` about api_client argument.
-    """
-    if not api_client:
-        api_client = ApiClient()
-    thing = collections.namedtuple('Thing', ['data'])
-    thing.data = json_str
-    return api_client.deserialize(thing, entity_type)
 
 def check_kbart(year, archive):
     if not archive or not archive.get('year_spans'):
@@ -319,171 +294,6 @@ def container_to_elasticsearch(entity, force_bool=True):
     return t
 
 
-def contribs_by_role(contribs, role):
-    ret = [c.copy() for c in contribs if c['role'] == role]
-    [c.pop('role') for c in ret]
-    # XXX:
-    [c.pop('literal') for c in ret]
-    if not ret:
-        return None
-    else:
-        return ret
-
-
-def release_to_csl(entity):
-    """
-    Returns a python dict which can be json.dumps() to get a CSL-JSON (aka,
-    citeproc-JSON, aka Citation Style Language JSON)
-
-    This function will likely become an API method/endpoint
-
-    Follows, but not enforced by: https://github.com/citation-style-language/schema/blob/master/csl-data.json
-    """
-    contribs = []
-    for contrib in (entity.contribs or []):
-        if contrib.creator:
-            # TODO: should we actually be pulling creator metadata? or just
-            # using release-local raw metadata?
-            c = dict(
-                family=contrib.creator.surname,
-                given=contrib.creator.given_name,
-                #dropping-particle
-                #non-dropping-particle
-                #suffix
-                #comma-suffix
-                #static-ordering
-                literal=contrib.raw_name, # or display_name?
-                #parse-names,
-                role=contrib.role,
-            )
-        else:
-            c = dict(
-                # XXX: possible inclusion of full name metadata in release_contrib
-                family=contrib.raw_name.split()[-1],
-                literal=contrib.raw_name,
-                role=contrib.role,
-            )
-        for k in list(c.keys()):
-            if not c[k]:
-                c.pop(k)
-        contribs.append(c)
-    abstract = None
-    if entity.abstracts:
-        abstract = entity.abstracts[0].content
-
-    issued_date = None
-    if entity.release_date:
-        issued_date = {"date-parts": [[
-            entity.release_date.year,
-            entity.release_date.month,
-            entity.release_date.day,
-        ]]}
-    elif entity.release_year:
-        issued_date = {"date-parts": [[entity.release_year]]}
-
-    csl = dict(
-        #id,
-        #categories
-        type=entity.release_type or "article", # XXX: can't be blank
-        language=entity.language,
-        #journalAbbreviation
-        #shortTitle
-        ## see below for all contrib roles
-        #accessed
-        #container
-        #event-date
-        issued=issued_date,
-        #original-date
-        #submitted
-        abstract=abstract,
-        #annote
-        #archive
-        #archive_location
-        #archive-place
-        #authority
-        #call-number
-        #chapter-number
-        #citation-number
-        #citation-label
-        #collection-number
-        #collection-title
-        container_title=entity.container and entity.container.name,
-        #container-title-short
-        #dimensions
-        DOI=entity.doi,
-        #edition
-        #event
-        #event-place
-        #first-reference-note-number
-        #genre
-        ISBN=entity.isbn13,
-        ISSN=entity.container and entity.container.issnl,
-        issue=entity.issue,
-        #jurisdiction
-        #keyword
-        #locator
-        #medium
-        #note
-        #number
-        #number-of-pages
-        #number-of-volumes
-        #original-publisher
-        #original-publisher-place
-        #original-title
-        # XXX: page=entity.pages,
-        page_first=entity.pages.split('-')[0],
-        PMCID=entity.pmcid,
-        PMID=entity.pmid,
-        publisher=(entity.container and entity.container.publisher) or entity.publisher,
-        #publisher-place
-        #references
-        #reviewed-title
-        #scale
-        #section
-        #source
-        #status
-        title=entity.title,
-        #title-short
-        #URL
-        #version
-        volume=entity.volume,
-        #year-suffix
-    )
-    for role in ['author', 'collection-editor', 'composer', 'container-author',
-            'director', 'editor', 'editorial-director', 'interviewer',
-            'illustrator', 'original-author', 'recipient', 'reviewed-author',
-            'translator']:
-        cbr = contribs_by_role(contribs, role)
-        if cbr:
-            csl[role] = cbr
-    # underline-to-dash
-    csl['container-title'] = csl.pop('container_title')
-    csl['page-first'] = csl.pop('page_first')
-    empty_keys = [k for k,v in csl.items() if not v]
-    for k in empty_keys:
-        csl.pop(k)
-    return csl
-
-
-def refs_to_csl(entity):
-    ret = []
-    for ref in entity.refs:
-        if ref.release_id and False:
-            # TODO: fetch full entity from API and convert with release_to_csl
-            raise NotImplementedError
-        else:
-            issued_date = None
-            if ref.year:
-                issued_date = [[ref.year]]
-            csl = dict(
-                title=ref.title,
-                issued=issued_date,
-            )
-        csl['id'] = ref.key or ref.index, # zero- or one-indexed?
-        ret.append(csl)
-    return ret
-
-
 def changelog_to_elasticsearch(entity):
 
     editgroup = entity.editgroup
diff --git a/python/fatcat_tools/transforms/entities.py b/python/fatcat_tools/transforms/entities.py
new file mode 100644
index 00000000..b67df12d
--- /dev/null
+++ b/python/fatcat_tools/transforms/entities.py
@@ -0,0 +1,31 @@
+
+
+import collections
+from fatcat_client import ApiClient
+
+def entity_to_dict(entity, api_client=None):
+    """
+    Hack to take advantage of the code-generated serialization code.
+
+    Initializing/destroying ApiClient objects is surprisingly expensive
+    (because it involves a threadpool), so we allow passing an existing
+    instance. If you already have a full-on API connection `api`, you can
+    access the ApiClient object as `api.api_client`. This is such a speed-up
+    that this argument may become mandatory.
+    """
+    if not api_client:
+        api_client = ApiClient()
+    return api_client.sanitize_for_serialization(entity)
+
+def entity_from_json(json_str, entity_type, api_client=None):
+    """
+    Hack to take advantage of the code-generated deserialization code
+
+    See note on `entity_to_dict()` about the api_client argument.
+    """
+    if not api_client:
+        api_client = ApiClient()
+    thing = collections.namedtuple('Thing', ['data'])
+    thing.data = json_str
+    return api_client.deserialize(thing, entity_type)
+
author	Bryan Newbold <bnewbold@robocracy.org>	2019-03-11 16:38:51 -0700
committer	Bryan Newbold <bnewbold@robocracy.org>	2019-03-11 16:38:51 -0700
commit	655f7060eb5b5e711a8a892cb1085639c4aa8fd2 (patch)
tree	ffa1139e0c56b6510ec71d1aa8cc426423449f11 /python/fatcat_tools
parent	c937447f894cfde54628fecf3fa71127cb769f0c (diff)
download	fatcat-655f7060eb5b5e711a8a892cb1085639c4aa8fd2.tar.gz fatcat-655f7060eb5b5e711a8a892cb1085639c4aa8fd2.zip