about summary refs log tree commit diff stats
path: root/skate
diff options
context:
space:
mode:
author Martin Czygan <martin.czygan@gmail.com> 2021-06-01 17:20:22 +0200
committer Martin Czygan <martin.czygan@gmail.com> 2021-06-01 17:20:22 +0200
commit 25a4517383c03540bd3d7695e14f60c1bb20da2d (patch)
tree 5f3df59b6c20162b68d8881199da2f6b9439ed40 /skate
parent 00c08c0776972544bfae315b50081596079214f2 (diff)
download refcat-25a4517383c03540bd3d7695e14f60c1bb20da2d.tar.gz
refcat-25a4517383c03540bd3d7695e14f60c1bb20da2d.zip
add mapper: MapperReleaseContainerName
Diffstat (limited to 'skate')
-rw-r--r-- skate/cmd/skate-map/main.go 19
-rw-r--r-- skate/map.go 27
2 files changed, 32 insertions, 14 deletions
diff --git a/skate/cmd/skate-map/main.go b/skate/cmd/skate-map/main.go
index 572ecec..060e324 100644
--- a/skate/cmd/skate-map/main.go
+++ b/skate/cmd/skate-map/main.go
@@ -67,15 +67,16 @@ func main() {
availableMappers := map[string]skate.Mapper{
// Add new mapper functions here. TODO: add more docs, and improve
// composability, e.g. like middleware.
- "id": skate.Identity,
- "ff": skate.CreateFixedMapper(*extraValue),
- "ti": skate.MapperTitle,
- "tn": skate.MapperTitleNormalized,
- "ty": skate.MapperTitleNysiis,
- "ts": skate.MapperTitleSandcrawler,
- "ru": skate.MapperURLFromRef,
- "cni": skate.MapperContainerName,
- "cns": skate.MapperContainerNameSandcrawler,
+ "id": skate.Identity,
+ "ff": skate.CreateFixedMapper(*extraValue),
+ "ti": skate.MapperTitle,
+ "tn": skate.MapperTitleNormalized,
+ "ty": skate.MapperTitleNysiis,
+ "ts": skate.MapperTitleSandcrawler,
+ "ru": skate.MapperURLFromRef,
+ "cni": skate.MapperContainerName,
+ "cns": skate.MapperContainerNameSandcrawler,
+ "rcns": skate.MapperReleaseContainerName,
}
if *logFile != "" {
f, err := os.OpenFile(*logFile, os.O_CREATE|os.O_APPEND, 0644)
diff --git a/skate/map.go b/skate/map.go
index f812464..a06b5ae 100644
--- a/skate/map.go
+++ b/skate/map.go
@@ -24,16 +24,16 @@ type TitleDoc struct {
Title string `json:"title"`
}
-// ContainerNameDoc is a document with a container title.
+// ContainerNameDoc is a (ref) document with a container title.
type ContainerNameDoc struct {
Biblio struct {
ContainerName string `json:"container_name"`
} `json:"biblio"`
}
-// PartialDoc for ref docs, that do not have DOI or title. E.g. we found 49701699
-// (NCVY), 36401044 (NCVYU), 29668363 (NCUY), and so on. Some examples: XXX
-type PartialDoc struct {
+// PartialRef is for ref docs that have neither a DOI nor a title. E.g. we found
+// 49701699 (NCVY), 36401044 (NCVYU), 29668363 (NCUY), and so on. Some examples: XXX.
+type PartialRef struct {
ContainerName string `json:"container_name"`
Contribs []struct {
// XXX: Need a way to sensibly compare sets of author names.
@@ -200,7 +200,7 @@ func MapperTitleSandcrawler(p []byte) (fields [][]byte, err error) {
// MapperContainerName extracts (container_name, doc).
func MapperContainerName(p []byte) ([][]byte, error) {
var (
- doc PartialDoc
+ doc PartialRef
key []byte
)
if err := json.Unmarshal(p, &doc); err != nil {
@@ -240,6 +240,23 @@ func MapperURLFromRef(p []byte) (fields [][]byte, err error) {
return fields, nil
}
+// MapperReleaseContainerName extracts (normalized container name, doc) from a
+// release document. The name is taken from doc.Container.Name, falling back to
+// doc.ContainerName; it is trimmed, passed through wsReplacer and then through
+// sandcrawlerSlugify. If both names are empty, the key is left empty and the
+// document is still emitted. An error is returned only if p cannot be
+// unmarshaled into a Release.
+func MapperReleaseContainerName(p []byte) (fields [][]byte, err error) {
+	var doc Release
+	// Assign to the named result instead of shadowing it with :=.
+	if err = json.Unmarshal(p, &doc); err != nil {
+		return nil, err
+	}
+	// Prefer the nested container name; fall back to the top-level field.
+	name := doc.Container.Name
+	if name == "" {
+		name = doc.ContainerName
+	}
+	// key stays nil when no container name is available at all.
+	var key []byte
+	if name != "" {
+		key = []byte(sandcrawlerSlugify(wsReplacer.Replace(strings.TrimSpace(name))))
+	}
+	return [][]byte{key, p}, nil
+}
+
// MapperPartial works on partial documents.
func MapperPartial(p []byte) (fields [][]byte, err error) {
return nil, nil