diff options
Diffstat (limited to 'skate/unstructured.go')
| -rw-r--r-- | skate/unstructured.go | 37 | 
1 files changed, 4 insertions, 33 deletions
| diff --git a/skate/unstructured.go b/skate/unstructured.go index f2c1d21..a172e8b 100644 --- a/skate/unstructured.go +++ b/skate/unstructured.go @@ -2,19 +2,12 @@ package skate  import (  	"regexp" -	"strings"  )  var ( -	PatDOI          = regexp.MustCompile(`10[.][0-9]{1,8}/[^ ]*[\w]`) -	PatDOINoHyphen  = regexp.MustCompile(`10[.][0-9]{1,8}/[^ -]*[\w]`) -	PatArxiv        = regexp.MustCompile(`https?://arxiv.org/(pdf|abs)/([0-9]{4,4}[.][0-9]{1,8})(v[0-9]{1,2})?(.pdf)?`) -	DOILinkPrefixes = []string{ -		"http://doi.org/", -		"http://dx.doi.org/", -		"https://doi.org/", -		"https://dx.doi.org/", -	} +	PatDOI         = regexp.MustCompile(`10[.][0-9]{1,8}/[^ ]*[\w]`) +	PatDOINoHyphen = regexp.MustCompile(`10[.][0-9]{1,8}/[^ -]*[\w]`) +	PatArxiv       = regexp.MustCompile(`https?://arxiv.org/(pdf|abs)/([0-9]{4,4}[.][0-9]{1,8})(v[0-9]{1,2})?(.pdf)?`)  )  // ParseUnstructured will in-place augment missing DOI, arxiv id and so on. @@ -24,32 +17,10 @@ func ParseUnstructured(ref *Ref) error {  		v   string  		vs  []string  	) -	// Handle things like: 10.1111/j.1550-7408.1968.tb02138.x-BIB5|cit5, -	// 10.1111/j.1558-5646.1997.tb02431.x-BIB0008|evo02431-cit-0008, ... -	if strings.Contains(strings.ToLower(ref.Key), "-bib") && ref.Biblio.DOI == "" { -		parts := strings.Split(strings.ToLower(ref.Key), "-bib") -		ref.Biblio.DOI = parts[0] -	}  	// DOI  	v = PatDOI.FindString(uns)  	if v != "" && ref.Biblio.DOI == "" { -		ref.Biblio.DOI = v -	} -	// DOI in Key -	v = PatDOINoHyphen.FindString(ref.Key) -	if v != "" && ref.Biblio.DOI == "" { -		ref.Biblio.DOI = v -	} -	// DOI in URL -	for _, prefix := range DOILinkPrefixes { -		if ref.Biblio.DOI != "" && strings.HasPrefix(ref.Biblio.Url, prefix) { -			ref.Biblio.DOI = strings.Replace(ref.Biblio.Url, prefix, "", -1) -		} -	} -	// Another DOI pattern. -	v = PatDOINoHyphen.FindString(ref.Key) -	if v != "" && ref.Biblio.DOI == "" { -		ref.Biblio.DOI = v +		ref.Biblio.DOI = SanitizeDOI(v)  	}  	// Arxiv  	vs = PatArxiv.FindStringSubmatch(uns) | 
