Support for unicode / international characters in dashboard title (improved slugify), Fixes #1734, Fixes #827

This commit is contained in:
Torkel Ödegaard
2015-04-15 12:36:02 +02:00
parent 6ba8854854
commit 942e8fdba8
28 changed files with 48228 additions and 5 deletions
@@ -0,0 +1,2 @@
_*
cover*.out
+54
View File
@@ -0,0 +1,54 @@
slug
====
Package `slug` generates a slug from a Unicode string: a URL-friendly slugify
with support for multiple languages.
[![GoDoc](https://godoc.org/github.com/gosimple/slug?status.png)](https://godoc.org/github.com/gosimple/slug)
[![Build Status](https://drone.io/github.com/gosimple/slug/status.png)](https://drone.io/github.com/gosimple/slug/latest)
[Documentation online](http://godoc.org/github.com/gosimple/slug)
## Example
package main
import(
"github.com/gosimple/slug"
"fmt"
)
func main () {
text := slug.Make("Hellö Wörld хелло ворлд")
fmt.Println(text) // Will print hello-world-khello-vorld
someText := slug.Make("影師")
fmt.Println(someText) // Will print: ying-shi
enText := slug.MakeLang("This & that", "en")
fmt.Println(enText) // Will print 'this-and-that'
deText := slug.MakeLang("Diese & Dass", "de")
fmt.Println(deText) // Will print 'diese-und-dass'
slug.CustomSub = map[string]string{
"water": "sand",
}
textSub := slug.Make("water is hot")
fmt.Println(textSub) // Will print 'sand-is-hot'
}
### Requests or bugs?
<https://github.com/gosimple/slug/issues>
## Installation
go get -u github.com/gosimple/slug
## License
The source files are distributed under the
[Mozilla Public License, version 2.0](http://mozilla.org/MPL/2.0/),
unless otherwise noted.
Please read the [FAQ](http://www.mozilla.org/MPL/2.0/FAQ.html)
if you have further questions regarding the license.
+16
View File
@@ -0,0 +1,16 @@
// Copyright 2013 by Dobrosław Żybort. All rights reserved.
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at http://mozilla.org/MPL/2.0/.
package slug
// defaultSub lists the punctuation substitutions that MakeLang applies for
// every language: quotation marks are dropped and typographic dashes become
// an ASCII hyphen. The non-ASCII keys are written as escapes so they survive
// tools that strip or mangle non-ASCII source text.
var defaultSub = map[rune]string{
	'"':      "",
	'\'':     "",
	'\u2019': "",  // right single quotation mark
	'\u2012': "-", // figure dash
	'\u2013': "-", // en dash
	'\u2014': "-", // em dash
	'\u2015': "-", // horizontal bar
}
+43
View File
@@ -0,0 +1,43 @@
// Copyright 2013 by Dobrosław Żybort. All rights reserved.
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at http://mozilla.org/MPL/2.0/.
/*
Package slug generates a slug from a Unicode string: a URL-friendly slugify
with support for multiple languages.
Example:
package main
import(
"github.com/gosimple/slug"
"fmt"
)
func main () {
text := slug.Make("Hellö Wörld хелло ворлд")
fmt.Println(text) // Will print hello-world-khello-vorld
someText := slug.Make("影師")
fmt.Println(someText) // Will print: ying-shi
enText := slug.MakeLang("This & that", "en")
fmt.Println(enText) // Will print 'this-and-that'
deText := slug.MakeLang("Diese & Dass", "de")
fmt.Println(deText) // Will print 'diese-und-dass'
slug.CustomSub = map[string]string{
"water": "sand",
}
textSub := slug.Make("water is hot")
fmt.Println(textSub) // Will print 'sand-is-hot'
}
Requests or bugs?
https://github.com/gosimple/slug/issues
*/
package slug
@@ -0,0 +1,26 @@
// Copyright 2013 by Dobrosław Żybort. All rights reserved.
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at http://mozilla.org/MPL/2.0/.
package slug
// Per-language substitution tables: each maps a symbol to the word that
// replaces it in a slug for that language.
var (
	// deSub holds the German ("de") substitutions.
	deSub = map[rune]string{'&': "und", '@': "an"}

	// enSub holds the English ("en") substitutions.
	enSub = map[rune]string{'&': "and", '@': "at"}

	// plSub holds the Polish ("pl") substitutions.
	plSub = map[rune]string{'&': "i", '@': "na"}

	// esSub holds the Spanish ("es") substitutions.
	esSub = map[rune]string{'&': "y", '@': "en"}
)
+122
View File
@@ -0,0 +1,122 @@
// Copyright 2013 by Dobrosław Żybort. All rights reserved.
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at http://mozilla.org/MPL/2.0/.
package slug
import (
"gopkgs.com/unidecode.v1"
"regexp"
"strings"
)
// CustomSub is a user-supplied substring substitution map. MakeLang applies
// it right after CustomRuneSub and before the language substitutions.
var CustomSub map[string]string

// CustomRuneSub is a user-supplied rune substitution map. MakeLang applies
// it before every other substitution.
var CustomRuneSub map[rune]string

// MaxLength is the maximum slug length. Truncation is word-aware: the slug
// is cut after the last full word that fits. Slugs are only shortened when
// MaxLength is greater than zero, so by default they are not shortened.
// If MaxLength is smaller than the length of the first word, the returned
// slug contains only the first word, truncated after MaxLength.
var MaxLength int
//=============================================================================
// Make builds a slug from s. It is shorthand for MakeLang with the "en"
// substitution language.
func Make(s string) (slug string) {
	slug = MakeLang(s, "en")
	return slug
}
// Patterns used by MakeLang. They are compiled once at package
// initialisation instead of on every call.
var (
	// slugDisallowedRe matches any character other than a-z, 0-9, '-' and '_'.
	slugDisallowedRe = regexp.MustCompile("[^a-z0-9-_]")
	// slugDashRunRe matches a run of one or more dashes.
	slugDashRunRe = regexp.MustCompile("-+")
)

// MakeLang returns the slug generated from s, using the substitution table
// of lang ("de", "en", "pl" or "es") for symbols such as '&' and '@'. Any
// other lang value falls back to the "en" table.
//
// The steps run in this order: trim surrounding whitespace, apply
// CustomRuneSub then CustomSub, apply the language table and defaultSub,
// pass the text through unidecode, lower-case it, replace every character
// outside [a-z0-9-_] with a dash, collapse dash runs, trim leading and
// trailing dashes, and finally truncate when MaxLength is greater than zero.
func MakeLang(s string, lang string) (slug string) {
	slug = strings.TrimSpace(s)

	// Custom substitutions. Runes are always substituted first.
	slug = SubstituteRune(slug, CustomRuneSub)
	slug = Substitute(slug, CustomSub)

	// Pick the substitution table of the selected language; "en" is both an
	// explicit choice and the fallback for unknown languages.
	langSub := enSub
	switch lang {
	case "de":
		langSub = deSub
	case "pl":
		langSub = plSub
	case "es":
		langSub = esSub
	}
	slug = SubstituteRune(slug, langSub)
	slug = SubstituteRune(slug, defaultSub)

	// Process all non-ASCII symbols.
	slug = unidecode.Unidecode(slug)

	slug = strings.ToLower(slug)

	// Process all remaining symbols.
	slug = slugDisallowedRe.ReplaceAllString(slug, "-")
	slug = slugDashRunRe.ReplaceAllString(slug, "-")
	slug = strings.Trim(slug, "-")

	if MaxLength > 0 {
		slug = smartTruncate(slug)
	}

	return slug
}
// Substitute returns s with every occurrence of each key of sub replaced by
// the corresponding value. A nil or empty map returns s unchanged.
//
// The replacements are applied one after another in map iteration order,
// which Go leaves unspecified; when one entry's key occurs in another
// entry's key or value, the result therefore depends on that order.
func Substitute(s string, sub map[string]string) (buf string) {
	buf = s
	for key, val := range sub {
		// Replace in the accumulated result, not in the original s: reading
		// from s would throw away every substitution made so far and keep
		// only the one applied last.
		buf = strings.Replace(buf, key, val, -1)
	}
	return buf
}
// SubstituteRune returns s with every rune that is a key of sub replaced by
// the corresponding string; all other runes are copied unchanged. A nil or
// empty map returns a string equal to s.
func SubstituteRune(s string, sub map[rune]string) (buf string) {
	// Collect the output in a byte buffer: appending to a string inside the
	// loop would copy the whole partial result on every rune.
	out := make([]byte, 0, len(s))
	for _, c := range s {
		if d, ok := sub[c]; ok {
			out = append(out, d...)
			continue
		}
		out = append(out, string(c)...)
	}
	return string(out)
}
// smartTruncate shortens text to at most MaxLength bytes, cutting at a "-"
// word boundary where possible. Text shorter than MaxLength is returned
// unchanged, and so is any text when MaxLength is not positive. When the
// first word alone is longer than MaxLength, the first MaxLength bytes of
// that word are returned.
func smartTruncate(text string) string {
	// A non-positive limit means "do not shorten"; the check also keeps the
	// slice expression below from panicking on a negative bound.
	if MaxLength <= 0 || len(text) < MaxLength {
		return text
	}

	words := strings.SplitAfter(text, "-")
	// If MaxLength is smaller than the length of the first word, return the
	// word truncated after MaxLength.
	if len(words[0]) > MaxLength {
		return words[0][:MaxLength]
	}

	var truncated string
	for _, word := range words {
		candidate := truncated + word
		// A trailing dash is trimmed from the result, so it does not count
		// towards the limit. The last word carries no trailing dash and must
		// be counted in full; otherwise the result could exceed MaxLength
		// by one byte.
		if len(strings.TrimSuffix(candidate, "-")) > MaxLength {
			break
		}
		truncated = candidate
	}
	return strings.Trim(truncated, "-")
}
+337
View File
@@ -0,0 +1,337 @@
// Copyright 2013 by Dobrosław Żybort. All rights reserved.
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at http://mozilla.org/MPL/2.0/.
package slug
import (
"testing"
)
//=============================================================================
// TestSlugMake checks Make against a table of inputs covering plain ASCII,
// accented Latin letters, punctuation, dashes, symbols and non-Latin scripts.
func TestSlugMake(t *testing.T) {
	var testCases = []struct {
		in   string
		want string
	}{
		{"DOBROSLAWZYBORT", "dobroslawzybort"},
		{"Dobroslaw Zybort", "dobroslaw-zybort"},
		{" Dobroslaw Zybort ?", "dobroslaw-zybort"},
		{"Dobrosław Żybort", "dobroslaw-zybort"},
		{"Ala ma 6 kotów.", "ala-ma-6-kotow"},

		{"áÁàÀãÃâÂäÄąĄą̊Ą̊", "aaaaaaaaaaaaaa"},
		{"ćĆĉĈçÇ", "cccccc"},
		{"éÉèÈẽẼêÊëËęĘ", "eeeeeeeeeeee"},
		{"íÍìÌĩĨîÎïÏįĮ", "iiiiiiiiiiii"},
		{"łŁ", "ll"},
		{"ńŃ", "nn"},
		{"óÓòÒõÕôÔöÖǫǪǭǬø", "ooooooooooooooo"},
		{"śŚ", "ss"},
		{"úÚùÙũŨûÛüÜųŲ", "uuuuuuuuuuuu"},
		{"y̨Y̨", "yy"},
		{"źŹżŹ", "zzzz"},

		{"·/,:;`˜'\"", ""},
		// The dash between the years is an en dash; it is written as an
		// escape so that the separator cannot be lost from the source text.
		{"2000\u20132013", "2000-2013"},
		{"style—not", "style-not"},
		{"test_slug", "test_slug"},
		{"Æ", "ae"},
		{"Ich heiße", "ich-heisse"},

		{"This & that", "this-and-that"},
		{"fácil €", "facil-eu"},
		{"smile ☺", "smile"},
		{"Hellö Wörld хелло ворлд", "hello-world-khello-vorld"},
		{"\"C'est déjà l’été.\"", "cest-deja-lete"},
		{"jaja---lol-méméméoo--a", "jaja-lol-mememeoo-a"},
		{"影師", "ying-shi"},
	}

	for index, st := range testCases {
		got := Make(st.in)
		if got != st.want {
			t.Errorf(
				"%d. Make(%#v) = %#v; want %#v",
				index, st.in, got, st.want)
		}
	}
}
// TestSlugMakeLang verifies that MakeLang picks the '&' replacement of the
// requested language and uses the English one for an unknown language.
func TestSlugMakeLang(t *testing.T) {
	type langCase struct {
		lang string
		in   string
		want string
	}
	cases := []langCase{
		{"en", "This & that", "this-and-that"},
		{"de", "This & that", "this-und-that"},
		{"pl", "This & that", "this-i-that"},
		{"es", "This & that", "this-y-that"},
		{"test", "This & that", "this-and-that"}, // unknown lang, fallback to "en"
	}

	for i, tc := range cases {
		if got := MakeLang(tc.in, tc.lang); got != tc.want {
			t.Errorf(
				"%d. MakeLang(%#v, %#v) = %#v; want %#v",
				i, tc.in, tc.lang, got, tc.want)
		}
	}
}
// TestSlugMakeUserSubstituteLang verifies that entries in CustomSub take
// precedence over the language substitution tables.
func TestSlugMakeUserSubstituteLang(t *testing.T) {
	// CustomSub is package-level state: put the previous value back so that
	// this test does not influence tests and benchmarks that run after it.
	savedSub := CustomSub
	defer func() { CustomSub = savedSub }()

	var testCases = []struct {
		cSub map[string]string
		lang string
		in   string
		want string
	}{
		{map[string]string{"'": " "}, "en", "That's great", "that-s-great"},
		{map[string]string{"&": "or"}, "en", "This & that", "this-or-that"}, // by default "&" => "and"
		{map[string]string{"&": "or"}, "de", "This & that", "this-or-that"}, // by default "&" => "und"
	}

	for index, smust := range testCases {
		CustomSub = smust.cSub
		got := MakeLang(smust.in, smust.lang)
		if got != smust.want {
			t.Errorf(
				"%d. %#v; MakeLang(%#v, %#v) = %#v; want %#v",
				index, smust.cSub, smust.in, smust.lang,
				got, smust.want)
		}
	}
}
// TestSlugMakeSubstituteOrderLang verifies that CustomRuneSub is applied
// before CustomSub.
func TestSlugMakeSubstituteOrderLang(t *testing.T) {
	// Both maps are package-level state: put the previous values back so
	// that this test does not influence tests and benchmarks that run after
	// it.
	savedRuneSub, savedSub := CustomRuneSub, CustomSub
	defer func() {
		CustomRuneSub = savedRuneSub
		CustomSub = savedSub
	}()

	// Always substitute runes first
	var testCases = []struct {
		rSub map[rune]string
		sSub map[string]string
		in   string
		want string
	}{
		{map[rune]string{'o': "left"}, map[string]string{"o": "right"}, "o o", "left-left"},
		{map[rune]string{'&': "down"}, map[string]string{"&": "up"}, "&", "down"},
	}

	for index, smsot := range testCases {
		CustomRuneSub = smsot.rSub
		CustomSub = smsot.sSub
		got := Make(smsot.in)
		if got != smsot.want {
			t.Errorf(
				"%d. %#v; %#v; Make(%#v) = %#v; want %#v",
				index, smsot.rSub, smsot.sSub, smsot.in,
				got, smsot.want)
		}
	}
}
// TestSubstituteLang exercises Substitute directly with single-entry
// substitution maps.
func TestSubstituteLang(t *testing.T) {
	type subCase struct {
		cSub map[string]string
		in   string
		want string
	}
	cases := []subCase{
		{map[string]string{"o": "no"}, "o o o", "no no no"},
		{map[string]string{"'": " "}, "That's great", "That s great"},
	}

	for i, tc := range cases {
		if got := Substitute(tc.in, tc.cSub); got != tc.want {
			t.Errorf(
				"%d. Substitute(%#v, %#v) = %#v; want %#v",
				i, tc.in, tc.cSub, got, tc.want)
		}
	}
}
// TestSubstituteRuneLang exercises SubstituteRune directly with single-entry
// substitution maps.
func TestSubstituteRuneLang(t *testing.T) {
	type runeCase struct {
		cSub map[rune]string
		in   string
		want string
	}
	cases := []runeCase{
		{map[rune]string{'o': "no"}, "o o o", "no no no"},
		{map[rune]string{'\'': " "}, "That's great", "That s great"},
	}

	for i, tc := range cases {
		if got := SubstituteRune(tc.in, tc.cSub); got != tc.want {
			t.Errorf(
				"%d. SubstituteRune(%#v, %#v) = %#v; want %#v",
				i, tc.in, tc.cSub, got, tc.want)
		}
	}
}
// TestSlugMakeSmartTruncate verifies that Make honours MaxLength and cuts
// the slug at a word boundary.
func TestSlugMakeSmartTruncate(t *testing.T) {
	// MaxLength is package-level state: put the previous value back so that
	// the last limit used here does not shorten slugs in tests and
	// benchmarks that run after this one.
	savedMax := MaxLength
	defer func() { MaxLength = savedMax }()

	var testCases = []struct {
		in        string
		maxLength int
		want      string
	}{
		{"DOBROSLAWZYBORT", 100, "dobroslawzybort"},
		{"Dobroslaw Zybort", 100, "dobroslaw-zybort"},
		{"Dobroslaw Zybort", 12, "dobroslaw"},
		{" Dobroslaw Zybort ?", 12, "dobroslaw"},
		{"Ala ma 6 kotów.", 10, "ala-ma-6"},
		{"Dobrosław Żybort", 5, "dobro"},
	}

	for index, smstt := range testCases {
		MaxLength = smstt.maxLength
		got := Make(smstt.in)
		if got != smstt.want {
			t.Errorf(
				"%d. MaxLength = %v; Make(%#v) = %#v; want %#v",
				index, smstt.maxLength, smstt.in, got, smstt.want)
		}
	}
}
// BenchmarkMakeShortAscii measures Make on a short ASCII-only input.
func BenchmarkMakeShortAscii(b *testing.B) {
	const input = "Hello world"

	b.ReportAllocs()
	for i := 0; i < b.N; i++ {
		Make(input)
	}
}
// BenchmarkMakeShort measures Make on a short Cyrillic input.
func BenchmarkMakeShort(b *testing.B) {
	const input = "хелло ворлд"

	b.ReportAllocs()
	for i := 0; i < b.N; i++ {
		Make(input)
	}
}
// BenchmarkMakeShortSymbols measures Make on a short input made up of
// punctuation and currency symbols.
func BenchmarkMakeShortSymbols(b *testing.B) {
	const input = "·/,:;`˜'\" &€£¥"

	b.ReportAllocs()
	for i := 0; i < b.N; i++ {
		Make(input)
	}
}
// BenchmarkMakeMediumAscii measures Make on a medium-length ASCII-only input.
func BenchmarkMakeMediumAscii(b *testing.B) {
	const input = "ABCDE FGHIJ KLMNO PQRST UWXYZ ABCDE FGHIJ KLMNO PQRST UWXYZ ABCDE"

	b.ReportAllocs()
	for i := 0; i < b.N; i++ {
		Make(input)
	}
}
// BenchmarkMakeMedium measures Make on a medium-length katakana input.
func BenchmarkMakeMedium(b *testing.B) {
	const input = "ヲァィゥェ ォャュョッ ーアイウエ オカキクケ コサシスセ ソタチツテ トナニヌネ ノハヒフヘ ホマミムメ モヤユヨラ リルレロワ"

	b.ReportAllocs()
	for i := 0; i < b.N; i++ {
		Make(input)
	}
}
// BenchmarkMakeLongAscii measures Make on a long ASCII paragraph. The timer
// is reset after the input has been built.
func BenchmarkMakeLongAscii(b *testing.B) {
	paragraph := "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Morbi " +
		"pulvinar sodales ultrices. Nulla facilisi. Sed at vestibulum erat. Ut " +
		"sit amet urna posuere, sagittis eros ac, varius nisi. Morbi ullamcorper " +
		"odio at nunc pulvinar mattis. Vestibulum rutrum, ante eu dictum mattis, " +
		"elit risus finibus nunc, consectetur facilisis eros leo ut sapien. Sed " +
		"pulvinar volutpat mi. Cras semper mi ac eros accumsan, at feugiat massa " +
		"elementum. Morbi eget dolor sit amet purus condimentum egestas non ut " +
		"sapien. Duis feugiat magna vitae nisi lobortis, quis finibus sem " +
		"sollicitudin. Pellentesque eleifend blandit ipsum, ut porta arcu " +
		"ultricies et. Fusce vel ipsum porta, placerat diam ac, consectetur " +
		"magna. Nulla in porta sem. Suspendisse commodo, felis in molestie " +
		"ultricies, arcu ipsum aliquet turpis, elementum dapibus ipsum lorem a " +
		"nisl. Etiam varius imperdiet placerat. Aliquam euismod lacus arcu, " +
		"ultrices hendrerit est pellentesque vel. Aliquam sit amet laoreet leo. " +
		"Integer eros libero, mollis sed posuere."

	b.ReportAllocs()
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		Make(paragraph)
	}
}
// BenchmarkSubstituteRuneShort measures SubstituteRune on a short input with
// a two-entry substitution map.
func BenchmarkSubstituteRuneShort(b *testing.B) {
	const input = "Hello/Hi world"
	table := map[rune]string{
		'o': "no",
		'/': "slash",
	}

	b.ReportAllocs()
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		SubstituteRune(input, table)
	}
}
// BenchmarkSubstituteRuneLong measures SubstituteRune on a long ASCII
// paragraph with a seven-entry substitution map.
func BenchmarkSubstituteRuneLong(b *testing.B) {
	paragraph := "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Morbi " +
		"pulvinar sodales ultrices. Nulla facilisi. Sed at vestibulum erat. Ut " +
		"sit amet urna posuere, sagittis eros ac, varius nisi. Morbi ullamcorper " +
		"odio at nunc pulvinar mattis. Vestibulum rutrum, ante eu dictum mattis, " +
		"elit risus finibus nunc, consectetur facilisis eros leo ut sapien. Sed " +
		"pulvinar volutpat mi. Cras semper mi ac eros accumsan, at feugiat massa " +
		"elementum. Morbi eget dolor sit amet purus condimentum egestas non ut " +
		"sapien. Duis feugiat magna vitae nisi lobortis, quis finibus sem " +
		"sollicitudin. Pellentesque eleifend blandit ipsum, ut porta arcu " +
		"ultricies et. Fusce vel ipsum porta, placerat diam ac, consectetur " +
		"magna. Nulla in porta sem. Suspendisse commodo, felis in molestie " +
		"ultricies, arcu ipsum aliquet turpis, elementum dapibus ipsum lorem a " +
		"nisl. Etiam varius imperdiet placerat. Aliquam euismod lacus arcu, " +
		"ultrices hendrerit est pellentesque vel. Aliquam sit amet laoreet leo. " +
		"Integer eros libero, mollis sed posuere."
	table := map[rune]string{
		'o': "no",
		'/': "slash",
		'i': "done",
		'E': "es",
		'a': "ASD",
		'1': "one",
		'l': "onetwo",
	}

	b.ReportAllocs()
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		SubstituteRune(paragraph, table)
	}
}
// BenchmarkSmartTruncateShort measures smartTruncate on a short two-word
// slug with MaxLength set to 8.
func BenchmarkSmartTruncateShort(b *testing.B) {
	shortStr := "Hello-world"

	// MaxLength is package-level state: put the previous value back so that
	// the limit does not change what other benchmarks measure when they run
	// after this one (for example with -count or -cpu).
	savedMax := MaxLength
	defer func() { MaxLength = savedMax }()
	MaxLength = 8

	b.ReportAllocs()
	b.ResetTimer()
	for n := 0; n < b.N; n++ {
		smartTruncate(shortStr)
	}
}
// BenchmarkSmartTruncateLong measures smartTruncate on a long dash-separated
// paragraph with MaxLength set to 256.
func BenchmarkSmartTruncateLong(b *testing.B) {
	longStr := "Lorem-ipsum-dolor-sit-amet,-consectetur-adipiscing-elit.-Morbi-" +
		"pulvinar-sodales-ultrices.-Nulla-facilisi.-Sed-at-vestibulum-erat.-Ut-" +
		"sit-amet-urna-posuere,-sagittis-eros-ac,-varius-nisi.-Morbi-ullamcorper-" +
		"odio-at-nunc-pulvinar-mattis.-Vestibulum-rutrum,-ante-eu-dictum-mattis,-" +
		"elit-risus-finibus-nunc,-consectetur-facilisis-eros-leo-ut-sapien.-Sed-" +
		"pulvinar-volutpat-mi.-Cras-semper-mi-ac-eros-accumsan,-at-feugiat-massa-" +
		"elementum.-Morbi-eget-dolor-sit-amet-purus-condimentum-egestas-non-ut-" +
		"sapien.-Duis-feugiat-magna-vitae-nisi-lobortis,-quis-finibus-sem-" +
		"sollicitudin.-Pellentesque-eleifend-blandit-ipsum,-ut-porta-arcu-" +
		"ultricies-et.-Fusce-vel-ipsum-porta,-placerat-diam-ac,-consectetur-" +
		"magna.-Nulla-in-porta-sem.-Suspendisse-commodo,-felis-in-molestie-" +
		"ultricies,-arcu-ipsum-aliquet-turpis,-elementum-dapibus-ipsum-lorem-a-" +
		"nisl.-Etiam-varius-imperdiet-placerat.-Aliquam-euismod-lacus-arcu,-" +
		"ultrices-hendrerit-est-pellentesque-vel.-Aliquam-sit-amet-laoreet-leo.-" +
		"Integer-eros-libero,-mollis-sed-posuere."

	// MaxLength is package-level state: put the previous value back so that
	// the limit does not change what other benchmarks measure when they run
	// after this one (for example with -count or -cpu).
	savedMax := MaxLength
	defer func() { MaxLength = savedMax }()
	MaxLength = 256

	b.ReportAllocs()
	b.ResetTimer()
	for n := 0; n < b.N; n++ {
		smartTruncate(longStr)
	}
}