go_dreamfactory/lego/sys/codec/utils/string.go
2022-07-11 15:54:54 +08:00

350 lines
6.4 KiB
Go

package utils
import "unicode/utf8"
// htmlSafeSet is indexed by an ASCII byte value. An entry is true when
// WriteStringSlowPathWithHTMLEscaped copies that byte through verbatim, and
// false when the byte has to be escaped.
//
// Every byte from space (0x20) through DEL (0x7F) is true except for
// '"', '&', '<', '>' and '\\'. Control bytes below 0x20 keep the zero
// value, false.
var htmlSafeSet = func() [utf8.RuneSelf]bool {
	var set [utf8.RuneSelf]bool
	for b := 0x20; b < utf8.RuneSelf; b++ {
		set[b] = true
	}
	for _, b := range []byte{'"', '&', '<', '>', '\\'} {
		set[b] = false
	}
	return set
}()
// safeSet is indexed by an ASCII byte value. An entry is true when
// WriteStringSlowPath copies that byte through verbatim, and false when the
// byte has to be escaped.
//
// Every byte from space (0x20) through DEL (0x7F) is true except for '"'
// and '\\'. Control bytes below 0x20 keep the zero value, false.
var safeSet = func() [utf8.RuneSelf]bool {
	var set [utf8.RuneSelf]bool
	for b := 0x20; b < utf8.RuneSelf; b++ {
		set[b] = true
	}
	for _, b := range []byte{'"', '\\'} {
		set[b] = false
	}
	return set
}()
// hex holds the lowercase hexadecimal digits. The string writers in this file
// index it with a 4-bit value to produce the digits of \uXXXX escapes.
var hex = "0123456789abcdef"
// WriteStringSlowPathWithHTMLEscaped appends the escaped form of s[i:] to *buf
// and then appends the closing double quote.
//
// It writes neither the opening quote nor s[:i]; presumably the caller has
// already emitted both (confirm against the call sites).
//
// Escaping rules:
//   - ASCII bytes marked true in htmlSafeSet are copied unchanged.
//   - '\\' and '"' are prefixed with a backslash.
//   - '\n', '\r' and '\t' become the two-character escapes \n, \r and \t.
//   - Every other ASCII byte marked false in htmlSafeSet (the remaining
//     control bytes plus '<', '>' and '&') is written as \u00XX.
//   - A byte that is not valid UTF-8 is written as \ufffd.
//   - U+2028 and U+2029 are written as \u2028 and \u2029.
//   - All other multi-byte runes are copied unchanged.
//
// valLen is kept only for signature compatibility and is ignored: the scan
// always covers s up to len(s). Previously the scan stopped at valLen while the
// final flush ran to len(s), so a valLen shorter than s let the tail through
// unescaped and a valLen longer than s indexed out of range.
func WriteStringSlowPathWithHTMLEscaped(buf *[]byte, i int, s string, valLen int) {
	end := len(s)
	// start is the beginning of the pending run of bytes that need no
	// escaping; the run is flushed in one append when an escape is required.
	start := i
	for i < end {
		if b := s[i]; b < utf8.RuneSelf {
			if htmlSafeSet[b] {
				i++
				continue
			}
			if start < i {
				*buf = append(*buf, s[start:i]...)
			}
			switch b {
			case '\\', '"':
				*buf = append(*buf, '\\', b)
			case '\n':
				*buf = append(*buf, '\\', 'n')
			case '\r':
				*buf = append(*buf, '\\', 'r')
			case '\t':
				*buf = append(*buf, '\\', 't')
			default:
				// Remaining unsafe ASCII: b < 0x80, so two hex digits suffice.
				*buf = append(*buf, `\u00`...)
				*buf = append(*buf, hex[b>>4], hex[b&0xF])
			}
			i++
			start = i
			continue
		}
		c, size := utf8.DecodeRuneInString(s[i:])
		if c == utf8.RuneError && size == 1 {
			// Invalid UTF-8: replace the single offending byte.
			if start < i {
				*buf = append(*buf, s[start:i]...)
			}
			*buf = append(*buf, `\ufffd`...)
			i++
			start = i
			continue
		}
		if c == '\u2028' || c == '\u2029' {
			if start < i {
				*buf = append(*buf, s[start:i]...)
			}
			// The two code points differ only in the last hex digit.
			*buf = append(*buf, `\u202`...)
			*buf = append(*buf, hex[c&0xF])
			i += size
			start = i
			continue
		}
		i += size
	}
	if start < end {
		*buf = append(*buf, s[start:]...)
	}
	*buf = append(*buf, '"')
}
// WriteStringSlowPath appends the escaped form of s[i:] to *buf and then
// appends the closing double quote.
//
// It writes neither the opening quote nor s[:i]; presumably the caller has
// already emitted both (confirm against the call sites).
//
// Escaping rules:
//   - ASCII bytes marked true in safeSet are copied unchanged.
//   - '\\' and '"' are prefixed with a backslash.
//   - '\n', '\r' and '\t' become the two-character escapes \n, \r and \t.
//   - Every other ASCII byte marked false in safeSet is written as \u00XX.
//   - Bytes >= utf8.RuneSelf are copied unchanged; no UTF-8 validation is
//     performed here.
//
// valLen is kept only for signature compatibility and is ignored: the scan
// always covers s up to len(s). Previously the scan stopped at valLen while the
// final flush ran to len(s), so a valLen shorter than s let the tail through
// unescaped and a valLen longer than s indexed out of range.
func WriteStringSlowPath(buf *[]byte, i int, s string, valLen int) {
	end := len(s)
	// start is the beginning of the pending run of bytes that need no
	// escaping; the run is flushed in one append when an escape is required.
	start := i
	for i < end {
		b := s[i]
		if b >= utf8.RuneSelf || safeSet[b] {
			// Part of the verbatim run: keep scanning.
			i++
			continue
		}
		if start < i {
			*buf = append(*buf, s[start:i]...)
		}
		switch b {
		case '\\', '"':
			*buf = append(*buf, '\\', b)
		case '\n':
			*buf = append(*buf, '\\', 'n')
		case '\r':
			*buf = append(*buf, '\\', 'r')
		case '\t':
			*buf = append(*buf, '\\', 't')
		default:
			// Remaining unsafe ASCII: b < 0x80, so two hex digits suffice.
			*buf = append(*buf, `\u00`...)
			*buf = append(*buf, hex[b>>4], hex[b&0xF])
		}
		i++
		start = i
	}
	if start < end {
		*buf = append(*buf, s[start:]...)
	}
	*buf = append(*buf, '"')
}
// Constants for assembling UTF-8 byte sequences by hand. AppendRune uses
// tx, t2-t4, maskx, the rune*Max limits, the surrogate range, maxRune and
// runeError; t1, t5 and mask2-mask4 are not referenced in the part of the file
// visible here.
const (
// Tag bits for the high end of an encoded byte. AppendRune ORs tx into every
// byte after the first, and t2/t3/t4 into the first byte of a 2-, 3- or
// 4-byte sequence.
t1 = 0x00 // 0000 0000
tx = 0x80 // 1000 0000
t2 = 0xC0 // 1100 0000
t3 = 0xE0 // 1110 0000
t4 = 0xF0 // 1111 0000
t5 = 0xF8 // 1111 1000
// Payload masks. maskx keeps the low six bits carried by each tx-tagged byte.
maskx = 0x3F // 0011 1111
mask2 = 0x1F // 0001 1111
mask3 = 0x0F // 0000 1111
mask4 = 0x07 // 0000 0111
// Largest code point AppendRune encodes in one, two and three bytes.
rune1Max = 1<<7 - 1
rune2Max = 1<<11 - 1
rune3Max = 1<<16 - 1
// Inclusive surrogate range; AppendRune encodes values in it as runeError.
surrogateMin = 0xD800
surrogateMax = 0xDFFF
maxRune = '\U0010FFFF' // Maximum valid Unicode code point.
runeError = '\uFFFD' // the "error" Rune or "Unicode replacement character"
)
// AppendRune appends the UTF-8 encoding of r to p and returns the extended
// slice.
//
// A value that is negative, greater than maxRune, or inside the surrogate
// range [surrogateMin, surrogateMax] is encoded as runeError instead.
func AppendRune(p []byte, r rune) []byte {
	// Viewing r as unsigned turns negative inputs into values above maxRune,
	// so they are caught by the same out-of-range test.
	u := uint32(r)

	if u <= rune1Max {
		return append(p, byte(r))
	}
	if u <= rune2Max {
		return append(p, t2|byte(r>>6), tx|byte(r)&maskx)
	}

	invalid := u > maxRune || (surrogateMin <= u && u <= surrogateMax)
	if invalid {
		// The replacement rune is itself a three-byte code point.
		r = runeError
	}
	if invalid || u <= rune3Max {
		return append(p, t3|byte(r>>12), tx|byte(r>>6)&maskx, tx|byte(r)&maskx)
	}

	return append(p, t4|byte(r>>18), tx|byte(r>>12)&maskx, tx|byte(r>>6)&maskx, tx|byte(r)&maskx)
}