1
0
Fork 0
mirror of https://github.com/moby/moby.git synced 2022-11-09 12:21:53 -05:00

ProcessWord: support UTF-8

Modified ProcessWord to work correctly with UTF-8 strings and added test cases.

Signed-off-by: Daehyeok Mun <daehyeok@gmail.com>
This commit is contained in:
Daehyeok Mun 2015-10-18 21:55:53 -06:00
parent bb5551746b
commit bb79b7eb9e
3 changed files with 99 additions and 51 deletions

View file

@ -9,13 +9,15 @@ package dockerfile
import ( import (
"fmt" "fmt"
"strings" "strings"
"text/scanner"
"unicode" "unicode"
) )
type shellWord struct { type shellWord struct {
word string word string
envs []string scanner scanner.Scanner
pos int envs []string
pos int
} }
// ProcessWord will use the 'env' list of environment variables, // ProcessWord will use the 'env' list of environment variables,
@ -26,11 +28,12 @@ func ProcessWord(word string, env []string) (string, error) {
envs: env, envs: env,
pos: 0, pos: 0,
} }
sw.scanner.Init(strings.NewReader(word))
return sw.process() return sw.process()
} }
func (sw *shellWord) process() (string, error) { func (sw *shellWord) process() (string, error) {
return sw.processStopOn('\000') return sw.processStopOn(scanner.EOF)
} }
// Process the word, starting at 'pos', and stop when we get to the // Process the word, starting at 'pos', and stop when we get to the
@ -43,10 +46,11 @@ func (sw *shellWord) processStopOn(stopChar rune) (string, error) {
'$': sw.processDollar, '$': sw.processDollar,
} }
for sw.pos < len(sw.word) { for sw.scanner.Peek() != scanner.EOF {
ch := sw.peek() ch := sw.scanner.Peek()
if stopChar != '\000' && ch == stopChar {
sw.next() if stopChar != scanner.EOF && ch == stopChar {
sw.scanner.Next()
break break
} }
if fn, ok := charFuncMapping[ch]; ok { if fn, ok := charFuncMapping[ch]; ok {
@ -58,14 +62,19 @@ func (sw *shellWord) processStopOn(stopChar rune) (string, error) {
result += tmp result += tmp
} else { } else {
// Not special, just add it to the result // Not special, just add it to the result
ch = sw.next() ch = sw.scanner.Next()
if ch == '\\' { if ch == '\\' {
// '\' escapes, except end of line // '\' escapes, except end of line
ch = sw.next()
if ch == '\000' { ch = sw.scanner.Next()
continue
if ch == scanner.EOF {
break
} }
} }
result += string(ch) result += string(ch)
} }
} }
@ -73,36 +82,21 @@ func (sw *shellWord) processStopOn(stopChar rune) (string, error) {
return result, nil return result, nil
} }
func (sw *shellWord) peek() rune {
if sw.pos == len(sw.word) {
return '\000'
}
return rune(sw.word[sw.pos])
}
func (sw *shellWord) next() rune {
if sw.pos == len(sw.word) {
return '\000'
}
ch := rune(sw.word[sw.pos])
sw.pos++
return ch
}
func (sw *shellWord) processSingleQuote() (string, error) { func (sw *shellWord) processSingleQuote() (string, error) {
// All chars between single quotes are taken as-is // All chars between single quotes are taken as-is
// Note, you can't escape ' // Note, you can't escape '
var result string var result string
sw.next() sw.scanner.Next()
for { for {
ch := sw.next() ch := sw.scanner.Next()
if ch == '\000' || ch == '\'' { if ch == '\'' || ch == scanner.EOF {
break break
} }
result += string(ch) result += string(ch)
} }
return result, nil return result, nil
} }
@ -111,12 +105,12 @@ func (sw *shellWord) processDoubleQuote() (string, error) {
// But you can escape " with a \ // But you can escape " with a \
var result string var result string
sw.next() sw.scanner.Next()
for sw.pos < len(sw.word) { for sw.scanner.Peek() != scanner.EOF {
ch := sw.peek() ch := sw.scanner.Peek()
if ch == '"' { if ch == '"' {
sw.next() sw.scanner.Next()
break break
} }
if ch == '$' { if ch == '$' {
@ -126,18 +120,18 @@ func (sw *shellWord) processDoubleQuote() (string, error) {
} }
result += tmp result += tmp
} else { } else {
ch = sw.next() ch = sw.scanner.Next()
if ch == '\\' { if ch == '\\' {
chNext := sw.peek() chNext := sw.scanner.Peek()
if chNext == '\000' { if chNext == scanner.EOF {
// Ignore \ at end of word // Ignore \ at end of word
continue continue
} }
if chNext == '"' || chNext == '$' { if chNext == '"' || chNext == '$' {
// \" and \$ can be escaped, all other \'s are left as-is // \" and \$ can be escaped, all other \'s are left as-is
ch = sw.next() ch = sw.scanner.Next()
} }
} }
result += string(ch) result += string(ch)
@ -148,23 +142,23 @@ func (sw *shellWord) processDoubleQuote() (string, error) {
} }
func (sw *shellWord) processDollar() (string, error) { func (sw *shellWord) processDollar() (string, error) {
sw.next() sw.scanner.Next()
ch := sw.peek() ch := sw.scanner.Peek()
if ch == '{' { if ch == '{' {
sw.next() sw.scanner.Next()
name := sw.processName() name := sw.processName()
ch = sw.peek() ch = sw.scanner.Peek()
if ch == '}' { if ch == '}' {
// Normal ${xx} case // Normal ${xx} case
sw.next() sw.scanner.Next()
return sw.getEnv(name), nil return sw.getEnv(name), nil
} }
if ch == ':' { if ch == ':' {
// Special ${xx:...} format processing // Special ${xx:...} format processing
// Yes it allows for recursive $'s in the ... spot // Yes it allows for recursive $'s in the ... spot
sw.next() // skip over : sw.scanner.Next() // skip over :
modifier := sw.next() modifier := sw.scanner.Next()
word, err := sw.processStopOn('}') word, err := sw.processStopOn('}')
if err != nil { if err != nil {
@ -207,16 +201,16 @@ func (sw *shellWord) processName() string {
// If it starts with a numeric then just return $# // If it starts with a numeric then just return $#
var name string var name string
for sw.pos < len(sw.word) { for sw.scanner.Peek() != scanner.EOF {
ch := sw.peek() ch := sw.scanner.Peek()
if len(name) == 0 && unicode.IsDigit(ch) { if len(name) == 0 && unicode.IsDigit(ch) {
ch = sw.next() ch = sw.scanner.Next()
return string(ch) return string(ch)
} }
if !unicode.IsLetter(ch) && !unicode.IsDigit(ch) && ch != '_' { if !unicode.IsLetter(ch) && !unicode.IsDigit(ch) && ch != '_' {
break break
} }
ch = sw.next() ch = sw.scanner.Next()
name += string(ch) name += string(ch)
} }

View file

@ -15,7 +15,7 @@ func TestShellParser(t *testing.T) {
defer file.Close() defer file.Close()
scanner := bufio.NewScanner(file) scanner := bufio.NewScanner(file)
envs := []string{"PWD=/home", "SHELL=bash"} envs := []string{"PWD=/home", "SHELL=bash", "KOREAN=한국어"}
for scanner.Scan() { for scanner.Scan() {
line := scanner.Text() line := scanner.Text()

View file

@ -56,3 +56,57 @@ he${PWD:=000}xx | error
he${PWD:+${PWD}:}xx | he/home:xx he${PWD:+${PWD}:}xx | he/home:xx
he${XXX:-\$PWD:}xx | he$PWD:xx he${XXX:-\$PWD:}xx | he$PWD:xx
he${XXX:-\${PWD}z}xx | he${PWDz}xx he${XXX:-\${PWD}z}xx | he${PWDz}xx
안녕하세요 | 안녕하세요
안'녕'하세요 | 안녕하세요
안'녕하세요 | 안녕하세요
안녕\'하세요 | 안녕'하세요
안\\'녕하세요 | 안\녕하세요
안녕\t하세요 | 안녕t하세요
"안녕\t하세요" | 안녕\t하세요
'안녕\t하세요 | 안녕\t하세요
안녕하세요\ | 안녕하세요
안녕하세요\\ | 안녕하세요\
"안녕하세요 | 안녕하세요
"안녕하세요\" | 안녕하세요"
"안녕'하세요" | 안녕'하세요
'안녕하세요 | 안녕하세요
'안녕하세요\' | 안녕하세요\
안녕$1x | 안녕x
안녕$.x | 안녕$.x
안녕$pwd. | 안녕.
안녕$PWD | 안녕/home
안녕\$PWD | 안녕$PWD
안녕\\$PWD | 안녕\/home
안녕\${} | 안녕${}
안녕\${}xx | 안녕${}xx
안녕${} | 안녕
안녕${}xx | 안녕xx
안녕${hi} | 안녕
안녕${hi}xx | 안녕xx
안녕${PWD} | 안녕/home
안녕${.} | error
안녕${XXX:-000}xx | 안녕000xx
안녕${PWD:-000}xx | 안녕/homexx
안녕${XXX:-$PWD}xx | 안녕/homexx
안녕${XXX:-${PWD:-yyy}}xx | 안녕/homexx
안녕${XXX:-${YYY:-yyy}}xx | 안녕yyyxx
안녕${XXX:YYY} | error
안녕${XXX:+${PWD}}xx | 안녕xx
안녕${PWD:+${XXX}}xx | 안녕xx
안녕${PWD:+${SHELL}}xx | 안녕bashxx
안녕${XXX:+000}xx | 안녕xx
안녕${PWD:+000}xx | 안녕000xx
'안녕${XX}' | 안녕${XX}
"안녕${PWD}" | 안녕/home
"안녕'$PWD'" | 안녕'/home'
'"안녕"' | "안녕"
안녕\$PWD | 안녕$PWD
"안녕\$PWD" | 안녕$PWD
'안녕\$PWD' | 안녕\$PWD
안녕${PWD | error
안녕${PWD:=000}xx | error
안녕${PWD:+${PWD}:}xx | 안녕/home:xx
안녕${XXX:-\$PWD:}xx | 안녕$PWD:xx
안녕${XXX:-\${PWD}z}xx | 안녕${PWDz}xx
$KOREAN | 한국어
안녕$KOREAN | 안녕한국어