caddy/caddyconfig/caddyfile/lexer.go
Francis Lavoie 96d6d277a4
caddyconfig: Don't start comments in middle of tokens (#3267)
* caddyconfig: Only parse # as start of comment if preceded by space

* caddyconfig: Simplify # logic using len(val), add a test
2020-05-05 12:32:12 -06:00

170 lines
3.4 KiB
Go
Executable file

// Copyright 2015 Light Code Labs, LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package caddyfile
import (
"bufio"
"io"
"unicode"
)
type (
// lexer is a utility which can get values, token by
// token, from a Reader. A token is a word, and tokens
// are separated by whitespace. A word can be enclosed
// in quotes if it contains whitespace.
lexer struct {
reader *bufio.Reader
token Token
line int
skippedLines int
}
// Token represents a single parsable unit.
Token struct {
File string
Line int
Text string
}
)
// load prepares the lexer to scan an input for tokens.
// It discards any leading byte order mark.
func (l *lexer) load(input io.Reader) error {
l.reader = bufio.NewReader(input)
l.line = 1
// discard byte order mark, if present
firstCh, _, err := l.reader.ReadRune()
if err != nil {
return err
}
if firstCh != 0xFEFF {
err := l.reader.UnreadRune()
if err != nil {
return err
}
}
return nil
}
// next loads the next token into the lexer.
// A token is delimited by whitespace, unless
// the token starts with a quotes character (")
// in which case the token goes until the closing
// quotes (the enclosing quotes are not included).
// Inside quoted strings, quotes may be escaped
// with a preceding \ character. No other chars
// may be escaped. The rest of the line is skipped
// if a "#" character is read in. Returns true if
// a token was loaded; false otherwise.
func (l *lexer) next() bool {
var val []rune
var comment, quoted, btQuoted, escaped bool
makeToken := func() bool {
l.token.Text = string(val)
return true
}
for {
ch, _, err := l.reader.ReadRune()
if err != nil {
if len(val) > 0 {
return makeToken()
}
if err == io.EOF {
return false
}
panic(err)
}
if !escaped && !btQuoted && ch == '\\' {
escaped = true
continue
}
if quoted || btQuoted {
if quoted && escaped {
// all is literal in quoted area,
// so only escape quotes
if ch != '"' {
val = append(val, '\\')
}
escaped = false
} else {
if quoted && ch == '"' {
return makeToken()
}
if btQuoted && ch == '`' {
return makeToken()
}
}
if ch == '\n' {
l.line += 1 + l.skippedLines
l.skippedLines = 0
}
val = append(val, ch)
continue
}
if unicode.IsSpace(ch) {
if ch == '\r' {
continue
}
if ch == '\n' {
if escaped {
l.skippedLines++
escaped = false
} else {
l.line += 1 + l.skippedLines
l.skippedLines = 0
}
comment = false
}
if len(val) > 0 {
return makeToken()
}
continue
}
if ch == '#' && len(val) == 0 {
comment = true
}
if comment {
continue
}
if len(val) == 0 {
l.token = Token{Line: l.line}
if ch == '"' {
quoted = true
continue
}
if ch == '`' {
btQuoted = true
continue
}
}
if escaped {
val = append(val, '\\')
escaped = false
}
val = append(val, ch)
}
}