caddyfile: Implement heredoc support (#5385)

This commit is contained in:
Francis Lavoie 2023-02-25 19:34:27 -05:00 committed by GitHub
parent 9e6919550b
commit 960150bb03
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
12 changed files with 397 additions and 135 deletions

View file

@ -20,7 +20,6 @@ import (
"io"
"log"
"strconv"
"strings"
)
// Dispenser is a type that dispenses tokens, similarly to a lexer,
@ -101,12 +100,12 @@ func (d *Dispenser) nextOnSameLine() bool {
d.cursor++
return true
}
if d.cursor >= len(d.tokens) {
if d.cursor >= len(d.tokens)-1 {
return false
}
if d.cursor < len(d.tokens)-1 &&
d.tokens[d.cursor].File == d.tokens[d.cursor+1].File &&
d.tokens[d.cursor].Line+d.numLineBreaks(d.cursor) == d.tokens[d.cursor+1].Line {
curr := d.tokens[d.cursor]
next := d.tokens[d.cursor+1]
if curr.File == next.File && curr.Line+curr.NumLineBreaks() == next.Line {
d.cursor++
return true
}
@ -122,12 +121,12 @@ func (d *Dispenser) NextLine() bool {
d.cursor++
return true
}
if d.cursor >= len(d.tokens) {
if d.cursor >= len(d.tokens)-1 {
return false
}
if d.cursor < len(d.tokens)-1 &&
(d.tokens[d.cursor].File != d.tokens[d.cursor+1].File ||
d.tokens[d.cursor].Line+d.numLineBreaks(d.cursor) < d.tokens[d.cursor+1].Line) {
curr := d.tokens[d.cursor]
next := d.tokens[d.cursor+1]
if curr.File != next.File || curr.Line+curr.NumLineBreaks() < next.Line {
d.cursor++
return true
}
@ -203,14 +202,17 @@ func (d *Dispenser) Val() string {
}
// ValRaw gets the raw text of the current token (including quotes).
// If the token was a heredoc, then the delimiter is not included,
// because that is not relevant to any unmarshaling logic at this time.
// If there is no token loaded, it returns empty string.
func (d *Dispenser) ValRaw() string {
if d.cursor < 0 || d.cursor >= len(d.tokens) {
return ""
}
quote := d.tokens[d.cursor].wasQuoted
if quote > 0 {
return string(quote) + d.tokens[d.cursor].Text + string(quote) // string literal
if quote > 0 && quote != '<' {
// string literal
return string(quote) + d.tokens[d.cursor].Text + string(quote)
}
return d.tokens[d.cursor].Text
}
@ -438,14 +440,14 @@ func (d *Dispenser) Delete() []Token {
return d.tokens
}
// numLineBreaks counts how many line breaks are in the token
// value given by the token index tknIdx. It returns 0 if the
// token does not exist or there are no line breaks.
func (d *Dispenser) numLineBreaks(tknIdx int) int {
if tknIdx < 0 || tknIdx >= len(d.tokens) {
return 0
// DeleteN is the same as Delete, but can delete many tokens at once.
// If there aren't N tokens available to delete, none are deleted.
func (d *Dispenser) DeleteN(amount int) []Token {
if amount > 0 && d.cursor >= (amount-1) && d.cursor <= len(d.tokens)-1 {
d.tokens = append(d.tokens[:d.cursor-(amount-1)], d.tokens[d.cursor+1:]...)
d.cursor -= amount
}
return strings.Count(d.tokens[tknIdx].Text, "\n")
return d.tokens
}
// isNewLine determines whether the current token is on a different
@ -468,18 +470,10 @@ func (d *Dispenser) isNewLine() bool {
return true
}
// The previous token may contain line breaks if
// it was quoted and spanned multiple lines. e.g:
//
// dir "foo
// bar
// baz"
prevLineBreaks := d.numLineBreaks(d.cursor - 1)
// If the previous token (incl line breaks) ends
// on a line earlier than the current token,
// then the current token is on a new line
return prev.Line+prevLineBreaks < curr.Line
return prev.Line+prev.NumLineBreaks() < curr.Line
}
// isNextOnNewLine determines whether the current token is on a different
@ -502,16 +496,8 @@ func (d *Dispenser) isNextOnNewLine() bool {
return true
}
// The current token may contain line breaks if
// it was quoted and spanned multiple lines. e.g:
//
// dir "foo
// bar
// baz"
currLineBreaks := d.numLineBreaks(d.cursor)
// If the current token (incl line breaks) ends
// on a line earlier than the next token,
// then the next token is on a new line
return curr.Line+currLineBreaks < next.Line
return curr.Line+curr.NumLineBreaks() < next.Line
}

View file

@ -17,7 +17,10 @@ package caddyfile
import (
"bufio"
"bytes"
"fmt"
"io"
"regexp"
"strings"
"unicode"
)
@ -35,30 +38,39 @@ type (
// Token represents a single parsable unit.
Token struct {
File string
origFile string
Line int
Text string
wasQuoted rune // enclosing quote character, if any
snippetName string
File string
origFile string
Line int
Text string
wasQuoted rune // enclosing quote character, if any
heredocMarker string
snippetName string
}
)
// originalFile gets original filename before import modification.
func (t Token) originalFile() string {
if t.origFile != "" {
return t.origFile
// Tokenize takes bytes as input and lexes it into
// a list of tokens that can be parsed as a Caddyfile.
// Also takes a filename to fill the token's File as
// the source of the tokens, which is important to
// determine relative paths for `import` directives.
func Tokenize(input []byte, filename string) ([]Token, error) {
l := lexer{}
if err := l.load(bytes.NewReader(input)); err != nil {
return nil, err
}
return t.File
}
// updateFile updates the token's source filename for error display
// and remembers the original filename. Used during "import" processing.
func (t *Token) updateFile(file string) {
if t.origFile == "" {
t.origFile = t.File
var tokens []Token
for {
found, err := l.next()
if err != nil {
return nil, err
}
if !found {
break
}
l.token.File = filename
tokens = append(tokens, l.token)
}
t.File = file
return tokens, nil
}
// load prepares the lexer to scan an input for tokens.
@ -92,28 +104,93 @@ func (l *lexer) load(input io.Reader) error {
// may be escaped. The rest of the line is skipped
// if a "#" character is read in. Returns true if
// a token was loaded; false otherwise.
func (l *lexer) next() bool {
func (l *lexer) next() (bool, error) {
var val []rune
var comment, quoted, btQuoted, escaped bool
var comment, quoted, btQuoted, inHeredoc, heredocEscaped, escaped bool
var heredocMarker string
makeToken := func(quoted rune) bool {
l.token.Text = string(val)
l.token.wasQuoted = quoted
l.token.heredocMarker = heredocMarker
return true
}
for {
// Read a character in; if err then if we had
// read some characters, make a token. If we
// reached EOF, then no more tokens to read.
// If no EOF, then we had a problem.
ch, _, err := l.reader.ReadRune()
if err != nil {
if len(val) > 0 {
return makeToken(0)
if inHeredoc {
return false, fmt.Errorf("incomplete heredoc <<%s on line #%d, expected ending marker %s", heredocMarker, l.line+l.skippedLines, heredocMarker)
}
return makeToken(0), nil
}
if err == io.EOF {
return false
return false, nil
}
panic(err)
return false, err
}
// detect whether we have the start of a heredoc
if !inHeredoc && !heredocEscaped && len(val) > 1 && string(val[:2]) == "<<" {
if ch == '<' {
return false, fmt.Errorf("too many '<' for heredoc on line #%d; only use two, for example <<END", l.line)
}
if ch == '\r' {
continue
}
// after hitting a newline, we know that the heredoc marker
// is the characters after the two << and the newline.
// we reset the val because the heredoc is syntax we don't
// want to keep.
if ch == '\n' {
heredocMarker = string(val[2:])
if !heredocMarkerRegexp.Match([]byte(heredocMarker)) {
return false, fmt.Errorf("heredoc marker on line #%d must contain only alpha-numeric characters, dashes and underscores; got '%s'", l.line, heredocMarker)
}
inHeredoc = true
l.skippedLines++
val = nil
continue
}
val = append(val, ch)
continue
}
// if we're in a heredoc, all characters are read as-is
if inHeredoc {
val = append(val, ch)
if ch == '\n' {
l.skippedLines++
}
// check if we're done, i.e. that the last few characters are the marker
if len(val) > len(heredocMarker) && heredocMarker == string(val[len(val)-len(heredocMarker):]) {
// set the final value
val, err = l.finalizeHeredoc(val, heredocMarker)
if err != nil {
return false, err
}
// set the line counter, and make the token
l.line += l.skippedLines
l.skippedLines = 0
return makeToken('<'), nil
}
// stay in the heredoc until we find the ending marker
continue
}
// track whether we found an escape '\' for the next
// iteration to be contextually aware
if !escaped && !btQuoted && ch == '\\' {
escaped = true
continue
@ -128,26 +205,29 @@ func (l *lexer) next() bool {
}
escaped = false
} else {
if quoted && ch == '"' {
return makeToken('"')
}
if btQuoted && ch == '`' {
return makeToken('`')
if (quoted && ch == '"') || (btQuoted && ch == '`') {
return makeToken(ch), nil
}
}
// allow quoted text to wrap continue on multiple lines
if ch == '\n' {
l.line += 1 + l.skippedLines
l.skippedLines = 0
}
// collect this character as part of the quoted token
val = append(val, ch)
continue
}
if unicode.IsSpace(ch) {
// ignore CR altogether, we only actually care about LF (\n)
if ch == '\r' {
continue
}
// end of the line
if ch == '\n' {
// newlines can be escaped to chain arguments
// onto multiple lines; else, increment the line count
if escaped {
l.skippedLines++
escaped = false
@ -155,14 +235,18 @@ func (l *lexer) next() bool {
l.line += 1 + l.skippedLines
l.skippedLines = 0
}
// comments (#) are single-line only
comment = false
}
// any kind of space means we're at the end of this token
if len(val) > 0 {
return makeToken(0)
return makeToken(0), nil
}
continue
}
// comments must be at the start of a token,
// in other words, preceded by space or newline
if ch == '#' && len(val) == 0 {
comment = true
}
@ -183,7 +267,12 @@ func (l *lexer) next() bool {
}
if escaped {
val = append(val, '\\')
// allow escaping the first < to skip the heredoc syntax
if ch == '<' {
heredocEscaped = true
} else {
val = append(val, '\\')
}
escaped = false
}
@ -191,24 +280,71 @@ func (l *lexer) next() bool {
}
}
// Tokenize takes bytes as input and lexes it into
// a list of tokens that can be parsed as a Caddyfile.
// Also takes a filename to fill the token's File as
// the source of the tokens, which is important to
// determine relative paths for `import` directives.
func Tokenize(input []byte, filename string) ([]Token, error) {
l := lexer{}
if err := l.load(bytes.NewReader(input)); err != nil {
return nil, err
// finalizeHeredoc takes the runes read as the heredoc text and the marker,
// and processes the text to strip leading whitespace, returning the final
// value without the leading whitespace.
func (l *lexer) finalizeHeredoc(val []rune, marker string) ([]rune, error) {
// find the last newline of the heredoc, which is where the contents end
lastNewline := strings.LastIndex(string(val), "\n")
// collapse the content, then split into separate lines
lines := strings.Split(string(val[:lastNewline+1]), "\n")
// figure out how much whitespace we need to strip from the front of every line
// by getting the string that precedes the marker, on the last line
paddingToStrip := string(val[lastNewline+1 : len(val)-len(marker)])
// iterate over each line and strip the whitespace from the front
var out string
for lineNum, lineText := range lines[:len(lines)-1] {
// find an exact match for the padding
index := strings.Index(lineText, paddingToStrip)
// if the padding doesn't match exactly at the start then we can't safely strip
if index != 0 {
return nil, fmt.Errorf("mismatched leading whitespace in heredoc <<%s on line #%d [%s], expected whitespace [%s] to match the closing marker", marker, l.line+lineNum+1, lineText, paddingToStrip)
}
// strip, then append the line, with the newline, to the output.
// also removes all "\r" because Windows.
out += strings.ReplaceAll(lineText[len(paddingToStrip):]+"\n", "\r", "")
}
var tokens []Token
for l.next() {
l.token.File = filename
tokens = append(tokens, l.token)
// return the final value
return []rune(out), nil
}
// originalFile gets original filename before import modification.
func (t Token) originalFile() string {
if t.origFile != "" {
return t.origFile
}
return tokens, nil
return t.File
}
// updateFile updates the token's source filename for error display
// and remembers the original filename. Used during "import" processing.
func (t *Token) updateFile(file string) {
if t.origFile == "" {
t.origFile = t.File
}
t.File = file
}
func (t Token) Quoted() bool {
return t.wasQuoted > 0
}
// NumLineBreaks counts how many line breaks are in the token text.
func (t Token) NumLineBreaks() int {
lineBreaks := strings.Count(t.Text, "\n")
if t.wasQuoted == '<' {
// heredocs have an extra linebreak because the opening
// delimiter is on its own line and is not included in
// the token Text itself
lineBreaks++
}
return lineBreaks
}
var heredocMarkerRegexp = regexp.MustCompile("^[A-Za-z0-9_-]+$")

View file

@ -18,13 +18,13 @@ import (
"testing"
)
type lexerTestCase struct {
input []byte
expected []Token
}
func TestLexer(t *testing.T) {
testCases := []lexerTestCase{
testCases := []struct {
input []byte
expected []Token
expectErr bool
errorMessage string
}{
{
input: []byte(`host:123`),
expected: []Token{
@ -249,10 +249,123 @@ func TestLexer(t *testing.T) {
{Line: 1, Text: `quotes`},
},
},
{
input: []byte(`heredoc <<EOF
content
EOF same-line-arg
`),
expected: []Token{
{Line: 1, Text: `heredoc`},
{Line: 1, Text: "content\n"},
{Line: 3, Text: `same-line-arg`},
},
},
{
input: []byte(`heredoc <<VERY-LONG-MARKER
content
VERY-LONG-MARKER same-line-arg
`),
expected: []Token{
{Line: 1, Text: `heredoc`},
{Line: 1, Text: "content\n"},
{Line: 3, Text: `same-line-arg`},
},
},
{
input: []byte(`heredoc <<EOF
content
EOF same-line-arg
`),
expected: []Token{
{Line: 1, Text: `heredoc`},
{Line: 1, Text: "content\n"},
{Line: 3, Text: `same-line-arg`},
},
},
{
input: []byte(`prev-line
heredoc <<EOF
multi
line
content
EOF same-line-arg
next-line
`),
expected: []Token{
{Line: 1, Text: `prev-line`},
{Line: 2, Text: `heredoc`},
{Line: 2, Text: "\tmulti\n\tline\n\tcontent\n"},
{Line: 6, Text: `same-line-arg`},
{Line: 7, Text: `next-line`},
},
},
{
input: []byte(`heredoc <EOF
content
EOF same-line-arg
`),
expected: []Token{
{Line: 1, Text: `heredoc`},
{Line: 1, Text: `<EOF`},
{Line: 2, Text: `content`},
{Line: 3, Text: `EOF`},
{Line: 3, Text: `same-line-arg`},
},
},
{
input: []byte(`heredoc <<HERE SAME LINE
content
HERE same-line-arg
`),
expectErr: true,
errorMessage: "heredoc marker on line #1 must contain only alpha-numeric characters, dashes and underscores; got 'HERE SAME LINE'",
},
{
input: []byte(`heredoc <<<EOF
content
EOF same-line-arg
`),
expectErr: true,
errorMessage: "too many '<' for heredoc on line #1; only use two, for example <<END",
},
{
input: []byte(`heredoc <<EOF
content
`),
expectErr: true,
errorMessage: "incomplete heredoc <<EOF on line #3, expected ending marker EOF",
},
{
input: []byte(`heredoc <<EOF
content
EOF
`),
expectErr: true,
errorMessage: "mismatched leading whitespace in heredoc <<EOF on line #2 [\tcontent], expected whitespace [\t\t] to match the closing marker",
},
{
input: []byte(`heredoc <<EOF
content
EOF
`),
expectErr: true,
errorMessage: "mismatched leading whitespace in heredoc <<EOF on line #2 [ content], expected whitespace [\t\t] to match the closing marker",
},
}
for i, testCase := range testCases {
actual, err := Tokenize(testCase.input, "")
if testCase.expectErr {
if err == nil {
t.Errorf("expected error, got actual: %v", actual)
continue
}
if err.Error() != testCase.errorMessage {
t.Errorf("expected error '%v', got: %v", testCase.errorMessage, err)
}
continue
}
if err != nil {
t.Errorf("%v", err)
}

View file

@ -1 +1 @@
{args.0}
{args[0]}

View file

@ -1 +1 @@
{args.0} {args.1}
{args[0]} {args[1]}

View file

@ -0,0 +1,50 @@
example.com {
respond <<EOF
<html>
<head><title>Foo</title>
<body>Foo</body>
</html>
EOF 200
}
----------
{
"apps": {
"http": {
"servers": {
"srv0": {
"listen": [
":443"
],
"routes": [
{
"match": [
{
"host": [
"example.com"
]
}
],
"handle": [
{
"handler": "subroute",
"routes": [
{
"handle": [
{
"body": "\u003chtml\u003e\n \u003chead\u003e\u003ctitle\u003eFoo\u003c/title\u003e\n \u003cbody\u003eFoo\u003c/body\u003e\n\u003c/html\u003e\n",
"handler": "static_response",
"status_code": 200
}
]
}
]
}
],
"terminal": true
}
]
}
}
}
}
}

View file

@ -1,6 +1,6 @@
(logging) {
log {
output file /var/log/caddy/{args.0}.access.log
output file /var/log/caddy/{args[0]}.access.log
}
}

View file

@ -6,7 +6,7 @@ https://example.com {
method GET
rewrite /rewritten?uri={uri}
buffer_requests
request_buffers 4KB
transport http {
read_buffer 10MB
@ -54,7 +54,6 @@ https://example.com {
{
"handle": [
{
"buffer_requests": true,
"handler": "reverse_proxy",
"headers": {
"request": {
@ -68,6 +67,7 @@ https://example.com {
}
}
},
"request_buffers": 4000,
"rewrite": {
"method": "GET",
"uri": "/rewritten?uri={http.request.uri}"

View file

@ -1 +1 @@
respond "'I am {args.0}', hears {args.1}"
respond "'I am {args[0]}', hears {args[1]}"

View file

@ -191,15 +191,17 @@ func (m MatchExpression) caddyPlaceholderFunc(lhs, rhs ref.Val) ref.Val {
celReq, ok := lhs.(celHTTPRequest)
if !ok {
return types.NewErr(
"invalid request of type '%v' to "+placeholderFuncName+"(request, placeholderVarName)",
"invalid request of type '%v' to %s(request, placeholderVarName)",
lhs.Type(),
placeholderFuncName,
)
}
phStr, ok := rhs.(types.String)
if !ok {
return types.NewErr(
"invalid placeholder variable name of type '%v' to "+placeholderFuncName+"(request, placeholderVarName)",
"invalid placeholder variable name of type '%v' to %s(request, placeholderVarName)",
rhs.Type(),
placeholderFuncName,
)
}

View file

@ -217,25 +217,18 @@ func parsePHPFastCGI(h httpcaddyfile.Helper) ([]httpcaddyfile.ConfigValue, error
return nil, dispenser.ArgErr()
}
fcgiTransport.Root = dispenser.Val()
dispenser.Delete()
dispenser.Delete()
dispenser.DeleteN(2)
case "split":
extensions = dispenser.RemainingArgs()
dispenser.Delete()
for range extensions {
dispenser.Delete()
}
dispenser.DeleteN(len(extensions) + 1)
if len(extensions) == 0 {
return nil, dispenser.ArgErr()
}
case "env":
args := dispenser.RemainingArgs()
dispenser.Delete()
for range args {
dispenser.Delete()
}
dispenser.DeleteN(len(args) + 1)
if len(args) != 2 {
return nil, dispenser.ArgErr()
}
@ -246,10 +239,7 @@ func parsePHPFastCGI(h httpcaddyfile.Helper) ([]httpcaddyfile.ConfigValue, error
case "index":
args := dispenser.RemainingArgs()
dispenser.Delete()
for range args {
dispenser.Delete()
}
dispenser.DeleteN(len(args) + 1)
if len(args) != 1 {
return nil, dispenser.ArgErr()
}
@ -257,10 +247,7 @@ func parsePHPFastCGI(h httpcaddyfile.Helper) ([]httpcaddyfile.ConfigValue, error
case "try_files":
args := dispenser.RemainingArgs()
dispenser.Delete()
for range args {
dispenser.Delete()
}
dispenser.DeleteN(len(args) + 1)
if len(args) < 1 {
return nil, dispenser.ArgErr()
}
@ -268,10 +255,7 @@ func parsePHPFastCGI(h httpcaddyfile.Helper) ([]httpcaddyfile.ConfigValue, error
case "resolve_root_symlink":
args := dispenser.RemainingArgs()
dispenser.Delete()
for range args {
dispenser.Delete()
}
dispenser.DeleteN(len(args) + 1)
fcgiTransport.ResolveRootSymlink = true
case "dial_timeout":
@ -283,8 +267,7 @@ func parsePHPFastCGI(h httpcaddyfile.Helper) ([]httpcaddyfile.ConfigValue, error
return nil, dispenser.Errf("bad timeout value %s: %v", dispenser.Val(), err)
}
fcgiTransport.DialTimeout = caddy.Duration(dur)
dispenser.Delete()
dispenser.Delete()
dispenser.DeleteN(2)
case "read_timeout":
if !dispenser.NextArg() {
@ -295,8 +278,7 @@ func parsePHPFastCGI(h httpcaddyfile.Helper) ([]httpcaddyfile.ConfigValue, error
return nil, dispenser.Errf("bad timeout value %s: %v", dispenser.Val(), err)
}
fcgiTransport.ReadTimeout = caddy.Duration(dur)
dispenser.Delete()
dispenser.Delete()
dispenser.DeleteN(2)
case "write_timeout":
if !dispenser.NextArg() {
@ -307,15 +289,11 @@ func parsePHPFastCGI(h httpcaddyfile.Helper) ([]httpcaddyfile.ConfigValue, error
return nil, dispenser.Errf("bad timeout value %s: %v", dispenser.Val(), err)
}
fcgiTransport.WriteTimeout = caddy.Duration(dur)
dispenser.Delete()
dispenser.Delete()
dispenser.DeleteN(2)
case "capture_stderr":
args := dispenser.RemainingArgs()
dispenser.Delete()
for range args {
dispenser.Delete()
}
dispenser.DeleteN(len(args) + 1)
fcgiTransport.CaptureStderr = true
}
}

View file

@ -129,8 +129,7 @@ func parseCaddyfile(h httpcaddyfile.Helper) ([]httpcaddyfile.ConfigValue, error)
return nil, dispenser.ArgErr()
}
rpHandler.Rewrite.URI = dispenser.Val()
dispenser.Delete()
dispenser.Delete()
dispenser.DeleteN(2)
case "copy_headers":
args := dispenser.RemainingArgs()
@ -140,13 +139,11 @@ func parseCaddyfile(h httpcaddyfile.Helper) ([]httpcaddyfile.ConfigValue, error)
args = append(args, dispenser.Val())
}
dispenser.Delete() // directive name
// directive name + args
dispenser.DeleteN(len(args) + 1)
if hadBlock {
dispenser.Delete() // opening brace
dispenser.Delete() // closing brace
}
for range args {
dispenser.Delete()
// opening & closing brace
dispenser.DeleteN(2)
}
for _, headerField := range args {