httpcaddyfile: Fix nested blocks; add handle directive; refactor

The fix that was initially put forth in #2971 was good, but only for
up to one layer of nesting. The real problem was that we forgot to
increment nesting when already inside a block if we saw another open
curly brace that opens another block (dispenser.go L157-158).

The new 'handle' directive allows HTTP Caddyfiles to be designed more
like nginx location blocks if the user prefers. Inside a handle block,
directives are still ordered just like they are outside of them, but
handler blocks at a given level of nesting are mutually exclusive.

This work benefitted from some refactoring and cleanup.
This commit is contained in:
Matthew Holt 2020-01-16 17:08:52 -07:00
parent 21643a007a
commit e51e56a494
No known key found for this signature in database
GPG key ID: 2A349DD577D586A5
10 changed files with 269 additions and 100 deletions

View file

@ -152,8 +152,10 @@ func (d *Dispenser) NextBlock(initialNestingLevel int) bool {
if !d.Next() {
return false // should be EOF error
}
if d.Val() == "}" {
if d.Val() == "}" && !d.nextOnSameLine() {
d.nesting--
} else if d.Val() == "{" && !d.nextOnSameLine() {
d.nesting++
}
return d.nesting > initialNestingLevel
}
@ -262,9 +264,9 @@ func (d *Dispenser) NewFromNextTokens() *Dispenser {
if !openedBlock {
// because NextBlock() consumes the initial open
// curly brace, we rewind here to append it, since
// our case is special in that we want to include
// all the tokens including surrounding curly braces
// for a new dispenser to have
// our case is special in that we want the new
// dispenser to have all the tokens including
// surrounding curly braces
d.Prev()
tkns = append(tkns, d.Token())
d.Next()
@ -273,12 +275,12 @@ func (d *Dispenser) NewFromNextTokens() *Dispenser {
tkns = append(tkns, d.Token())
}
if openedBlock {
// include closing brace accordingly
// include closing brace
tkns = append(tkns, d.Token())
// since NewFromNextTokens is intended to consume the entire
// directive, we must call Next() here and consume the closing
// curly brace
d.Next()
// do not consume the closing curly brace; the
// next iteration of the enclosing loop will
// call Next() and consume it
}
return NewDispenser(tkns)
}

View file

@ -34,6 +34,7 @@ func init() {
RegisterHandlerDirective("redir", parseRedir)
RegisterHandlerDirective("respond", parseRespond)
RegisterHandlerDirective("route", parseRoute)
RegisterHandlerDirective("handle", parseHandle)
}
func parseBind(h Helper) ([]ConfigValue, error) {
@ -76,7 +77,7 @@ func parseRoot(h Helper) ([]ConfigValue, error) {
route.MatcherSetsRaw = []caddy.ModuleMap{matcherSet}
}
return h.NewVarsRoute(route), nil
return []ConfigValue{{Class: "route", Value: route}}, nil
}
func parseTLS(h Helper) ([]ConfigValue, error) {
@ -330,3 +331,34 @@ func parseRoute(h Helper) (caddyhttp.MiddlewareHandler, error) {
return sr, nil
}
func parseHandle(h Helper) (caddyhttp.MiddlewareHandler, error) {
var allResults []ConfigValue
for h.Next() {
for nesting := h.Nesting(); h.NextBlock(nesting); {
dir := h.Val()
dirFunc, ok := registeredDirectives[dir]
if !ok {
return nil, h.Errf("unrecognized directive: %s", dir)
}
subHelper := h
subHelper.Dispenser = h.NewFromNextTokens()
results, err := dirFunc(subHelper)
if err != nil {
return nil, h.Errf("parsing caddyfile tokens for '%s': %v", dir, err)
}
for _, result := range results {
result.directive = dir
allResults = append(allResults, result)
}
}
return buildSubroute(allResults, h.groupCounter)
}
return nil, nil
}

View file

@ -16,7 +16,6 @@ package httpcaddyfile
import (
"encoding/json"
"fmt"
"sort"
"github.com/caddyserver/caddy/v2"
@ -28,7 +27,10 @@ import (
// directiveOrder specifies the order
// to apply directives in HTTP routes.
var directiveOrder = []string{
"root",
"rewrite",
"strip_prefix",
"strip_suffix",
"uri_replace",
@ -38,7 +40,10 @@ var directiveOrder = []string{
"request_header",
"encode",
"templates",
"handle",
"route",
"redir",
"respond",
"reverse_proxy",
@ -46,6 +51,17 @@ var directiveOrder = []string{
"file_server",
}
// directiveIsOrdered returns true if dir is
// a known, ordered (sorted) directive.
func directiveIsOrdered(dir string) bool {
for _, d := range directiveOrder {
if d == dir {
return true
}
}
return false
}
// RegisterDirective registers a unique directive dir with an
// associated unmarshaling (setup) function. When directive dir
// is encountered in a Caddyfile, setupFunc will be called to
@ -98,7 +114,7 @@ type Helper struct {
warnings *[]caddyconfig.Warning
matcherDefs map[string]caddy.ModuleMap
parentBlock caddyfile.ServerBlock
groupCounter *int
groupCounter counter
}
// Option gets the option keyed by name.
@ -130,8 +146,8 @@ func (h Helper) JSON(val interface{}) json.RawMessage {
return caddyconfig.JSON(val, h.warnings)
}
// MatcherToken assumes the current token is (possibly) a matcher, and
// if so, returns the matcher set along with a true value. If the current
// MatcherToken assumes the next argument token is (possibly) a matcher,
// and if so, returns the matcher set along with a true value. If the next
// token is not a matcher, nil and false is returned. Note that a true
// value may be returned with a nil matcher set if it is a catch-all.
func (h Helper) MatcherToken() (caddy.ModuleMap, bool, error) {
@ -186,14 +202,13 @@ func (h Helper) GroupRoutes(vals []ConfigValue) {
}
// now that we know the group will have some effect, do it
groupNum := *h.groupCounter
groupName := h.groupCounter.nextGroup()
for i := range vals {
if route, ok := vals[i].Value.(caddyhttp.Route); ok {
route.Group = fmt.Sprintf("group%d", groupNum)
route.Group = groupName
vals[i].Value = route
}
}
*h.groupCounter++
}
// NewBindAddresses returns config values relevant to adding
@ -202,12 +217,6 @@ func (h Helper) NewBindAddresses(addrs []string) []ConfigValue {
return []ConfigValue{{Class: "bind", Value: addrs}}
}
// NewVarsRoute returns config values relevant to adding a
// "vars" wrapper route to the config.
func (h Helper) NewVarsRoute(route caddyhttp.Route) []ConfigValue {
return []ConfigValue{{Class: "var", Value: route}}
}
// ConfigValue represents a value to be added to the final
// configuration, or a value to be consulted when building
// the final configuration.
@ -228,7 +237,7 @@ type ConfigValue struct {
directive string
}
func sortRoutes(handlers []ConfigValue) {
func sortRoutes(routes []ConfigValue) {
dirPositions := make(map[string]int)
for i, dir := range directiveOrder {
dirPositions[dir] = i
@ -237,15 +246,21 @@ func sortRoutes(handlers []ConfigValue) {
// while we are sorting, we will need to decode a route's path matcher
// in order to sub-sort by path length; we can amortize this operation
// for efficiency by storing the decoded matchers in a slice
decodedMatchers := make([]caddyhttp.MatchPath, len(handlers))
decodedMatchers := make([]caddyhttp.MatchPath, len(routes))
sort.SliceStable(handlers, func(i, j int) bool {
iDir, jDir := handlers[i].directive, handlers[j].directive
sort.SliceStable(routes, func(i, j int) bool {
iDir, jDir := routes[i].directive, routes[j].directive
if iDir == jDir {
// directives are the same; sub-sort by path matcher length
// if there's only one matcher set and one path (common case)
iRoute := handlers[i].Value.(caddyhttp.Route)
jRoute := handlers[j].Value.(caddyhttp.Route)
iRoute, ok := routes[i].Value.(caddyhttp.Route)
if !ok {
return false
}
jRoute, ok := routes[j].Value.(caddyhttp.Route)
if !ok {
return false
}
if len(iRoute.MatcherSetsRaw) == 1 && len(jRoute.MatcherSetsRaw) == 1 {
// use already-decoded matcher, or decode if it's the first time seeing it

View file

@ -41,7 +41,7 @@ type ServerType struct {
func (st ServerType) Setup(originalServerBlocks []caddyfile.ServerBlock,
options map[string]interface{}) (*caddy.Config, []caddyconfig.Warning, error) {
var warnings []caddyconfig.Warning
groupCounter := new(int)
gc := counter{new(int)}
var serverBlocks []serverBlock
for _, sblock := range originalServerBlocks {
@ -146,7 +146,7 @@ func (st ServerType) Setup(originalServerBlocks []caddyfile.ServerBlock,
warnings: &warnings,
matcherDefs: matcherDefs,
parentBlock: sb.block,
groupCounter: groupCounter,
groupCounter: gc,
})
if err != nil {
return nil, warnings, fmt.Errorf("parsing caddyfile tokens for '%s': %v", dir, err)
@ -169,7 +169,7 @@ func (st ServerType) Setup(originalServerBlocks []caddyfile.ServerBlock,
// each pairing of listener addresses to list of server
// blocks is basically a server definition
servers, err := st.serversFromPairings(pairings, options, &warnings)
servers, err := st.serversFromPairings(pairings, options, &warnings, gc)
if err != nil {
return nil, warnings, err
}
@ -306,6 +306,7 @@ func (st *ServerType) serversFromPairings(
pairings []sbAddrAssociation,
options map[string]interface{},
warnings *[]caddyconfig.Warning,
groupCounter counter,
) (map[string]*caddyhttp.Server, error) {
servers := make(map[string]*caddyhttp.Server)
@ -320,32 +321,32 @@ func (st *ServerType) serversFromPairings(
// case their matchers overlap; we do this somewhat naively by
// descending sort by length of host then path
sort.SliceStable(p.serverBlocks, func(i, j int) bool {
// TODO: we could pre-process the lengths for efficiency,
// TODO: we could pre-process the specificities for efficiency,
// but I don't expect many blocks will have SO many keys...
var iLongestPath, jLongestPath string
var iLongestHost, jLongestHost string
for _, key := range p.serverBlocks[i].block.Keys {
addr, _ := ParseAddress(key)
if length(addr.Host) > length(iLongestHost) {
if specificity(addr.Host) > specificity(iLongestHost) {
iLongestHost = addr.Host
}
if len(addr.Path) > len(iLongestPath) {
if specificity(addr.Path) > specificity(iLongestPath) {
iLongestPath = addr.Path
}
}
for _, key := range p.serverBlocks[j].block.Keys {
addr, _ := ParseAddress(key)
if length(addr.Host) > length(jLongestHost) {
if specificity(addr.Host) > specificity(jLongestHost) {
jLongestHost = addr.Host
}
if len(addr.Path) > len(jLongestPath) {
if specificity(addr.Path) > specificity(jLongestPath) {
jLongestPath = addr.Path
}
}
if length(iLongestHost) == length(jLongestHost) {
if specificity(iLongestHost) == specificity(jLongestHost) {
return len(iLongestPath) > len(jLongestPath)
}
return length(iLongestHost) > length(jLongestHost)
return specificity(iLongestHost) > specificity(jLongestHost)
})
// create a subroute for each site in the server block
@ -355,10 +356,7 @@ func (st *ServerType) serversFromPairings(
return nil, fmt.Errorf("server block %v: compiling matcher sets: %v", sblock.block.Keys, err)
}
siteSubroute := new(caddyhttp.Subroute)
// tls: connection policies and toggle auto HTTPS
autoHTTPSQualifiedHosts, err := st.autoHTTPSHosts(sblock)
if err != nil {
return nil, err
@ -391,38 +389,13 @@ func (st *ServerType) serversFromPairings(
// TODO: consolidate equal conn policies
}
// vars: make sure these are linked in first so future
// routes can use the variables they define
for _, cfgVal := range sblock.pile["var"] {
siteSubroute.Routes = append(siteSubroute.Routes, cfgVal.Value.(caddyhttp.Route))
}
// set up each handler directive, making sure to honor directive order
dirRoutes := sblock.pile["route"]
sortRoutes(dirRoutes)
// add all the routes piled in from directives
for _, r := range dirRoutes {
// as a special case, group rewrite directives so that they are mutually exclusive;
// this means that only the first matching rewrite will be evaluated, and that's
// probably a good thing, since there should never be a need to do more than one
// rewrite (I think?), and cascading rewrites smell bad... imagine these rewrites:
// rewrite /docs/json/* /docs/json/index.html
// rewrite /docs/* /docs/index.html
// (We use this on the Caddy website, or at least we did once.) The first rewrite's
// result is also matched by the second rewrite, making the first rewrite pointless.
// See issue #2959.
if r.directive == "rewrite" {
route := r.Value.(caddyhttp.Route)
route.Group = "rewriting"
r.Value = route
}
siteSubroute.Routes = append(siteSubroute.Routes, r.Value.(caddyhttp.Route))
siteSubroute, err := buildSubroute(dirRoutes, groupCounter)
if err != nil {
return nil, err
}
siteSubroute.Routes = consolidateRoutes(siteSubroute.Routes)
if len(matcherSetsEnc) == 0 && len(p.serverBlocks) == 1 {
// no need to wrap the handlers in a subroute if this is
// the only server block and there is no matcher for it
@ -446,6 +419,98 @@ func (st *ServerType) serversFromPairings(
return servers, nil
}
func buildSubroute(routes []ConfigValue, groupCounter counter) (*caddyhttp.Subroute, error) {
for _, val := range routes {
if !directiveIsOrdered(val.directive) {
return nil, fmt.Errorf("directive '%s' is not ordered, so it cannot be used here", val.directive)
}
}
sortRoutes(routes)
subroute := new(caddyhttp.Subroute)
// get a group name for rewrite directives, if needed
var rewriteGroupName string
var rewriteCount int
for _, r := range routes {
if r.directive == "rewrite" {
rewriteCount++
if rewriteCount > 1 {
break
}
}
}
if rewriteCount > 1 {
rewriteGroupName = groupCounter.nextGroup()
}
// get a group name for handle blocks, if needed
var handleGroupName string
var handleCount int
for _, r := range routes {
if r.directive == "handle" {
handleCount++
if handleCount > 1 {
break
}
}
}
if handleCount > 1 {
handleGroupName = groupCounter.nextGroup()
}
// add all the routes piled in from directives
for _, r := range routes {
// as a special case, group rewrite directives so that they are mutually exclusive;
// this means that only the first matching rewrite will be evaluated, and that's
// probably a good thing, since there should never be a need to do more than one
// rewrite (I think?), and cascading rewrites smell bad... imagine these rewrites:
// rewrite /docs/json/* /docs/json/index.html
// rewrite /docs/* /docs/index.html
// (We use this on the Caddy website, or at least we did once.) The first rewrite's
// result is also matched by the second rewrite, making the first rewrite pointless.
// See issue #2959.
if r.directive == "rewrite" {
route := r.Value.(caddyhttp.Route)
route.Group = rewriteGroupName
r.Value = route
}
// handle blocks are also mutually exclusive by definition
if r.directive == "handle" {
route := r.Value.(caddyhttp.Route)
route.Group = handleGroupName
r.Value = route
}
switch route := r.Value.(type) {
case caddyhttp.Subroute:
// if a route-class config value is actually a Subroute handler
// with nothing but a list of routes, then it is the intention
// of the directive to keep these handlers together and in this
// same order, but not necessarily in a subroute (if it wanted
// to keep them in a subroute, the directive would have returned
// a route with a Subroute as its handler); this is useful to
// keep multiple handlers/routes together and in the same order
// so that the sorting procedure we did above doesn't reorder them
if route.Errors != nil {
// if error handlers are also set, this is confusing; it's
// probably supposed to be wrapped in a Route and encoded
// as a regular handler route... programmer error.
panic("found subroute with more than just routes; perhaps it should have been wrapped in a route?")
}
subroute.Routes = append(subroute.Routes, route.Routes...)
case caddyhttp.Route:
subroute.Routes = append(subroute.Routes, route)
}
}
subroute.Routes = consolidateRoutes(subroute.Routes)
return subroute, nil
}
func (st ServerType) autoHTTPSHosts(sb serverBlock) ([]string, error) {
// get the hosts for this server block...
hosts, err := st.hostsFromServerBlockKeys(sb.block)
@ -521,7 +586,6 @@ func matcherSetFromMatcherToken(
}
return m, true, nil
}
return nil, false, nil
}
@ -659,14 +723,40 @@ func tryInt(val interface{}, warnings *[]caddyconfig.Warning) int {
return intVal
}
// length returns len(s) minus any wildcards (*). Basically,
// it's a length count that penalizes the use of wildcards.
// This is useful for comparing hostnames, but probably not
// paths so much (for example, '*.example.com' is clearly
// less specific than 'a.example.com', but is '/a' more or
// less specific than '/a*'?).
func length(s string) int {
return len(s) - strings.Count(s, "*")
// specifity returns len(s) minus any wildcards (*) and
// placeholders ({...}). Basically, it's a length count
// that penalizes the use of wildcards and placeholders.
// This is useful for comparing hostnames and paths.
// However, wildcards in paths are not a sure answer to
// the question of specificity. For exmaple,
// '*.example.com' is clearly less specific than
// 'a.example.com', but is '/a' more or less specific
// than '/a*'?
func specificity(s string) int {
l := len(s) - strings.Count(s, "*")
for len(s) > 0 {
start := strings.Index(s, "{")
if start < 0 {
return l
}
end := strings.Index(s[start:], "}") + start + 1
if end <= start {
return l
}
l -= end - start
s = s[end:]
}
return l
}
type counter struct {
n *int
}
func (c counter) nextGroup() string {
name := fmt.Sprintf("group%d", *c.n)
*c.n++
return name
}
type matcherSetAndTokens struct {

View file

@ -0,0 +1,33 @@
package httpcaddyfile
import "testing"
func TestSpecificity(t *testing.T) {
for i, tc := range []struct {
input string
expect int
}{
{"", 0},
{"*", 0},
{"*.*", 1},
{"{placeholder}", 0},
{"/{placeholder}", 1},
{"foo", 3},
{"example.com", 11},
{"a.example.com", 13},
{"*.example.com", 12},
{"/foo", 4},
{"/foo*", 4},
{"{placeholder}.example.com", 12},
{"{placeholder.example.com", 24},
{"}.", 2},
{"}{", 2},
{"{}", 0},
{"{{{}}", 1},
} {
actual := specificity(tc.input)
if actual != tc.expect {
t.Errorf("Test %d (%s): Expected %d but got %d", i, tc.input, tc.expect, actual)
}
}
}

View file

@ -338,6 +338,9 @@ func (app *App) automaticHTTPSPhase2() error {
if err != nil {
return fmt.Errorf("%s: managing certificate for %s: %s", srvName, domains, err)
}
// no longer needed; allow GC to deallocate
srv.AutoHTTPS.domainSet = nil
}
return nil

View file

@ -199,7 +199,7 @@ func TestPathMatcher(t *testing.T) {
},
{
match: MatchPath{"*.ext"},
input: "/foo.ext",
input: "/foo/bar.ext",
expect: true,
},
{

View file

@ -81,7 +81,7 @@ func (t *Transport) UnmarshalCaddyfile(d *caddyfile.Dispenser) error {
//
// php_fastcgi localhost:7777
//
// is equivalent to:
// is equivalent to a route consisting of:
//
// @canonicalPath {
// file {
@ -104,8 +104,8 @@ func (t *Transport) UnmarshalCaddyfile(d *caddyfile.Dispenser) error {
// }
// }
//
// Thus, this directive produces multiple routes, each with a different
// matcher because multiple consecutive routes are necessary to support
// Thus, this directive produces multiple handlers, each with a different
// matcher because multiple consecutive hgandlers are necessary to support
// the common PHP use case. If this "common" config is not compatible
// with a user's PHP requirements, they can use a manual approach based
// on the example above to configure it precisely as they need.
@ -114,7 +114,7 @@ func (t *Transport) UnmarshalCaddyfile(d *caddyfile.Dispenser) error {
//
// php_fastcgi /subpath localhost:7777
//
// then the resulting routes are wrapped in a subroute that uses the
// then the resulting handlers are wrapped in a subroute that uses the
// user's matcher as a prerequisite to enter the subroute. In other
// words, the directive's matcher is necessary, but not sufficient.
func parsePHPFastCGI(h httpcaddyfile.Helper) ([]httpcaddyfile.ConfigValue, error) {
@ -198,12 +198,13 @@ func parsePHPFastCGI(h httpcaddyfile.Helper) ([]httpcaddyfile.ConfigValue, error
HandlersRaw: []json.RawMessage{caddyconfig.JSONModuleObject(rpHandler, "handler", "reverse_proxy", nil)},
}
subroute := caddyhttp.Subroute{
Routes: caddyhttp.RouteList{redirRoute, rewriteRoute, rpRoute},
}
// the user's matcher is a prerequisite for ours, so
// wrap ours in a subroute and return that
if hasUserMatcher {
subroute := caddyhttp.Subroute{
Routes: caddyhttp.RouteList{redirRoute, rewriteRoute, rpRoute},
}
return []httpcaddyfile.ConfigValue{
{
Class: "route",
@ -215,20 +216,14 @@ func parsePHPFastCGI(h httpcaddyfile.Helper) ([]httpcaddyfile.ConfigValue, error
}, nil
}
// if the user did not specify a matcher, then
// we can just use our own matchers
// otherwise, return the literal subroute instead of
// individual routes, to ensure they stay together and
// are treated as a single unit, without necessarily
// creating an actual subroute in the output
return []httpcaddyfile.ConfigValue{
{
Class: "route",
Value: redirRoute,
},
{
Class: "route",
Value: rewriteRoute,
},
{
Class: "route",
Value: rpRoute,
Value: subroute,
},
}, nil
}

View file

@ -50,7 +50,7 @@ type Rewrite struct {
// You can also use placeholders. For example, to preserve the existing
// query string, you might use: "?{http.request.uri.query}&a=b". Any
// key-value pairs you add to the query string will not overwrite
// existing values.
// existing values (individual pairs are append-only).
//
// To clear the query string, explicitly set an empty one: "?"
URI string `json:"uri,omitempty"`

View file

@ -252,7 +252,6 @@ func wrapMiddleware(mh MiddlewareHandler) Middleware {
return HandlerFunc(func(w http.ResponseWriter, r *http.Request) error {
// TODO: This is where request tracing could be implemented
// TODO: Trace a diff of the request, would be cool too... see what changed since the last middleware (host, headers, URI...)
// TODO: see what the std lib gives us in terms of stack tracing too
return mh.ServeHTTP(w, r, nextCopy)
})