httpcaddyfile: Fix nested blocks; add handle directive; refactor

The fix that was initially put forth in  was good, but only for
up to one layer of nesting. The real problem was that we forgot to
increment nesting when already inside a block if we saw another open
curly brace that opens another block (dispenser.go L157-158).

The new 'handle' directive allows HTTP Caddyfiles to be designed more
like nginx location blocks if the user prefers. Inside a handle block,
directives are still ordered just like they are outside of them, but
handler blocks at a given level of nesting are mutually exclusive.

This work benefitted from some refactoring and cleanup.
This commit is contained in:
Matthew Holt 2020-01-16 17:08:52 -07:00
parent 21643a007a
commit e51e56a494
No known key found for this signature in database
GPG key ID: 2A349DD577D586A5
10 changed files with 269 additions and 100 deletions

View file

@ -152,8 +152,10 @@ func (d *Dispenser) NextBlock(initialNestingLevel int) bool {
if !d.Next() {
return false // should be EOF error
}
if d.Val() == "}" {
if d.Val() == "}" && !d.nextOnSameLine() {
d.nesting--
} else if d.Val() == "{" && !d.nextOnSameLine() {
d.nesting++
}
return d.nesting > initialNestingLevel
}
@ -262,9 +264,9 @@ func (d *Dispenser) NewFromNextTokens() *Dispenser {
if !openedBlock {
// because NextBlock() consumes the initial open
// curly brace, we rewind here to append it, since
// our case is special in that we want to include
// all the tokens including surrounding curly braces
// for a new dispenser to have
// our case is special in that we want the new
// dispenser to have all the tokens including
// surrounding curly braces
d.Prev()
tkns = append(tkns, d.Token())
d.Next()
@ -273,12 +275,12 @@ func (d *Dispenser) NewFromNextTokens() *Dispenser {
tkns = append(tkns, d.Token())
}
if openedBlock {
// include closing brace accordingly
// include closing brace
tkns = append(tkns, d.Token())
// since NewFromNextTokens is intended to consume the entire
// directive, we must call Next() here and consume the closing
// curly brace
d.Next()
// do not consume the closing curly brace; the
// next iteration of the enclosing loop will
// call Next() and consume it
}
return NewDispenser(tkns)
}

View file

@ -34,6 +34,7 @@ func init() {
RegisterHandlerDirective("redir", parseRedir)
RegisterHandlerDirective("respond", parseRespond)
RegisterHandlerDirective("route", parseRoute)
RegisterHandlerDirective("handle", parseHandle)
}
func parseBind(h Helper) ([]ConfigValue, error) {
@ -76,7 +77,7 @@ func parseRoot(h Helper) ([]ConfigValue, error) {
route.MatcherSetsRaw = []caddy.ModuleMap{matcherSet}
}
return h.NewVarsRoute(route), nil
return []ConfigValue{{Class: "route", Value: route}}, nil
}
func parseTLS(h Helper) ([]ConfigValue, error) {
@ -330,3 +331,34 @@ func parseRoute(h Helper) (caddyhttp.MiddlewareHandler, error) {
return sr, nil
}
func parseHandle(h Helper) (caddyhttp.MiddlewareHandler, error) {
var allResults []ConfigValue
for h.Next() {
for nesting := h.Nesting(); h.NextBlock(nesting); {
dir := h.Val()
dirFunc, ok := registeredDirectives[dir]
if !ok {
return nil, h.Errf("unrecognized directive: %s", dir)
}
subHelper := h
subHelper.Dispenser = h.NewFromNextTokens()
results, err := dirFunc(subHelper)
if err != nil {
return nil, h.Errf("parsing caddyfile tokens for '%s': %v", dir, err)
}
for _, result := range results {
result.directive = dir
allResults = append(allResults, result)
}
}
return buildSubroute(allResults, h.groupCounter)
}
return nil, nil
}

View file

@ -16,7 +16,6 @@ package httpcaddyfile
import (
"encoding/json"
"fmt"
"sort"
"github.com/caddyserver/caddy/v2"
@ -28,7 +27,10 @@ import (
// directiveOrder specifies the order
// to apply directives in HTTP routes.
var directiveOrder = []string{
"root",
"rewrite",
"strip_prefix",
"strip_suffix",
"uri_replace",
@ -38,7 +40,10 @@ var directiveOrder = []string{
"request_header",
"encode",
"templates",
"handle",
"route",
"redir",
"respond",
"reverse_proxy",
@ -46,6 +51,17 @@ var directiveOrder = []string{
"file_server",
}
// directiveIsOrdered returns true if dir is
// a known, ordered (sorted) directive.
func directiveIsOrdered(dir string) bool {
for _, d := range directiveOrder {
if d == dir {
return true
}
}
return false
}
// RegisterDirective registers a unique directive dir with an
// associated unmarshaling (setup) function. When directive dir
// is encountered in a Caddyfile, setupFunc will be called to
@ -98,7 +114,7 @@ type Helper struct {
warnings *[]caddyconfig.Warning
matcherDefs map[string]caddy.ModuleMap
parentBlock caddyfile.ServerBlock
groupCounter *int
groupCounter counter
}
// Option gets the option keyed by name.
@ -130,8 +146,8 @@ func (h Helper) JSON(val interface{}) json.RawMessage {
return caddyconfig.JSON(val, h.warnings)
}
// MatcherToken assumes the current token is (possibly) a matcher, and
// if so, returns the matcher set along with a true value. If the current
// MatcherToken assumes the next argument token is (possibly) a matcher,
// and if so, returns the matcher set along with a true value. If the next
// token is not a matcher, nil and false is returned. Note that a true
// value may be returned with a nil matcher set if it is a catch-all.
func (h Helper) MatcherToken() (caddy.ModuleMap, bool, error) {
@ -186,14 +202,13 @@ func (h Helper) GroupRoutes(vals []ConfigValue) {
}
// now that we know the group will have some effect, do it
groupNum := *h.groupCounter
groupName := h.groupCounter.nextGroup()
for i := range vals {
if route, ok := vals[i].Value.(caddyhttp.Route); ok {
route.Group = fmt.Sprintf("group%d", groupNum)
route.Group = groupName
vals[i].Value = route
}
}
*h.groupCounter++
}
// NewBindAddresses returns config values relevant to adding
@ -202,12 +217,6 @@ func (h Helper) NewBindAddresses(addrs []string) []ConfigValue {
return []ConfigValue{{Class: "bind", Value: addrs}}
}
// NewVarsRoute returns config values relevant to adding a
// "vars" wrapper route to the config.
func (h Helper) NewVarsRoute(route caddyhttp.Route) []ConfigValue {
return []ConfigValue{{Class: "var", Value: route}}
}
// ConfigValue represents a value to be added to the final
// configuration, or a value to be consulted when building
// the final configuration.
@ -228,7 +237,7 @@ type ConfigValue struct {
directive string
}
func sortRoutes(handlers []ConfigValue) {
func sortRoutes(routes []ConfigValue) {
dirPositions := make(map[string]int)
for i, dir := range directiveOrder {
dirPositions[dir] = i
@ -237,15 +246,21 @@ func sortRoutes(handlers []ConfigValue) {
// while we are sorting, we will need to decode a route's path matcher
// in order to sub-sort by path length; we can amortize this operation
// for efficiency by storing the decoded matchers in a slice
decodedMatchers := make([]caddyhttp.MatchPath, len(handlers))
decodedMatchers := make([]caddyhttp.MatchPath, len(routes))
sort.SliceStable(handlers, func(i, j int) bool {
iDir, jDir := handlers[i].directive, handlers[j].directive
sort.SliceStable(routes, func(i, j int) bool {
iDir, jDir := routes[i].directive, routes[j].directive
if iDir == jDir {
// directives are the same; sub-sort by path matcher length
// if there's only one matcher set and one path (common case)
iRoute := handlers[i].Value.(caddyhttp.Route)
jRoute := handlers[j].Value.(caddyhttp.Route)
iRoute, ok := routes[i].Value.(caddyhttp.Route)
if !ok {
return false
}
jRoute, ok := routes[j].Value.(caddyhttp.Route)
if !ok {
return false
}
if len(iRoute.MatcherSetsRaw) == 1 && len(jRoute.MatcherSetsRaw) == 1 {
// use already-decoded matcher, or decode if it's the first time seeing it

View file

@ -41,7 +41,7 @@ type ServerType struct {
func (st ServerType) Setup(originalServerBlocks []caddyfile.ServerBlock,
options map[string]interface{}) (*caddy.Config, []caddyconfig.Warning, error) {
var warnings []caddyconfig.Warning
groupCounter := new(int)
gc := counter{new(int)}
var serverBlocks []serverBlock
for _, sblock := range originalServerBlocks {
@ -146,7 +146,7 @@ func (st ServerType) Setup(originalServerBlocks []caddyfile.ServerBlock,
warnings: &warnings,
matcherDefs: matcherDefs,
parentBlock: sb.block,
groupCounter: groupCounter,
groupCounter: gc,
})
if err != nil {
return nil, warnings, fmt.Errorf("parsing caddyfile tokens for '%s': %v", dir, err)
@ -169,7 +169,7 @@ func (st ServerType) Setup(originalServerBlocks []caddyfile.ServerBlock,
// each pairing of listener addresses to list of server
// blocks is basically a server definition
servers, err := st.serversFromPairings(pairings, options, &warnings)
servers, err := st.serversFromPairings(pairings, options, &warnings, gc)
if err != nil {
return nil, warnings, err
}
@ -306,6 +306,7 @@ func (st *ServerType) serversFromPairings(
pairings []sbAddrAssociation,
options map[string]interface{},
warnings *[]caddyconfig.Warning,
groupCounter counter,
) (map[string]*caddyhttp.Server, error) {
servers := make(map[string]*caddyhttp.Server)
@ -320,32 +321,32 @@ func (st *ServerType) serversFromPairings(
// case their matchers overlap; we do this somewhat naively by
// descending sort by length of host then path
sort.SliceStable(p.serverBlocks, func(i, j int) bool {
// TODO: we could pre-process the lengths for efficiency,
// TODO: we could pre-process the specificities for efficiency,
// but I don't expect many blocks will have SO many keys...
var iLongestPath, jLongestPath string
var iLongestHost, jLongestHost string
for _, key := range p.serverBlocks[i].block.Keys {
addr, _ := ParseAddress(key)
if length(addr.Host) > length(iLongestHost) {
if specificity(addr.Host) > specificity(iLongestHost) {
iLongestHost = addr.Host
}
if len(addr.Path) > len(iLongestPath) {
if specificity(addr.Path) > specificity(iLongestPath) {
iLongestPath = addr.Path
}
}
for _, key := range p.serverBlocks[j].block.Keys {
addr, _ := ParseAddress(key)
if length(addr.Host) > length(jLongestHost) {
if specificity(addr.Host) > specificity(jLongestHost) {
jLongestHost = addr.Host
}
if len(addr.Path) > len(jLongestPath) {
if specificity(addr.Path) > specificity(jLongestPath) {
jLongestPath = addr.Path
}
}
if length(iLongestHost) == length(jLongestHost) {
if specificity(iLongestHost) == specificity(jLongestHost) {
return len(iLongestPath) > len(jLongestPath)
}
return length(iLongestHost) > length(jLongestHost)
return specificity(iLongestHost) > specificity(jLongestHost)
})
// create a subroute for each site in the server block
@ -355,10 +356,7 @@ func (st *ServerType) serversFromPairings(
return nil, fmt.Errorf("server block %v: compiling matcher sets: %v", sblock.block.Keys, err)
}
siteSubroute := new(caddyhttp.Subroute)
// tls: connection policies and toggle auto HTTPS
autoHTTPSQualifiedHosts, err := st.autoHTTPSHosts(sblock)
if err != nil {
return nil, err
@ -391,38 +389,13 @@ func (st *ServerType) serversFromPairings(
// TODO: consolidate equal conn policies
}
// vars: make sure these are linked in first so future
// routes can use the variables they define
for _, cfgVal := range sblock.pile["var"] {
siteSubroute.Routes = append(siteSubroute.Routes, cfgVal.Value.(caddyhttp.Route))
}
// set up each handler directive, making sure to honor directive order
dirRoutes := sblock.pile["route"]
sortRoutes(dirRoutes)
// add all the routes piled in from directives
for _, r := range dirRoutes {
// as a special case, group rewrite directives so that they are mutually exclusive;
// this means that only the first matching rewrite will be evaluated, and that's
// probably a good thing, since there should never be a need to do more than one
// rewrite (I think?), and cascading rewrites smell bad... imagine these rewrites:
// rewrite /docs/json/* /docs/json/index.html
// rewrite /docs/* /docs/index.html
// (We use this on the Caddy website, or at least we did once.) The first rewrite's
// result is also matched by the second rewrite, making the first rewrite pointless.
// See issue #2959.
if r.directive == "rewrite" {
route := r.Value.(caddyhttp.Route)
route.Group = "rewriting"
r.Value = route
}
siteSubroute.Routes = append(siteSubroute.Routes, r.Value.(caddyhttp.Route))
siteSubroute, err := buildSubroute(dirRoutes, groupCounter)
if err != nil {
return nil, err
}
siteSubroute.Routes = consolidateRoutes(siteSubroute.Routes)
if len(matcherSetsEnc) == 0 && len(p.serverBlocks) == 1 {
// no need to wrap the handlers in a subroute if this is
// the only server block and there is no matcher for it
@ -446,6 +419,98 @@ func (st *ServerType) serversFromPairings(
return servers, nil
}
func buildSubroute(routes []ConfigValue, groupCounter counter) (*caddyhttp.Subroute, error) {
for _, val := range routes {
if !directiveIsOrdered(val.directive) {
return nil, fmt.Errorf("directive '%s' is not ordered, so it cannot be used here", val.directive)
}
}
sortRoutes(routes)
subroute := new(caddyhttp.Subroute)
// get a group name for rewrite directives, if needed
var rewriteGroupName string
var rewriteCount int
for _, r := range routes {
if r.directive == "rewrite" {
rewriteCount++
if rewriteCount > 1 {
break
}
}
}
if rewriteCount > 1 {
rewriteGroupName = groupCounter.nextGroup()
}
// get a group name for handle blocks, if needed
var handleGroupName string
var handleCount int
for _, r := range routes {
if r.directive == "handle" {
handleCount++
if handleCount > 1 {
break
}
}
}
if handleCount > 1 {
handleGroupName = groupCounter.nextGroup()
}
// add all the routes piled in from directives
for _, r := range routes {
// as a special case, group rewrite directives so that they are mutually exclusive;
// this means that only the first matching rewrite will be evaluated, and that's
// probably a good thing, since there should never be a need to do more than one
// rewrite (I think?), and cascading rewrites smell bad... imagine these rewrites:
// rewrite /docs/json/* /docs/json/index.html
// rewrite /docs/* /docs/index.html
// (We use this on the Caddy website, or at least we did once.) The first rewrite's
// result is also matched by the second rewrite, making the first rewrite pointless.
// See issue #2959.
if r.directive == "rewrite" {
route := r.Value.(caddyhttp.Route)
route.Group = rewriteGroupName
r.Value = route
}
// handle blocks are also mutually exclusive by definition
if r.directive == "handle" {
route := r.Value.(caddyhttp.Route)
route.Group = handleGroupName
r.Value = route
}
switch route := r.Value.(type) {
case caddyhttp.Subroute:
// if a route-class config value is actually a Subroute handler
// with nothing but a list of routes, then it is the intention
// of the directive to keep these handlers together and in this
// same order, but not necessarily in a subroute (if it wanted
// to keep them in a subroute, the directive would have returned
// a route with a Subroute as its handler); this is useful to
// keep multiple handlers/routes together and in the same order
// so that the sorting procedure we did above doesn't reorder them
if route.Errors != nil {
// if error handlers are also set, this is confusing; it's
// probably supposed to be wrapped in a Route and encoded
// as a regular handler route... programmer error.
panic("found subroute with more than just routes; perhaps it should have been wrapped in a route?")
}
subroute.Routes = append(subroute.Routes, route.Routes...)
case caddyhttp.Route:
subroute.Routes = append(subroute.Routes, route)
}
}
subroute.Routes = consolidateRoutes(subroute.Routes)
return subroute, nil
}
func (st ServerType) autoHTTPSHosts(sb serverBlock) ([]string, error) {
// get the hosts for this server block...
hosts, err := st.hostsFromServerBlockKeys(sb.block)
@ -521,7 +586,6 @@ func matcherSetFromMatcherToken(
}
return m, true, nil
}
return nil, false, nil
}
@ -659,14 +723,40 @@ func tryInt(val interface{}, warnings *[]caddyconfig.Warning) int {
return intVal
}
// length returns len(s) minus any wildcards (*). Basically,
// it's a length count that penalizes the use of wildcards.
// This is useful for comparing hostnames, but probably not
// paths so much (for example, '*.example.com' is clearly
// less specific than 'a.example.com', but is '/a' more or
// less specific than '/a*'?).
func length(s string) int {
return len(s) - strings.Count(s, "*")
// specifity returns len(s) minus any wildcards (*) and
// placeholders ({...}). Basically, it's a length count
// that penalizes the use of wildcards and placeholders.
// This is useful for comparing hostnames and paths.
// However, wildcards in paths are not a sure answer to
// the question of specificity. For exmaple,
// '*.example.com' is clearly less specific than
// 'a.example.com', but is '/a' more or less specific
// than '/a*'?
func specificity(s string) int {
l := len(s) - strings.Count(s, "*")
for len(s) > 0 {
start := strings.Index(s, "{")
if start < 0 {
return l
}
end := strings.Index(s[start:], "}") + start + 1
if end <= start {
return l
}
l -= end - start
s = s[end:]
}
return l
}
type counter struct {
n *int
}
func (c counter) nextGroup() string {
name := fmt.Sprintf("group%d", *c.n)
*c.n++
return name
}
type matcherSetAndTokens struct {

View file

@ -0,0 +1,33 @@
package httpcaddyfile
import "testing"
func TestSpecificity(t *testing.T) {
for i, tc := range []struct {
input string
expect int
}{
{"", 0},
{"*", 0},
{"*.*", 1},
{"{placeholder}", 0},
{"/{placeholder}", 1},
{"foo", 3},
{"example.com", 11},
{"a.example.com", 13},
{"*.example.com", 12},
{"/foo", 4},
{"/foo*", 4},
{"{placeholder}.example.com", 12},
{"{placeholder.example.com", 24},
{"}.", 2},
{"}{", 2},
{"{}", 0},
{"{{{}}", 1},
} {
actual := specificity(tc.input)
if actual != tc.expect {
t.Errorf("Test %d (%s): Expected %d but got %d", i, tc.input, tc.expect, actual)
}
}
}

View file

@ -338,6 +338,9 @@ func (app *App) automaticHTTPSPhase2() error {
if err != nil {
return fmt.Errorf("%s: managing certificate for %s: %s", srvName, domains, err)
}
// no longer needed; allow GC to deallocate
srv.AutoHTTPS.domainSet = nil
}
return nil

View file

@ -199,7 +199,7 @@ func TestPathMatcher(t *testing.T) {
},
{
match: MatchPath{"*.ext"},
input: "/foo.ext",
input: "/foo/bar.ext",
expect: true,
},
{

View file

@ -81,7 +81,7 @@ func (t *Transport) UnmarshalCaddyfile(d *caddyfile.Dispenser) error {
//
// php_fastcgi localhost:7777
//
// is equivalent to:
// is equivalent to a route consisting of:
//
// @canonicalPath {
// file {
@ -104,8 +104,8 @@ func (t *Transport) UnmarshalCaddyfile(d *caddyfile.Dispenser) error {
// }
// }
//
// Thus, this directive produces multiple routes, each with a different
// matcher because multiple consecutive routes are necessary to support
// Thus, this directive produces multiple handlers, each with a different
// matcher because multiple consecutive hgandlers are necessary to support
// the common PHP use case. If this "common" config is not compatible
// with a user's PHP requirements, they can use a manual approach based
// on the example above to configure it precisely as they need.
@ -114,7 +114,7 @@ func (t *Transport) UnmarshalCaddyfile(d *caddyfile.Dispenser) error {
//
// php_fastcgi /subpath localhost:7777
//
// then the resulting routes are wrapped in a subroute that uses the
// then the resulting handlers are wrapped in a subroute that uses the
// user's matcher as a prerequisite to enter the subroute. In other
// words, the directive's matcher is necessary, but not sufficient.
func parsePHPFastCGI(h httpcaddyfile.Helper) ([]httpcaddyfile.ConfigValue, error) {
@ -198,12 +198,13 @@ func parsePHPFastCGI(h httpcaddyfile.Helper) ([]httpcaddyfile.ConfigValue, error
HandlersRaw: []json.RawMessage{caddyconfig.JSONModuleObject(rpHandler, "handler", "reverse_proxy", nil)},
}
subroute := caddyhttp.Subroute{
Routes: caddyhttp.RouteList{redirRoute, rewriteRoute, rpRoute},
}
// the user's matcher is a prerequisite for ours, so
// wrap ours in a subroute and return that
if hasUserMatcher {
subroute := caddyhttp.Subroute{
Routes: caddyhttp.RouteList{redirRoute, rewriteRoute, rpRoute},
}
return []httpcaddyfile.ConfigValue{
{
Class: "route",
@ -215,20 +216,14 @@ func parsePHPFastCGI(h httpcaddyfile.Helper) ([]httpcaddyfile.ConfigValue, error
}, nil
}
// if the user did not specify a matcher, then
// we can just use our own matchers
// otherwise, return the literal subroute instead of
// individual routes, to ensure they stay together and
// are treated as a single unit, without necessarily
// creating an actual subroute in the output
return []httpcaddyfile.ConfigValue{
{
Class: "route",
Value: redirRoute,
},
{
Class: "route",
Value: rewriteRoute,
},
{
Class: "route",
Value: rpRoute,
Value: subroute,
},
}, nil
}

View file

@ -50,7 +50,7 @@ type Rewrite struct {
// You can also use placeholders. For example, to preserve the existing
// query string, you might use: "?{http.request.uri.query}&a=b". Any
// key-value pairs you add to the query string will not overwrite
// existing values.
// existing values (individual pairs are append-only).
//
// To clear the query string, explicitly set an empty one: "?"
URI string `json:"uri,omitempty"`

View file

@ -252,7 +252,6 @@ func wrapMiddleware(mh MiddlewareHandler) Middleware {
return HandlerFunc(func(w http.ResponseWriter, r *http.Request) error {
// TODO: This is where request tracing could be implemented
// TODO: Trace a diff of the request, would be cool too... see what changed since the last middleware (host, headers, URI...)
// TODO: see what the std lib gives us in terms of stack tracing too
return mh.ServeHTTP(w, r, nextCopy)
})