caddyhttp: Implement better HTTP matchers including regexp; add tests

This commit is contained in:
Matthew Holt 2019-05-10 21:07:02 -06:00
parent 48b5a80320
commit 8ae0d6a509
5 changed files with 519 additions and 66 deletions

View file

@ -1,8 +1,12 @@
package caddyhttp
import (
"fmt"
"log"
"net/http"
"net/textproto"
"net/url"
"regexp"
"strings"
"bitbucket.org/lightcodelabs/caddy2"
@ -10,15 +14,16 @@ import (
"go.starlark.net/starlark"
)
// TODO: Matchers should probably support regex of some sort... performance trade-offs?
type (
matchHost []string
matchPath []string
matchPathRE struct{ matchRegexp }
matchMethod []string
matchQuery map[string][]string
matchHeader map[string][]string
matchQuery url.Values
matchHeader http.Header
matchHeaderRE map[string]*matchRegexp
matchProtocol string
matchScript string
matchStarlark string
)
func init() {
@ -30,6 +35,10 @@ func init() {
Name: "http.matchers.path",
New: func() (interface{}, error) { return matchPath{}, nil },
})
caddy2.RegisterModule(caddy2.Module{
Name: "http.matchers.path_regexp",
New: func() (interface{}, error) { return new(matchPathRE), nil },
})
caddy2.RegisterModule(caddy2.Module{
Name: "http.matchers.method",
New: func() (interface{}, error) { return matchMethod{}, nil },
@ -42,45 +51,39 @@ func init() {
Name: "http.matchers.header",
New: func() (interface{}, error) { return matchHeader{}, nil },
})
caddy2.RegisterModule(caddy2.Module{
Name: "http.matchers.header_regexp",
New: func() (interface{}, error) { return matchHeaderRE{}, nil },
})
caddy2.RegisterModule(caddy2.Module{
Name: "http.matchers.protocol",
New: func() (interface{}, error) { return new(matchProtocol), nil },
})
caddy2.RegisterModule(caddy2.Module{
Name: "http.matchers.caddyscript",
New: func() (interface{}, error) { return new(matchScript), nil },
New: func() (interface{}, error) { return new(matchStarlark), nil },
})
}
func (m matchScript) Match(r *http.Request) bool {
input := string(m)
thread := new(starlark.Thread)
env := caddyscript.MatcherEnv(r)
val, err := starlark.Eval(thread, "", input, env)
if err != nil {
log.Printf("caddyscript for matcher is invalid: attempting to evaluate expression `%v` error `%v`", input, err)
return false
}
return val.String() == "True"
}
func (m matchProtocol) Match(r *http.Request) bool {
switch string(m) {
case "grpc":
return r.Header.Get("content-type") == "application/grpc"
case "https":
return r.TLS != nil
case "http":
return r.TLS == nil
}
return false
}
func (m matchHost) Match(r *http.Request) bool {
outer:
for _, host := range m {
if r.Host == host {
if strings.Contains(host, "*") {
patternParts := strings.Split(host, ".")
incomingParts := strings.Split(r.Host, ".")
if len(patternParts) != len(incomingParts) {
continue
}
for i := range patternParts {
if patternParts[i] == "*" {
continue
}
if !strings.EqualFold(patternParts[i], incomingParts[i]) {
continue outer
}
}
return true
} else if strings.EqualFold(r.Host, host) {
return true
}
}
@ -96,6 +99,11 @@ func (m matchPath) Match(r *http.Request) bool {
return false
}
func (m matchPathRE) Match(r *http.Request) bool {
repl := r.Context().Value(ReplacerCtxKey).(*Replacer)
return m.match(r.URL.Path, repl, "path_regexp")
}
func (m matchMethod) Match(r *http.Request) bool {
for _, method := range m {
if r.Method == method {
@ -118,26 +126,139 @@ func (m matchQuery) Match(r *http.Request) bool {
}
func (m matchHeader) Match(r *http.Request) bool {
for field, vals := range m {
fieldVals := r.Header[field]
for _, fieldVal := range fieldVals {
for _, v := range vals {
if fieldVal == v {
return true
for field, allowedFieldVals := range m {
var match bool
actualFieldVals := r.Header[textproto.CanonicalMIMEHeaderKey(field)]
fieldVals:
for _, actualFieldVal := range actualFieldVals {
for _, allowedFieldVal := range allowedFieldVals {
if actualFieldVal == allowedFieldVal {
match = true
break fieldVals
}
}
}
if !match {
return false
}
}
return true
}
func (m matchHeaderRE) Match(r *http.Request) bool {
for field, rm := range m {
repl := r.Context().Value(ReplacerCtxKey).(*Replacer)
match := rm.match(r.Header.Get(field), repl, "header_regexp")
if !match {
return false
}
}
return true
}
func (m matchHeaderRE) Provision() error {
for _, rm := range m {
err := rm.Provision()
if err != nil {
return err
}
}
return nil
}
func (m matchHeaderRE) Validate() error {
for _, rm := range m {
err := rm.Validate()
if err != nil {
return err
}
}
return nil
}
func (m matchProtocol) Match(r *http.Request) bool {
switch string(m) {
case "grpc":
return r.Header.Get("content-type") == "application/grpc"
case "https":
return r.TLS != nil
case "http":
return r.TLS == nil
}
return false
}
func (m matchStarlark) Match(r *http.Request) bool {
input := string(m)
thread := new(starlark.Thread)
env := caddyscript.MatcherEnv(r)
val, err := starlark.Eval(thread, "", input, env)
if err != nil {
// TODO: Can we detect this in Provision or Validate instead?
log.Printf("caddyscript for matcher is invalid: attempting to evaluate expression `%v` error `%v`", input, err)
return false
}
return val.String() == "True"
}
// matchRegexp is just the fields common among
// matchers that can use regular expressions.
type matchRegexp struct {
Name string `json:"name"`
Pattern string `json:"pattern"`
compiled *regexp.Regexp
}
func (mre *matchRegexp) Provision() error {
re, err := regexp.Compile(mre.Pattern)
if err != nil {
return fmt.Errorf("compiling matcher regexp %s: %v", mre.Pattern, err)
}
mre.compiled = re
return nil
}
func (mre *matchRegexp) Validate() error {
if mre.Name != "" && !wordRE.MatchString(mre.Name) {
return fmt.Errorf("invalid regexp name (must contain only word characters): %s", mre.Name)
}
return nil
}
func (mre *matchRegexp) match(input string, repl *Replacer, scope string) bool {
matches := mre.compiled.FindStringSubmatch(input)
if matches == nil {
return false
}
// save all capture groups, first by index
for i, match := range matches {
key := fmt.Sprintf("http.matchers.%s.%s.%d", scope, mre.Name, i)
repl.Map(key, match)
}
// then by name
for i, name := range mre.compiled.SubexpNames() {
if i != 0 && name != "" {
key := fmt.Sprintf("http.matchers.%s.%s.%s", scope, mre.Name, name)
repl.Map(key, matches[i])
}
}
return true
}
var wordRE = regexp.MustCompile(`\w+`)
// Interface guards
var (
_ RouteMatcher = (*matchHost)(nil)
_ RouteMatcher = (*matchPath)(nil)
_ RouteMatcher = (*matchPathRE)(nil)
_ RouteMatcher = (*matchMethod)(nil)
_ RouteMatcher = (*matchQuery)(nil)
_ RouteMatcher = (*matchHeader)(nil)
_ RouteMatcher = (*matchHeaderRE)(nil)
_ RouteMatcher = (*matchProtocol)(nil)
_ RouteMatcher = (*matchScript)(nil)
_ RouteMatcher = (*matchStarlark)(nil)
)

View file

@ -0,0 +1,346 @@
package caddyhttp
import (
"context"
"fmt"
"net/http"
"net/http/httptest"
"net/url"
"testing"
)
func TestHostMatcher(t *testing.T) {
for i, tc := range []struct {
match matchHost
input string
expect bool
}{
{
match: matchHost{},
input: "example.com",
expect: false,
},
{
match: matchHost{"example.com"},
input: "example.com",
expect: true,
},
{
match: matchHost{"example.com"},
input: "foo.example.com",
expect: false,
},
{
match: matchHost{"example.com"},
input: "EXAMPLE.COM",
expect: true,
},
{
match: matchHost{"foo.example.com"},
input: "foo.example.com",
expect: true,
},
{
match: matchHost{"foo.example.com"},
input: "bar.example.com",
expect: false,
},
{
match: matchHost{"*.example.com"},
input: "example.com",
expect: false,
},
{
match: matchHost{"*.example.com"},
input: "foo.example.com",
expect: true,
},
{
match: matchHost{"*.example.com"},
input: "foo.bar.example.com",
expect: false,
},
{
match: matchHost{"*.example.com", "example.net"},
input: "example.net",
expect: true,
},
{
match: matchHost{"example.net", "*.example.com"},
input: "foo.example.com",
expect: true,
},
{
match: matchHost{"*.example.net", "*.*.example.com"},
input: "foo.bar.example.com",
expect: true,
},
{
match: matchHost{"*.example.net", "sub.*.example.com"},
input: "sub.foo.example.com",
expect: true,
},
{
match: matchHost{"*.example.net", "sub.*.example.com"},
input: "sub.foo.example.net",
expect: false,
},
} {
req := &http.Request{Host: tc.input}
actual := tc.match.Match(req)
if actual != tc.expect {
t.Errorf("Test %d %v: Expected %t, got %t for '%s'", i, tc.match, tc.expect, actual, tc.input)
continue
}
}
}
func TestPathMatcher(t *testing.T) {
for i, tc := range []struct {
match matchPath
input string
expect bool
}{
{
match: matchPath{},
input: "/",
expect: false,
},
{
match: matchPath{"/"},
input: "/",
expect: true,
},
{
match: matchPath{"/foo/bar"},
input: "/",
expect: false,
},
{
match: matchPath{"/foo/bar"},
input: "/foo/bar",
expect: true,
},
{
match: matchPath{"/foo/bar/"},
input: "/foo/bar",
expect: false,
},
{
match: matchPath{"/foo/bar/", "/other"},
input: "/other/",
expect: true,
},
} {
req := &http.Request{URL: &url.URL{Path: tc.input}}
actual := tc.match.Match(req)
if actual != tc.expect {
t.Errorf("Test %d %v: Expected %t, got %t for '%s'", i, tc.match, tc.expect, actual, tc.input)
continue
}
}
}
func TestPathREMatcher(t *testing.T) {
for i, tc := range []struct {
match matchPathRE
input string
expect bool
expectRepl map[string]string
}{
{
match: matchPathRE{},
input: "/",
expect: true,
},
{
match: matchPathRE{matchRegexp{Pattern: "/"}},
input: "/",
expect: true,
},
{
match: matchPathRE{matchRegexp{Pattern: "/foo"}},
input: "/foo",
expect: true,
},
{
match: matchPathRE{matchRegexp{Pattern: "/foo"}},
input: "/foo/",
expect: true,
},
{
match: matchPathRE{matchRegexp{Pattern: "/bar"}},
input: "/foo/",
expect: false,
},
{
match: matchPathRE{matchRegexp{Pattern: "^/bar"}},
input: "/foo/bar",
expect: false,
},
{
match: matchPathRE{matchRegexp{Pattern: "^/foo/(.*)/baz$", Name: "name"}},
input: "/foo/bar/baz",
expect: true,
expectRepl: map[string]string{"name.1": "bar"},
},
{
match: matchPathRE{matchRegexp{Pattern: "^/foo/(?P<myparam>.*)/baz$", Name: "name"}},
input: "/foo/bar/baz",
expect: true,
expectRepl: map[string]string{"name.myparam": "bar"},
},
} {
// compile the regexp and validate its name
err := tc.match.Provision()
if err != nil {
t.Errorf("Test %d %v: Provisioning: %v", i, tc.match, err)
continue
}
err = tc.match.Validate()
if err != nil {
t.Errorf("Test %d %v: Validating: %v", i, tc.match, err)
continue
}
// set up the fake request and its Replacer
req := &http.Request{URL: &url.URL{Path: tc.input}}
repl := &Replacer{req: req, resp: httptest.NewRecorder(), custom: make(map[string]string)}
ctx := context.WithValue(req.Context(), ReplacerCtxKey, repl)
req = req.WithContext(ctx)
actual := tc.match.Match(req)
if actual != tc.expect {
t.Errorf("Test %d [%v]: Expected %t, got %t for input '%s'",
i, tc.match.Pattern, tc.expect, actual, tc.input)
continue
}
for key, expectVal := range tc.expectRepl {
placeholder := fmt.Sprintf("{http.matchers.path_regexp.%s}", key)
actualVal := repl.Replace(placeholder, "<empty>")
if actualVal != expectVal {
t.Errorf("Test %d [%v]: Expected placeholder {http.matchers.path_regexp.%s} to be '%s' but got '%s'",
i, tc.match.Pattern, key, expectVal, actualVal)
continue
}
}
}
}
func TestHeaderMatcher(t *testing.T) {
for i, tc := range []struct {
match matchHeader
input http.Header // make sure these are canonical cased (std lib will do that in a real request)
expect bool
}{
{
match: matchHeader{"Field": []string{"foo"}},
input: http.Header{"Field": []string{"foo"}},
expect: true,
},
{
match: matchHeader{"Field": []string{"foo", "bar"}},
input: http.Header{"Field": []string{"bar"}},
expect: true,
},
{
match: matchHeader{"Field": []string{"foo", "bar"}},
input: http.Header{"Alakazam": []string{"kapow"}},
expect: false,
},
{
match: matchHeader{"Field": []string{"foo", "bar"}},
input: http.Header{"Field": []string{"kapow"}},
expect: false,
},
{
match: matchHeader{"Field": []string{"foo", "bar"}},
input: http.Header{"Field": []string{"kapow", "foo"}},
expect: true,
},
{
match: matchHeader{"Field1": []string{"foo"}, "Field2": []string{"bar"}},
input: http.Header{"Field1": []string{"foo"}, "Field2": []string{"bar"}},
expect: true,
},
{
match: matchHeader{"field1": []string{"foo"}, "field2": []string{"bar"}},
input: http.Header{"Field1": []string{"foo"}, "Field2": []string{"bar"}},
expect: true,
},
{
match: matchHeader{"field1": []string{"foo"}, "field2": []string{"bar"}},
input: http.Header{"Field1": []string{"foo"}, "Field2": []string{"kapow"}},
expect: false,
},
} {
req := &http.Request{Header: tc.input}
actual := tc.match.Match(req)
if actual != tc.expect {
t.Errorf("Test %d %v: Expected %t, got %t for '%s'", i, tc.match, tc.expect, actual, tc.input)
continue
}
}
}
func TestHeaderREMatcher(t *testing.T) {
for i, tc := range []struct {
match matchHeaderRE
input http.Header // make sure these are canonical cased (std lib will do that in a real request)
expect bool
expectRepl map[string]string
}{
{
match: matchHeaderRE{"Field": &matchRegexp{Pattern: "foo"}},
input: http.Header{"Field": []string{"foo"}},
expect: true,
},
{
match: matchHeaderRE{"Field": &matchRegexp{Pattern: "$foo^"}},
input: http.Header{"Field": []string{"foobar"}},
expect: false,
},
{
match: matchHeaderRE{"Field": &matchRegexp{Pattern: "^foo(.*)$", Name: "name"}},
input: http.Header{"Field": []string{"foobar"}},
expect: true,
expectRepl: map[string]string{"name.1": "bar"},
},
} {
// compile the regexp and validate its name
err := tc.match.Provision()
if err != nil {
t.Errorf("Test %d %v: Provisioning: %v", i, tc.match, err)
continue
}
err = tc.match.Validate()
if err != nil {
t.Errorf("Test %d %v: Validating: %v", i, tc.match, err)
continue
}
// set up the fake request and its Replacer
req := &http.Request{Header: tc.input, URL: new(url.URL)}
repl := &Replacer{req: req, resp: httptest.NewRecorder(), custom: make(map[string]string)}
ctx := context.WithValue(req.Context(), ReplacerCtxKey, repl)
req = req.WithContext(ctx)
actual := tc.match.Match(req)
if actual != tc.expect {
t.Errorf("Test %d [%v]: Expected %t, got %t for input '%s'",
i, tc.match, tc.expect, actual, tc.input)
continue
}
for key, expectVal := range tc.expectRepl {
placeholder := fmt.Sprintf("{http.matchers.header_regexp.%s}", key)
actualVal := repl.Replace(placeholder, "<empty>")
if actualVal != expectVal {
t.Errorf("Test %d [%v]: Expected placeholder {http.matchers.header_regexp.%s} to be '%s' but got '%s'",
i, tc.match, key, expectVal, actualVal)
continue
}
}
}
}

View file

@ -8,7 +8,9 @@ import (
)
// Replacer can replace values in strings based
// on a request and/or response writer.
// on a request and/or response writer. The zero
// Replacer is not valid; it must be initialized
// within this package.
type Replacer struct {
req *http.Request
resp http.ResponseWriter
@ -75,18 +77,17 @@ func (r *Replacer) defaults() map[string]string {
}(),
}
// TODO: why should header fields, cookies, and query params get special treatment like this?
// maybe they should be scoped by words like "request.header." just like everything else.
for field, vals := range r.req.Header {
m[">"+strings.ToLower(field)] = strings.Join(vals, ",")
}
for field, vals := range r.resp.Header() {
m["<"+strings.ToLower(field)] = strings.Join(vals, ",")
}
for _, cookie := range r.req.Cookies() {
m["~"+cookie.Name] = cookie.Value
}
for param, vals := range r.req.URL.Query() {
m["?"+param] = strings.Join(vals, ",")
}

View file

@ -7,16 +7,13 @@ import (
"net/http"
)
// TODO: Is this type really required? Wouldn't embedding the
// default ResponseWriter always work too, when wrapping it?
// ResponseWriterWrapper wraps an underlying ResponseWriter and
// promotes its Pusher/Flusher/CloseNotifier/Hijacker methods
// as well. To use this type, embed a pointer to it within your
// own struct type that implements the http.ResponseWriter
// interface, then call methods on the embedded value. You can
// make sure your type wraps correctly by asserting that it
// implements the HTTPInterfaces interface.
// promotes its Pusher/Flusher/Hijacker methods as well. To use
// this type, embed a pointer to it within your own struct type
// that implements the http.ResponseWriter interface, then call
// methods on the embedded value. You can make sure your type
// wraps correctly by asserting that it implements the
// HTTPInterfaces interface.
type ResponseWriterWrapper struct {
http.ResponseWriter
}
@ -40,15 +37,6 @@ func (rww *ResponseWriterWrapper) Flush() {
}
}
// CloseNotify implements http.CloseNotifier. It simply calls the underlying
// ResponseWriter's CloseNotify method if there is one, or panics.
func (rww *ResponseWriterWrapper) CloseNotify() <-chan bool {
if cn, ok := rww.ResponseWriter.(http.CloseNotifier); ok {
return cn.CloseNotify()
}
panic("not a close notifier")
}
// Push implements http.Pusher. It simply calls the underlying
// ResponseWriter's Push method if there is one, or returns an error.
func (rww *ResponseWriterWrapper) Push(target string, opts *http.PushOptions) error {
@ -63,7 +51,6 @@ type HTTPInterfaces interface {
http.ResponseWriter
http.Pusher
http.Flusher
http.CloseNotifier
http.Hijacker
}

View file

@ -13,7 +13,7 @@ type serverRoute struct {
Apply []json.RawMessage `json:"apply"`
Respond json.RawMessage `json:"respond"`
Exclusive bool `json:"exclusive"`
Terminal bool `json:"terminal"`
// decoded values
matchers []RouteMatcher
@ -49,9 +49,7 @@ routeLoop:
if responder == nil {
responder = route.responder
}
// TODO: Should exclusive apply to only middlewares, or responder too?
// i.e. what if they haven't set a responder yet, but the first middleware chain is exclusive...
if route.Exclusive {
if route.Terminal {
break
}
}