From a479943acd70068c4b80d3a8f4b8dd7ab93ca2ba Mon Sep 17 00:00:00 2001 From: Matt Holt Date: Tue, 16 Aug 2022 08:48:57 -0600 Subject: [PATCH] caddyhttp: Smarter path matching and rewriting (#4948) Co-authored-by: RussellLuo --- modules/caddyhttp/caddyhttp.go | 35 +++ modules/caddyhttp/caddyhttp_test.go | 57 ++++ modules/caddyhttp/fileserver/staticfiles.go | 12 +- modules/caddyhttp/matchers.go | 322 +++++++++++++++----- modules/caddyhttp/matchers_test.go | 153 +++++++++- modules/caddyhttp/rewrite/rewrite.go | 110 +++++-- modules/caddyhttp/rewrite/rewrite_test.go | 41 +++ 7 files changed, 616 insertions(+), 114 deletions(-) diff --git a/modules/caddyhttp/caddyhttp.go b/modules/caddyhttp/caddyhttp.go index 784b2b90..c9cc9e6d 100644 --- a/modules/caddyhttp/caddyhttp.go +++ b/modules/caddyhttp/caddyhttp.go @@ -20,6 +20,7 @@ import ( "io" "net" "net/http" + "path" "path/filepath" "strconv" "strings" @@ -244,6 +245,40 @@ func SanitizedPathJoin(root, reqPath string) string { return path } +// CleanPath cleans path p according to path.Clean(), but only +// merges repeated slashes if collapseSlashes is true, and always +// preserves trailing slashes. +func CleanPath(p string, collapseSlashes bool) string { + if collapseSlashes { + return cleanPath(p) + } + + // insert an invalid/impossible URI character into each two consecutive + // slashes to expand empty path segments; then clean the path as usual, + // and then remove the remaining temporary characters. + const tmpCh = 0xff + var sb strings.Builder + for i, ch := range p { + if ch == '/' && i > 0 && p[i-1] == '/' { + sb.WriteByte(tmpCh) + } + sb.WriteRune(ch) + } + halfCleaned := cleanPath(sb.String()) + halfCleaned = strings.ReplaceAll(halfCleaned, string([]byte{tmpCh}), "") + + return halfCleaned +} + +// cleanPath does path.Clean(p) but preserves any trailing slash. +func cleanPath(p string) string { + cleaned := path.Clean(p) + if cleaned != "/" && strings.HasSuffix(p, "/") { + cleaned = cleaned + "/" + } + return cleaned +} + // tlsPlaceholderWrapper is a no-op listener wrapper that marks // where the TLS listener should be in a chain of listener wrappers. 
// It should only be used if another listener wrapper must be placed diff --git a/modules/caddyhttp/caddyhttp_test.go b/modules/caddyhttp/caddyhttp_test.go index 09011fe9..1bca4d60 100644 --- a/modules/caddyhttp/caddyhttp_test.go +++ b/modules/caddyhttp/caddyhttp_test.go @@ -92,3 +92,60 @@ func TestSanitizedPathJoin(t *testing.T) { } } } + +func TestCleanPath(t *testing.T) { + for i, tc := range []struct { + input string + mergeSlashes bool + expect string + }{ + { + input: "/foo", + expect: "/foo", + }, + { + input: "/foo/", + expect: "/foo/", + }, + { + input: "//foo", + expect: "//foo", + }, + { + input: "//foo", + mergeSlashes: true, + expect: "/foo", + }, + { + input: "/foo//bar/", + mergeSlashes: true, + expect: "/foo/bar/", + }, + { + input: "/foo/./.././bar", + expect: "/bar", + }, + { + input: "/foo//./..//./bar", + expect: "/foo//bar", + }, + { + input: "/foo///./..//./bar", + expect: "/foo///bar", + }, + { + input: "/foo///./..//.", + expect: "/foo//", + }, + { + input: "/foo//./bar", + expect: "/foo//bar", + }, + } { + actual := CleanPath(tc.input, tc.mergeSlashes) + if actual != tc.expect { + t.Errorf("Test %d [input='%s' mergeSlashes=%t]: Got '%s', expected '%s'", + i, tc.input, tc.mergeSlashes, actual, tc.expect) + } + } +} diff --git a/modules/caddyhttp/fileserver/staticfiles.go b/modules/caddyhttp/fileserver/staticfiles.go index a745d5a8..554f8d30 100644 --- a/modules/caddyhttp/fileserver/staticfiles.go +++ b/modules/caddyhttp/fileserver/staticfiles.go @@ -23,7 +23,6 @@ import ( weakrand "math/rand" "mime" "net/http" - "net/url" "os" "path" "path/filepath" @@ -236,16 +235,7 @@ func (fsrv *FileServer) ServeHTTP(w http.ResponseWriter, r *http.Request, next c filesToHide := fsrv.transformHidePaths(repl) root := repl.ReplaceAll(fsrv.Root, ".") - // PathUnescape returns an error if the escapes aren't well-formed, - // meaning the count % matches the RFC. Return early if the escape is - // improper. - if _, err := url.PathUnescape(r.URL.Path); err != nil { - fsrv.logger.Debug("improper path escape", - zap.String("site_root", root), - zap.String("request_path", r.URL.Path), - zap.Error(err)) - return err - } + filename := caddyhttp.SanitizedPathJoin(root, r.URL.Path) fsrv.logger.Debug("sanitized path join", diff --git a/modules/caddyhttp/matchers.go b/modules/caddyhttp/matchers.go index bb957d6a..d6bbe7e4 100644 --- a/modules/caddyhttp/matchers.go +++ b/modules/caddyhttp/matchers.go @@ -23,7 +23,6 @@ import ( "net/textproto" "net/url" "path" - "path/filepath" "reflect" "regexp" "sort" @@ -63,20 +62,51 @@ type ( // Duplicate entries will return an error. MatchHost []string - // MatchPath matches requests by the URI's path (case-insensitive). Path - // matches are exact, but wildcards may be used: + // MatchPath case-insensitively matches requests by the URI's path. Path + // matching is exact, not prefix-based, giving you more control and clarity + // over matching. Wildcards (`*`) may be used: // - // - At the end, for a prefix match (`/prefix/*`) - // - At the beginning, for a suffix match (`*.suffix`) - // - On both sides, for a substring match (`*/contains/*`) + // - At the end only, for a prefix match (`/prefix/*`) + // - At the beginning only, for a suffix match (`*.suffix`) + // - On both sides only, for a substring match (`*/contains/*`) // - In the middle, for a globular match (`/accounts/*/info`) // + // Slashes are significant; i.e. `/foo*` matches `/foo`, `/foo/`, `/foo/bar`, + // and `/foobar`; but `/foo/*` does not match `/foo` or `/foobar`. 
Valid
+ // paths start with a slash `/`.
+ //
+ // Because there are, in general, multiple possible escaped forms of any
+ // path, path matchers operate in unescaped space; that is, path matchers
+ // should be written in their unescaped form to prevent ambiguities and
+ // possible security issues, as all request paths will be normalized to
+ // their unescaped forms before matcher evaluation.
+ //
+ // However, escape sequences in a match pattern are supported; they are
+ // compared with the request's raw/escaped path for those bytes only.
+ // In other words, a matcher of `/foo%2Fbar` will match a request path
+ // of precisely `/foo%2Fbar`, but not `/foo/bar`. It follows that matching
+ // the literal percent sign (%) in normalized space can be done using the
+ // escaped form, `%25`.
+ //
+ // Even though wildcards (`*`) operate in the normalized space, the special
+ // escaped wildcard (`%*`), which is not a valid escape sequence, may be
+ // used in place of a span that should NOT be decoded; that is, `/bands/%*`
+ // will match `/bands/AC%2fDC` whereas `/bands/*` will not.
+ //
 // This matcher is fast, so it does not support regular expressions or
 // capture groups. For slower but more powerful matching, use the
- // path_regexp matcher.
+ // path_regexp matcher. (Note that due to the special treatment of
+ // escape sequences in matcher patterns, they may perform slightly slower
+ // in high-traffic environments.)
 MatchPath []string

 // MatchPathRE matches requests by a regular expression on the URI's path.
+ // Path matching is performed in the unescaped (decoded) form of the path.
 //
 // Upon a match, it adds placeholders to the request: `{http.regexp.name.capture_group}`
 // where `name` is the regular expression's name, and `capture_group` is either
@@ -303,7 +333,8 @@ outer:
 // expression matchers.
 //
 // Example:
-// expression host('localhost')
+//
+// expression host('localhost')
 func (MatchHost) CELLibrary(ctx caddy.Context) (cel.Library, error) {
 return CELMatcherImpl(
 "host",
@@ -342,6 +373,7 @@ func (MatchPath) CaddyModule() caddy.ModuleInfo {
 // Provision lower-cases the paths in m to ensure case-insensitive matching.
 func (m MatchPath) Provision(_ caddy.Context) error {
 for i := range m {
+ // TODO: if m[i] == "*", put it first and delete all others (will always match)
 m[i] = strings.ToLower(m[i])
 }
 return nil
@@ -349,77 +381,108 @@ func (m MatchPath) Provision(_ caddy.Context) error {
 // Match returns true if r matches m.
 func (m MatchPath) Match(r *http.Request) bool {
- // PathUnescape returns an error if the escapes aren't
- // well-formed, meaning the count % matches the RFC.
- // Return early if the escape is improper.
- unescapedPath, err := url.PathUnescape(r.URL.Path)
- if err != nil {
- return false
- }
+ // Even though RFC 9110 says that path matching is case-sensitive
+ // (https://www.rfc-editor.org/rfc/rfc9110.html#section-4.2.3),
+ // we do case-insensitive matching to mitigate security issues
+ // related to differences between operating systems, applications,
+ // etc; if case-sensitive matching is needed, the regex matcher
+ // can be used instead.
+ reqPath := strings.ToLower(r.URL.Path)

- lowerPath := strings.ToLower(unescapedPath)
-
- // Clean the path, merges doubled slashes, etc.
- // This ensures maliciously crafted requests can't bypass - // the path matcher. See #4407 - lowerPath = path.Clean(lowerPath) - - // see #2917; Windows ignores trailing dots and spaces + // See #2917; Windows ignores trailing dots and spaces // when accessing files (sigh), potentially causing a // security risk (cry) if PHP files end up being served // as static files, exposing the source code, instead of - // being matched by *.php to be treated as PHP scripts - lowerPath = strings.TrimRight(lowerPath, ". ") - - // Cleaning may remove the trailing slash, but we want to keep it - if lowerPath != "/" && strings.HasSuffix(r.URL.Path, "/") { - lowerPath = lowerPath + "/" - } + // being matched by *.php to be treated as PHP scripts. + reqPath = strings.TrimRight(reqPath, ". ") repl := r.Context().Value(caddy.ReplacerCtxKey).(*caddy.Replacer) - for _, matchPath := range m { - matchPath = repl.ReplaceAll(matchPath, "") + for _, matchPattern := range m { + matchPattern = repl.ReplaceAll(matchPattern, "") // special case: whole path is wildcard; this is unnecessary // as it matches all requests, which is the same as no matcher - if matchPath == "*" { + if matchPattern == "*" { return true } + // Clean the path, merge doubled slashes, etc. + // This ensures maliciously crafted requests can't bypass + // the path matcher. See #4407. Good security posture + // requires that we should do all we can to reduce any + // funny-looking paths into "normalized" forms such that + // weird variants can't sneak by. + // + // How we clean the path depends on the kind of pattern: + // we either merge slashes or we don't. If the pattern + // has double slashes, we preserve them in the path. + // + // TODO: Despite the fact that the *vast* majority of path + // matchers have only 1 pattern, a possible optimization is + // to remember the cleaned form of the path for future + // iterations; it's just that the way we clean depends on + // the kind of pattern. + + mergeSlashes := !strings.Contains(matchPattern, "//") + + // if '%' appears in the match pattern, we interpret that to mean + // the intent is to compare that part of the path in raw/escaped + // space; i.e. 
"%40"=="%40", not "@", and "%2F"=="%2F", not "/" + if strings.Contains(matchPattern, "%") { + reqPathForPattern := CleanPath(r.URL.EscapedPath(), mergeSlashes) + if m.matchPatternWithEscapeSequence(reqPathForPattern, matchPattern) { + return true + } + + // doing prefix/suffix/substring matches doesn't make sense + continue + } + + reqPathForPattern := CleanPath(reqPath, mergeSlashes) + + // for substring, prefix, and suffix matching, only perform those + // special, fast matches if they are the only wildcards in the pattern; + // otherwise we assume a globular match if any * appears in the middle + // special case: first and last characters are wildcard, // treat it as a fast substring match - if len(matchPath) > 1 && - strings.HasPrefix(matchPath, "*") && - strings.HasSuffix(matchPath, "*") { - if strings.Contains(lowerPath, matchPath[1:len(matchPath)-1]) { + if strings.Count(matchPattern, "*") == 2 && + strings.HasPrefix(matchPattern, "*") && + strings.HasSuffix(matchPattern, "*") && + strings.Count(matchPattern, "*") == 2 { + if strings.Contains(reqPathForPattern, matchPattern[1:len(matchPattern)-1]) { return true } continue } - // special case: first character is a wildcard, - // treat it as a fast suffix match - if strings.HasPrefix(matchPath, "*") { - if strings.HasSuffix(lowerPath, matchPath[1:]) { - return true + // only perform prefix/suffix match if it is the only wildcard... + // I think that is more correct most of the time + if strings.Count(matchPattern, "*") == 1 { + // special case: first character is a wildcard, + // treat it as a fast suffix match + if strings.HasPrefix(matchPattern, "*") { + if strings.HasSuffix(reqPathForPattern, matchPattern[1:]) { + return true + } + continue + } + + // special case: last character is a wildcard, + // treat it as a fast prefix match + if strings.HasSuffix(matchPattern, "*") { + if strings.HasPrefix(reqPathForPattern, matchPattern[:len(matchPattern)-1]) { + return true + } + continue } - continue } - // special case: last character is a wildcard, - // treat it as a fast prefix match - if strings.HasSuffix(matchPath, "*") { - if strings.HasPrefix(lowerPath, matchPath[:len(matchPath)-1]) { - return true - } - continue - } - - // for everything else, try globular matching, which also - // is exact matching if there are no glob/wildcard chars; - // can ignore error here because we can't handle it anyway - matches, _ := filepath.Match(matchPath, lowerPath) + // at last, use globular matching, which also is exact matching + // if there are no glob/wildcard chars; we ignore the error here + // because we can't handle it anyway + matches, _ := path.Match(matchPattern, reqPathForPattern) if matches { return true } @@ -427,11 +490,118 @@ func (m MatchPath) Match(r *http.Request) bool { return false } +func (MatchPath) matchPatternWithEscapeSequence(escapedPath, matchPath string) bool { + // We would just compare the pattern against r.URL.Path, + // but the pattern contains %, indicating that we should + // compare at least some part of the path in raw/escaped + // space, not normalized space; so we build the string we + // will compare against by adding the normalized parts + // of the path, then switching to the escaped parts where + // the pattern hints to us wherever % is present. 
+ var sb strings.Builder + + // iterate the pattern and escaped path in lock-step; + // increment iPattern every time we consume a char from the pattern, + // increment iPath every time we consume a char from the path; + // iPattern and iPath are our cursors/iterator positions for each string + var iPattern, iPath int + for { + if iPattern >= len(matchPath) || iPath >= len(escapedPath) { + break + } + + // get the next character from the request path + + pathCh := string(escapedPath[iPath]) + var escapedPathCh string + + // normalize (decode) escape sequences + if pathCh == "%" && len(escapedPath) >= iPath+3 { + // hold onto this in case we find out the intent is to match in escaped space here; + // we lowercase it even though technically the spec says: "For consistency, URI + // producers and normalizers should use uppercase hexadecimal digits for all percent- + // encodings" (RFC 3986 section 2.1) - we lowercased the matcher pattern earlier in + // provisioning so we do the same here to gain case-insensitivity in equivalence; + // besides, this string is never shown visibly + escapedPathCh = strings.ToLower(escapedPath[iPath : iPath+3]) + + var err error + pathCh, err = url.PathUnescape(escapedPathCh) + if err != nil { + // should be impossible unless EscapedPath() is giving us an invalid sequence! + return false + } + iPath += 2 // escape sequence is 2 bytes longer than normal char + } + + // now get the next character from the pattern + + normalize := true + switch matchPath[iPattern] { + case '%': + // escape sequence + + // if not a wildcard ("%*"), compare literally; consume next two bytes of pattern + if len(matchPath) >= iPattern+3 && matchPath[iPattern+1] != '*' { + sb.WriteString(escapedPathCh) + iPath++ + iPattern += 2 + break + } + + // escaped wildcard sequence; consume next byte only ('*') + iPattern++ + normalize = false + + fallthrough + case '*': + // wildcard, so consume until next matching character + remaining := escapedPath[iPath:] + until := len(escapedPath) - iPath // go until end of string... + if iPattern < len(matchPath)-1 { // ...unless the * is not at the end + nextCh := matchPath[iPattern+1] + until = strings.IndexByte(remaining, nextCh) + if until == -1 { + // terminating char of wildcard span not found, so definitely no match + return false + } + } + if until == 0 { + // empty span; nothing to add on this iteration + break + } + next := remaining[:until] + if normalize { + var err error + next, err = url.PathUnescape(next) + if err != nil { + return false // should be impossible anyway + } + } + sb.WriteString(next) + iPath += until + default: + sb.WriteString(pathCh) + iPath++ + } + + iPattern++ + } + + // we can now treat rawpath globs (%*) as regular globs (*) + matchPath = strings.ReplaceAll(matchPath, "%*", "*") + + // ignore error here because we can't handle it anyway= + matches, _ := path.Match(matchPath, sb.String()) + return matches +} + // CELLibrary produces options that expose this matcher for use in CEL // expression matchers. // // Example: -// expression path('*substring*', '*suffix') +// +// expression path('*substring*', '*suffix') func (MatchPath) CELLibrary(ctx caddy.Context) (cel.Library, error) { return CELMatcherImpl( // name of the macro, this is the function name that users see when writing expressions. 
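To make the new matching semantics concrete, here is a small usage sketch (not part of the patch) that mirrors the setup used in matchers_test.go below; it assumes only the standard Caddy v2 import paths and the exported `MatchPath` API shown in this diff:

```go
package main

import (
	"context"
	"fmt"
	"net/http"
	"net/url"

	"github.com/caddyserver/caddy/v2"
	"github.com/caddyserver/caddy/v2/modules/caddyhttp"
)

// matches provisions a single-pattern path matcher and runs it against
// a request whose URL is parsed from an escaped request path.
func matches(pattern, requestPath string) bool {
	m := caddyhttp.MatchPath{pattern}
	if err := m.Provision(caddy.Context{}); err != nil {
		panic(err)
	}
	u, err := url.ParseRequestURI(requestPath)
	if err != nil {
		panic(err)
	}
	req := &http.Request{URL: u}
	// MatchPath.Match expects a replacer in the request context.
	repl := caddy.NewReplacer()
	req = req.WithContext(context.WithValue(req.Context(), caddy.ReplacerCtxKey, repl))
	return m.Match(req)
}

func main() {
	fmt.Println(matches("/bands/*/*", "/bands/AC%2FDC/T.N.T"))   // false: * operates in decoded space
	fmt.Println(matches("/bands/%*/%*", "/bands/AC%2FDC/T.N.T")) // true: %* spans stay encoded
	fmt.Println(matches("/foo/%2F*", "/foo/%2Fbar"))             // true: literal escape required by pattern
	fmt.Println(matches("/foo/%2F*", "/foo//bar"))               // false
}
```

The expected results correspond to the `/bands/...` and `/foo/%2F*` test cases added in matchers_test.go below.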
@@ -477,23 +647,10 @@ func (MatchPathRE) CaddyModule() caddy.ModuleInfo { func (m MatchPathRE) Match(r *http.Request) bool { repl := r.Context().Value(caddy.ReplacerCtxKey).(*caddy.Replacer) - // PathUnescape returns an error if the escapes aren't - // well-formed, meaning the count % matches the RFC. - // Return early if the escape is improper. - unescapedPath, err := url.PathUnescape(r.URL.Path) - if err != nil { - return false - } - // Clean the path, merges doubled slashes, etc. // This ensures maliciously crafted requests can't bypass // the path matcher. See #4407 - cleanedPath := path.Clean(unescapedPath) - - // Cleaning may remove the trailing slash, but we want to keep it - if cleanedPath != "/" && strings.HasSuffix(r.URL.Path, "/") { - cleanedPath = cleanedPath + "/" - } + cleanedPath := cleanPath(r.URL.Path) return m.MatchRegexp.Match(cleanedPath, repl) } @@ -502,7 +659,8 @@ func (m MatchPathRE) Match(r *http.Request) bool { // expression matchers. // // Example: -// expression path_regexp('^/bar') +// +// expression path_regexp('^/bar') func (MatchPathRE) CELLibrary(ctx caddy.Context) (cel.Library, error) { unnamedPattern, err := CELMatcherImpl( "path_regexp", @@ -575,7 +733,8 @@ func (m MatchMethod) Match(r *http.Request) bool { // expression matchers. // // Example: -// expression method('PUT', 'POST') +// +// expression method('PUT', 'POST') func (MatchMethod) CELLibrary(_ caddy.Context) (cel.Library, error) { return CELMatcherImpl( "method", @@ -661,7 +820,8 @@ func (m MatchQuery) Match(r *http.Request) bool { // expression matchers. // // Example: -// expression query({'sort': 'asc'}) || query({'foo': ['*bar*', 'baz']}) +// +// expression query({'sort': 'asc'}) || query({'foo': ['*bar*', 'baz']}) func (MatchQuery) CELLibrary(_ caddy.Context) (cel.Library, error) { return CELMatcherImpl( "query", @@ -736,8 +896,9 @@ func (m MatchHeader) Match(r *http.Request) bool { // expression matchers. // // Example: -// expression header({'content-type': 'image/png'}) -// expression header({'foo': ['bar', 'baz']}) // match bar or baz +// +// expression header({'content-type': 'image/png'}) +// expression header({'foo': ['bar', 'baz']}) // match bar or baz func (MatchHeader) CELLibrary(_ caddy.Context) (cel.Library, error) { return CELMatcherImpl( "header", @@ -894,7 +1055,8 @@ func (m MatchHeaderRE) Validate() error { // expression matchers. // // Example: -// expression header_regexp('foo', 'Field', 'fo+') +// +// expression header_regexp('foo', 'Field', 'fo+') func (MatchHeaderRE) CELLibrary(ctx caddy.Context) (cel.Library, error) { unnamedPattern, err := CELMatcherImpl( "header_regexp", @@ -978,7 +1140,8 @@ func (m *MatchProtocol) UnmarshalCaddyfile(d *caddyfile.Dispenser) error { // expression matchers. // // Example: -// expression protocol('https') +// +// expression protocol('https') func (MatchProtocol) CELLibrary(_ caddy.Context) (cel.Library, error) { return CELMatcherImpl( "protocol", @@ -1097,7 +1260,8 @@ func (m *MatchRemoteIP) UnmarshalCaddyfile(d *caddyfile.Dispenser) error { // expression matchers. // // Example: -// expression remote_ip('forwarded', '192.168.0.0/16', '172.16.0.0/12', '10.0.0.0/8') +// +// expression remote_ip('forwarded', '192.168.0.0/16', '172.16.0.0/12', '10.0.0.0/8') func (MatchRemoteIP) CELLibrary(ctx caddy.Context) (cel.Library, error) { return CELMatcherImpl( // name of the macro, this is the function name that users see when writing expressions. 
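Stepping back to the `CleanPath` helper added in caddyhttp.go at the top of this patch: a minimal usage sketch (not part of the patch), with expected outputs taken from the `TestCleanPath` cases above:

```go
package main

import (
	"fmt"

	"github.com/caddyserver/caddy/v2/modules/caddyhttp"
)

func main() {
	// Repeated slashes are preserved unless collapsing is requested.
	fmt.Println(caddyhttp.CleanPath("//foo", false)) // "//foo"
	fmt.Println(caddyhttp.CleanPath("//foo", true))  // "/foo"

	// Dot elements are resolved either way, and trailing slashes survive.
	fmt.Println(caddyhttp.CleanPath("/foo//./..//./bar", false)) // "/foo//bar"
	fmt.Println(caddyhttp.CleanPath("/foo//bar/", true))         // "/foo/bar/"
}
```

The boolean argument is what lets the path matcher and the rewrite handler preserve doubled slashes whenever a pattern itself contains `//`.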
diff --git a/modules/caddyhttp/matchers_test.go b/modules/caddyhttp/matchers_test.go index bd4606bc..4d5538cd 100644 --- a/modules/caddyhttp/matchers_test.go +++ b/modules/caddyhttp/matchers_test.go @@ -158,9 +158,10 @@ func TestHostMatcher(t *testing.T) { func TestPathMatcher(t *testing.T) { for i, tc := range []struct { - match MatchPath - input string - expect bool + match MatchPath // not URI-encoded because not parsing from a URI + input string // should be valid URI encoding (escaped) since it will become part of a request + expect bool + provisionErr bool }{ { match: MatchPath{}, @@ -252,6 +253,11 @@ func TestPathMatcher(t *testing.T) { input: "/FOOOO", expect: true, }, + { + match: MatchPath{"*.php"}, + input: "/foo/index.php. .", + expect: true, + }, { match: MatchPath{"/foo/bar.txt"}, input: "/foo/BAR.txt", @@ -263,10 +269,60 @@ func TestPathMatcher(t *testing.T) { expect: true, }, { - match: MatchPath{"/foo*"}, + match: MatchPath{"/foo"}, input: "//foo", expect: true, }, + { + match: MatchPath{"//foo"}, + input: "/foo", + expect: false, + }, + { + match: MatchPath{"//foo"}, + input: "//foo", + expect: true, + }, + { + match: MatchPath{"/foo//*"}, + input: "/foo//bar", + expect: true, + }, + { + match: MatchPath{"/foo//*"}, + input: "/foo/%2Fbar", + expect: true, + }, + { + match: MatchPath{"/foo/%2F*"}, + input: "/foo/%2Fbar", + expect: true, + }, + { + match: MatchPath{"/foo/%2F*"}, + input: "/foo//bar", + expect: false, + }, + { + match: MatchPath{"/foo//bar"}, + input: "/foo//bar", + expect: true, + }, + { + match: MatchPath{"/foo/*//bar"}, + input: "/foo///bar", + expect: true, + }, + { + match: MatchPath{"/foo/%*//bar"}, + input: "/foo///bar", + expect: true, + }, + { + match: MatchPath{"/foo/%*//bar"}, + input: "/foo//%2Fbar", + expect: true, + }, { match: MatchPath{"/foo*"}, input: "/%2F/foo", @@ -292,8 +348,79 @@ func TestPathMatcher(t *testing.T) { input: "/foo/bar", expect: true, }, + // notice these next three test cases are the same normalized path but are written differently + { + match: MatchPath{"/%25@.txt"}, + input: "/%25@.txt", + expect: true, + }, + { + match: MatchPath{"/%25@.txt"}, + input: "/%25%40.txt", + expect: true, + }, + { + match: MatchPath{"/%25%40.txt"}, + input: "/%25%40.txt", + expect: true, + }, + { + match: MatchPath{"/bands/*/*"}, + input: "/bands/AC%2FDC/T.N.T", + expect: false, // because * operates in normalized space + }, + { + match: MatchPath{"/bands/%*/%*"}, + input: "/bands/AC%2FDC/T.N.T", + expect: true, + }, + { + match: MatchPath{"/bands/%*/%*"}, + input: "/bands/AC/DC/T.N.T", + expect: false, + }, + { + match: MatchPath{"/bands/%*"}, + input: "/bands/AC/DC", + expect: false, // not a suffix match + }, + { + match: MatchPath{"/bands/%*"}, + input: "/bands/AC%2FDC", + expect: true, + }, + { + match: MatchPath{"/foo%2fbar/baz"}, + input: "/foo%2Fbar/baz", + expect: true, + }, + { + match: MatchPath{"/foo%2fbar/baz"}, + input: "/foo/bar/baz", + expect: false, + }, + { + match: MatchPath{"/foo/bar/baz"}, + input: "/foo%2fbar/baz", + expect: true, + }, } { - req := &http.Request{URL: &url.URL{Path: tc.input}} + err := tc.match.Provision(caddy.Context{}) + if err == nil && tc.provisionErr { + t.Errorf("Test %d %v: Expected error provisioning, but there was no error", i, tc.match) + } + if err != nil && !tc.provisionErr { + t.Errorf("Test %d %v: Expected no error provisioning, but there was an error: %v", i, tc.match, err) + } + if tc.provisionErr { + continue // if it's not supposed to provision properly, pointless to test it + } + + u, 
err := url.ParseRequestURI(tc.input) + if err != nil { + t.Fatalf("Test %d (%v): Invalid request URI (should be rejected by Go's HTTP server): %v", i, tc.input, err) + } + req := &http.Request{URL: u} repl := caddy.NewReplacer() ctx := context.WithValue(req.Context(), caddy.ReplacerCtxKey, repl) req = req.WithContext(ctx) @@ -387,6 +514,16 @@ func TestPathREMatcher(t *testing.T) { expect: true, expectRepl: map[string]string{"name.myparam": "bar"}, }, + { + match: MatchPathRE{MatchRegexp{Pattern: "^/%@.txt"}}, + input: "/%25@.txt", + expect: true, + }, + { + match: MatchPathRE{MatchRegexp{Pattern: "^/%25@.txt"}}, + input: "/%25@.txt", + expect: false, + }, } { // compile the regexp and validate its name err := tc.match.Provision(caddy.Context{}) @@ -401,7 +538,11 @@ func TestPathREMatcher(t *testing.T) { } // set up the fake request and its Replacer - req := &http.Request{URL: &url.URL{Path: tc.input}} + u, err := url.ParseRequestURI(tc.input) + if err != nil { + t.Fatalf("Test %d: Bad input URI: %v", i, err) + } + req := &http.Request{URL: u} repl := caddy.NewReplacer() ctx := context.WithValue(req.Context(), caddy.ReplacerCtxKey, repl) req = req.WithContext(ctx) diff --git a/modules/caddyhttp/rewrite/rewrite.go b/modules/caddyhttp/rewrite/rewrite.go index 7da23277..4316b5af 100644 --- a/modules/caddyhttp/rewrite/rewrite.go +++ b/modules/caddyhttp/rewrite/rewrite.go @@ -31,13 +31,25 @@ func init() { caddy.RegisterModule(Rewrite{}) } -// Rewrite is a middleware which can rewrite HTTP requests. +// Rewrite is a middleware which can rewrite/mutate HTTP requests. // -// The Method and URI properties are "setters": the request URI -// will be set to the given values. Other properties are "modifiers": -// they modify existing files but do not explicitly specify what the -// result will be. It is atypical to combine the use of setters and +// The Method and URI properties are "setters" (the request URI +// will be overwritten with the given values). Other properties are +// "modifiers" (they modify existing values in a differentiable +// way). It is atypical to combine the use of setters and // modifiers in a single rewrite. +// +// To ensure consistent behavior, prefix and suffix stripping is +// performed in the URL-decoded (unescaped, normalized) space by +// default except for the specific bytes where an escape sequence +// is used in the prefix or suffix pattern. +// +// For all modifiers, paths are cleaned before being modified so that +// multiple, consecutive slashes are collapsed into a single slash, +// and dot elements are resolved and removed. In the special case +// of a prefix, suffix, or substring containing "//" (repeated slashes), +// slashes will not be merged while cleaning the path so that +// the rewrite can be interpreted literally. type Rewrite struct { // Changes the request's HTTP verb. Method string `json:"method,omitempty"` @@ -59,9 +71,15 @@ type Rewrite struct { URI string `json:"uri,omitempty"` // Strips the given prefix from the beginning of the URI path. + // The prefix should be written in normalized (unescaped) form, + // but if an escaping (`%xx`) is used, the path will be required + // to have that same escape at that position in order to match. StripPathPrefix string `json:"strip_path_prefix,omitempty"` // Strips the given suffix from the end of the URI path. + // The suffix should be written in normalized (unescaped) form, + // but if an escaping (`%xx`) is used, the path will be required + // to have that same escape at that position in order to match. 
StripPathSuffix string `json:"strip_path_suffix,omitempty"` // Performs substring replacements on the URI. @@ -227,17 +245,18 @@ func (rewr Rewrite) Rewrite(r *http.Request, repl *caddy.Replacer) bool { // strip path prefix or suffix if rewr.StripPathPrefix != "" { prefix := repl.ReplaceAll(rewr.StripPathPrefix, "") - r.URL.RawPath = strings.TrimPrefix(r.URL.RawPath, prefix) - if p, err := url.PathUnescape(r.URL.RawPath); err == nil && p != "" { - r.URL.Path = p - } else { - r.URL.Path = strings.TrimPrefix(r.URL.Path, prefix) - } + mergeSlashes := !strings.Contains(prefix, "//") + changePath(r, func(escapedPath string) string { + escapedPath = caddyhttp.CleanPath(escapedPath, mergeSlashes) + return trimPathPrefix(escapedPath, prefix) + }) } if rewr.StripPathSuffix != "" { suffix := repl.ReplaceAll(rewr.StripPathSuffix, "") - changePath(r, func(pathOrRawPath string) string { - return strings.TrimSuffix(pathOrRawPath, suffix) + mergeSlashes := !strings.Contains(suffix, "//") + changePath(r, func(escapedPath string) string { + escapedPath = caddyhttp.CleanPath(escapedPath, mergeSlashes) + return reverse(trimPathPrefix(reverse(escapedPath), reverse(suffix))) }) } @@ -324,6 +343,58 @@ func buildQueryString(qs string, repl *caddy.Replacer) string { return sb.String() } +// trimPathPrefix is like strings.TrimPrefix, but customized for advanced URI +// path prefix matching. The string prefix will be trimmed from the beginning +// of escapedPath if escapedPath starts with prefix. Rather than a naive 1:1 +// comparison of each byte to determine if escapedPath starts with prefix, +// both strings are iterated in lock-step, and if prefix has a '%' encoding +// at a particular position, escapedPath must also have the same encoding +// representation for that character. In other words, if the prefix string +// uses the escaped form for a character, escapedPath must literally use the +// same escape at that position. Otherwise, all character comparisons are +// performed in normalized/unescaped space. +func trimPathPrefix(escapedPath, prefix string) string { + var iPath, iPrefix int + for { + if iPath >= len(escapedPath) || iPrefix >= len(prefix) { + break + } + + prefixCh := prefix[iPrefix] + ch := string(escapedPath[iPath]) + + if ch == "%" && prefixCh != '%' && len(escapedPath) >= iPath+3 { + var err error + ch, err = url.PathUnescape(escapedPath[iPath : iPath+3]) + if err != nil { + // should be impossible unless EscapedPath() is returning invalid values! + return escapedPath + } + iPath += 2 + } + + // prefix comparisons are case-insensitive to consistency with + // path matcher, which is case-insensitive for good reasons + if !strings.EqualFold(ch, string(prefixCh)) { + return escapedPath + } + + iPath++ + iPrefix++ + } + + // found matching prefix, trim it + return escapedPath[iPath:] +} + +func reverse(s string) string { + r := []rune(s) + for i, j := 0, len(r)-1; i < len(r)/2; i, j = i+1, j-1 { + r[i], r[j] = r[j], r[i] + } + return string(r) +} + // substrReplacer describes either a simple and fast substring replacement. type substrReplacer struct { // A substring to find. Supports placeholders. 
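A note on the suffix-stripping branch above: it reuses `trimPathPrefix` by reversing both the path and the suffix. For plain, escape-free strings the idea reduces to the following standalone sketch (the helper names here are illustrative only, not part of the patch):

```go
package main

import (
	"fmt"
	"strings"
)

// reverseString mirrors the unexported reverse() helper in the hunk above.
func reverseString(s string) string {
	r := []rune(s)
	for i, j := 0, len(r)-1; i < len(r)/2; i, j = i+1, j-1 {
		r[i], r[j] = r[j], r[i]
	}
	return string(r)
}

// trimSuffixViaPrefix shows why reversing turns a suffix trim into a prefix
// trim: a suffix of s is a prefix of reverse(s).
func trimSuffixViaPrefix(s, suffix string) string {
	return reverseString(strings.TrimPrefix(reverseString(s), reverseString(suffix)))
}

func main() {
	fmt.Println(trimSuffixViaPrefix("/foo/bar/suffix", "/suffix")) // "/foo/bar"
	fmt.Println(strings.TrimSuffix("/foo/bar/suffix", "/suffix"))  // same result
}
```

In the patch itself the prefix trim is the escape-aware `trimPathPrefix` rather than `strings.TrimPrefix`, which is what lets an escaped suffix such as `%2fsuffix` be honored byte-for-byte, as exercised by the rewrite tests below.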
@@ -351,8 +422,10 @@ func (rep substrReplacer) do(r *http.Request, repl *caddy.Replacer) { find := repl.ReplaceAll(rep.Find, "") replace := repl.ReplaceAll(rep.Replace, "") + mergeSlashes := !strings.Contains(rep.Find, "//") + changePath(r, func(pathOrRawPath string) string { - return strings.Replace(pathOrRawPath, find, replace, lim) + return strings.Replace(caddyhttp.CleanPath(pathOrRawPath, mergeSlashes), find, replace, lim) }) r.URL.RawQuery = strings.Replace(r.URL.RawQuery, find, replace, lim) @@ -380,16 +453,17 @@ func (rep regexReplacer) do(r *http.Request, repl *caddy.Replacer) { }) } -// changePath updates the path on the request URL. It first executes newVal on -// req.URL.RawPath, and if the result is a valid escaping, it will be copied -// into req.URL.Path; otherwise newVal is evaluated only on req.URL.Path. func changePath(req *http.Request, newVal func(pathOrRawPath string) string) { - req.URL.RawPath = newVal(req.URL.RawPath) + req.URL.RawPath = newVal(req.URL.EscapedPath()) if p, err := url.PathUnescape(req.URL.RawPath); err == nil && p != "" { req.URL.Path = p } else { req.URL.Path = newVal(req.URL.Path) } + // RawPath is only set if it's different from the normalized Path (std lib) + if req.URL.RawPath == req.URL.Path { + req.URL.RawPath = "" + } } // Interface guard diff --git a/modules/caddyhttp/rewrite/rewrite_test.go b/modules/caddyhttp/rewrite/rewrite_test.go index 84dce95e..bc20c853 100644 --- a/modules/caddyhttp/rewrite/rewrite_test.go +++ b/modules/caddyhttp/rewrite/rewrite_test.go @@ -235,6 +235,42 @@ func TestRewrite(t *testing.T) { input: newRequest(t, "GET", "/foo/prefix/bar"), expect: newRequest(t, "GET", "/foo/prefix/bar"), }, + { + rule: Rewrite{StripPathPrefix: "//prefix"}, + // scheme and host needed for URL parser to succeed in setting up test + input: newRequest(t, "GET", "http://host//prefix/foo/bar"), + expect: newRequest(t, "GET", "http://host/foo/bar"), + }, + { + rule: Rewrite{StripPathPrefix: "//prefix"}, + input: newRequest(t, "GET", "/prefix/foo/bar"), + expect: newRequest(t, "GET", "/prefix/foo/bar"), + }, + { + rule: Rewrite{StripPathPrefix: "/a%2Fb/c"}, + input: newRequest(t, "GET", "/a%2Fb/c/d"), + expect: newRequest(t, "GET", "/d"), + }, + { + rule: Rewrite{StripPathPrefix: "/a%2Fb/c"}, + input: newRequest(t, "GET", "/a%2fb/c/d"), + expect: newRequest(t, "GET", "/d"), + }, + { + rule: Rewrite{StripPathPrefix: "/a/b/c"}, + input: newRequest(t, "GET", "/a%2Fb/c/d"), + expect: newRequest(t, "GET", "/d"), + }, + { + rule: Rewrite{StripPathPrefix: "/a%2Fb/c"}, + input: newRequest(t, "GET", "/a/b/c/d"), + expect: newRequest(t, "GET", "/a/b/c/d"), + }, + { + rule: Rewrite{StripPathPrefix: "//a%2Fb/c"}, + input: newRequest(t, "GET", "/a/b/c/d"), + expect: newRequest(t, "GET", "/a/b/c/d"), + }, { rule: Rewrite{StripPathSuffix: "/suffix"}, @@ -251,6 +287,11 @@ func TestRewrite(t *testing.T) { input: newRequest(t, "GET", "/foo%2Fbar/suffix"), expect: newRequest(t, "GET", "/foo%2Fbar/"), }, + { + rule: Rewrite{StripPathSuffix: "%2fsuffix"}, + input: newRequest(t, "GET", "/foo%2Fbar%2fsuffix"), + expect: newRequest(t, "GET", "/foo%2Fbar"), + }, { rule: Rewrite{StripPathSuffix: "/suffix"}, input: newRequest(t, "GET", "/foo/suffix/bar"),
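Finally, the `RawPath` bookkeeping added at the end of `changePath` follows the standard library's convention that `URL.RawPath` is only a hint and is left empty when it adds nothing over `Path`. A quick illustration of that `net/url` behavior (independent of the patch):

```go
package main

import (
	"fmt"
	"net/url"
)

func main() {
	// When the escaped form differs from the decoded form, net/url keeps both.
	u, _ := url.ParseRequestURI("/a%2Fb/c")
	fmt.Printf("Path=%q RawPath=%q EscapedPath=%q\n", u.Path, u.RawPath, u.EscapedPath())
	// Path="/a/b/c" RawPath="/a%2Fb/c" EscapedPath="/a%2Fb/c"

	// When they are the same, RawPath stays empty and EscapedPath is derived
	// from Path; the patched changePath restores this invariant after rewriting.
	u2, _ := url.ParseRequestURI("/a/b/c")
	fmt.Printf("Path=%q RawPath=%q EscapedPath=%q\n", u2.Path, u2.RawPath, u2.EscapedPath())
	// Path="/a/b/c" RawPath="" EscapedPath="/a/b/c"
}
```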