Simple rewrite regex captures (#2592)

* More informative rewrite test output

When running rewrite tests, the output in case of a test failure now
includes not only the rewritten URLs but also the from URL.

* Move re-escaping to regexpMatches

This commit moves the code to post-process the match replacements from
ComplexRule to regexpMatches, so it can later be re-used for SimpleRule.

Also changes the comment in an attempt to better explain the reasoning
behind that code.

The required strings.Replacer is now built only once.

* Support regex captures in simple rewrite rules

Closes #2586
This commit is contained in:
AndreKR 2019-05-24 20:00:27 +02:00 committed by Matt Holt
parent 81430e4aff
commit 4bad5c79be
2 changed files with 25 additions and 12 deletions

View file

@ -95,9 +95,15 @@ func (s *SimpleRule) Match(r *http.Request) bool {
// Rewrite rewrites the internal location of the current request.
func (s *SimpleRule) Rewrite(fs http.FileSystem, r *http.Request) Result {
matches := regexpMatches(s.Regexp, "/", r.URL.Path)
replacer := newReplacer(r)
for i := 1; i < len(matches); i++ {
replacer.Set(fmt.Sprint(i), matches[i])
}
// attempt rewrite
return To(fs, r, s.To, newReplacer(r))
return To(fs, r, s.To, replacer)
}
// ComplexRule is a rewrite rule based on a regular expression
@ -198,16 +204,7 @@ func (r ComplexRule) Rewrite(fs http.FileSystem, req *http.Request) (re Result)
default:
// set regexp match variables {1}, {2} ...
// url escaped values of ? and #.
q, f := url.QueryEscape("?"), url.QueryEscape("#")
for i := 1; i < len(matches); i++ {
// Special case of unescaped # and ? by stdlib regexp.
// Reverse the unescape.
if strings.ContainsAny(matches[i], "?#") {
matches[i] = strings.NewReplacer("?", q, "#", f).Replace(matches[i])
}
replacer.Set(fmt.Sprint(i), matches[i])
}
}
@ -246,6 +243,8 @@ func (r ComplexRule) matchExt(rPath string) bool {
return !mustUse
}
var escaper = strings.NewReplacer("?", url.QueryEscape("?"), "#", url.QueryEscape("#"))
func regexpMatches(regexp *regexp.Regexp, base, rPath string) []string {
if regexp != nil {
// include trailing slash in regexp if present
@ -253,7 +252,19 @@ func regexpMatches(regexp *regexp.Regexp, base, rPath string) []string {
if strings.HasSuffix(base, "/") {
start--
}
return regexp.FindStringSubmatch(rPath[start:])
matches := regexp.FindStringSubmatch(rPath[start:])
// When processing a rewrite rule, the matching is done with an unescaped
// version of the old path. However, when such a match contains a ? or #
// character, the match cannot be used verbatim in the "to" URL because
// there it would be interpreted as query/fragment marker, so we re-escape
// those characters:
for i := 1; i < len(matches); i++ {
matches[i] = escaper.Replace(matches[i])
}
return matches
}
return nil
}

View file

@ -33,6 +33,7 @@ func TestRewrite(t *testing.T) {
newSimpleRule(t, "^/from$", "/to"),
newSimpleRule(t, "^/a$", "/b"),
newSimpleRule(t, "^/b$", "/b{uri}"),
newSimpleRule(t, "^/simplereggrp/([0-9]+)([a-z]*)$", "/{1}/{2}/{query}"),
},
FileSys: http.Dir("."),
}
@ -113,6 +114,7 @@ func TestRewrite(t *testing.T) {
{"/hashtest/a%20%23%20test", "/a%20%23%20test"},
{"/hashtest/a%20%3F%20test", "/a%20%3F%20test"},
{"/hashtest/a%20%3F%23test", "/a%20%3F%23test"},
{"/simplereggrp/123abc?q", "/123/abc/q?q"},
}
for i, test := range tests {
@ -129,7 +131,7 @@ func TestRewrite(t *testing.T) {
}
if got, want := rec.Body.String(), test.expectedTo; got != want {
t.Errorf("Test %d: Expected URL to be '%s' but was '%s'", i, want, got)
t.Errorf("Test %d: Expected URL '%s' to be rewritten to '%s' but was rewritten to '%s'", i, test.from, want, got)
}
}
}