reverseproxy: Make shallow-ish clone of the request (#4551)

* reverseproxy: Make shallow-ish clone of the request

* Refactor request cloning into separate function

Co-authored-by: Matthew Holt <mholt@users.noreply.github.com>
This commit is contained in:
Francis Lavoie 2022-03-03 11:54:45 -05:00 committed by GitHub
parent 6b385a36f9
commit f5e104944e
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@ -358,54 +358,20 @@ func (h *Handler) Cleanup() error {
func (h *Handler) ServeHTTP(w http.ResponseWriter, r *http.Request, next caddyhttp.Handler) error {
repl := r.Context().Value(caddy.ReplacerCtxKey).(*caddy.Replacer)
// if enabled, buffer client request;
// this should only be enabled if the
// upstream requires it and does not
// work with "slow clients" (gunicorn,
// etc.) - this obviously has a perf
// overhead and makes the proxy at
// risk of exhausting memory and more
// susceptible to slowloris attacks,
// so it is strongly recommended to
// only use this feature if absolutely
// required, if read timeouts are set,
// and if body size is limited
if h.BufferRequests {
r.Body = h.bufferedBody(r.Body)
}
// prepare the request for proxying; this is needed only once
err := h.prepareRequest(r)
clonedReq, err := h.prepareRequest(r)
if err != nil {
return caddyhttp.Error(http.StatusInternalServerError,
fmt.Errorf("preparing request for upstream round-trip: %v", err))
}
// we will need the original headers and Host value if
// header operations are configured; and we should
// restore them after we're done if they are changed
// (for example, changing the outbound Host header
// should not permanently change r.Host; issue #3509)
reqHost := r.Host
reqHeader := r.Header
// sanitize the request URL; we expect it to not contain the scheme and host
// since those should be determined by r.TLS and r.Host respectively, but
// some clients may include it in the request-line, which is technically
// valid in HTTP, but breaks reverseproxy behaviour, overriding how the
// dialer will behave. See #4237 for context.
origURLScheme := r.URL.Scheme
origURLHost := r.URL.Host
r.URL.Scheme = ""
r.URL.Host = ""
// restore modifications to the request after we're done proxying
defer func() {
r.Host = reqHost // TODO: data race, see #4038
r.Header = reqHeader // TODO: data race, see #4038
r.URL.Scheme = origURLScheme
r.URL.Host = origURLHost
}()
// header operations are configured; this is so that each
// retry can apply the modifications, because placeholders
// may be used which depend on the selected upstream for
// their values
reqHost := clonedReq.Host
reqHeader := clonedReq.Header
start := time.Now()
defer func() {
@ -416,12 +382,12 @@ func (h *Handler) ServeHTTP(w http.ResponseWriter, r *http.Request, next caddyht
var proxyErr error
for {
// choose an available upstream
upstream := h.LoadBalancing.SelectionPolicy.Select(h.Upstreams, r, w)
upstream := h.LoadBalancing.SelectionPolicy.Select(h.Upstreams, clonedReq, w)
if upstream == nil {
if proxyErr == nil {
proxyErr = fmt.Errorf("no upstreams available")
}
if !h.LoadBalancing.tryAgain(h.ctx, start, proxyErr, r) {
if !h.LoadBalancing.tryAgain(h.ctx, start, proxyErr, clonedReq) {
break
}
continue
@ -430,7 +396,7 @@ func (h *Handler) ServeHTTP(w http.ResponseWriter, r *http.Request, next caddyht
// the dial address may vary per-request if placeholders are
// used, so perform those replacements here; the resulting
// DialInfo struct should have valid network address syntax
dialInfo, err := upstream.fillDialInfo(r)
dialInfo, err := upstream.fillDialInfo(clonedReq)
if err != nil {
return statusError(fmt.Errorf("making dial info: %v", err))
}
@ -451,17 +417,17 @@ func (h *Handler) ServeHTTP(w http.ResponseWriter, r *http.Request, next caddyht
// mutate request headers according to this upstream;
// because we're in a retry loop, we have to copy
// headers (and the r.Host value) from the original
// headers (and the Host value) from the original
// so that each retry is identical to the first
if h.Headers != nil && h.Headers.Request != nil {
r.Header = make(http.Header)
copyHeader(r.Header, reqHeader)
r.Host = reqHost
h.Headers.Request.ApplyToRequest(r)
clonedReq.Header = make(http.Header)
copyHeader(clonedReq.Header, reqHeader)
clonedReq.Host = reqHost
h.Headers.Request.ApplyToRequest(clonedReq)
}
// proxy the request to that upstream
proxyErr = h.reverseProxy(w, r, repl, dialInfo, next)
proxyErr = h.reverseProxy(w, clonedReq, repl, dialInfo, next)
if proxyErr == nil || proxyErr == context.Canceled {
// context.Canceled happens when the downstream client
// cancels the request, which is not our failure
@ -480,7 +446,7 @@ func (h *Handler) ServeHTTP(w http.ResponseWriter, r *http.Request, next caddyht
h.countFailure(upstream)
// if we've tried long enough, break
if !h.LoadBalancing.tryAgain(h.ctx, start, proxyErr, r) {
if !h.LoadBalancing.tryAgain(h.ctx, start, proxyErr, clonedReq) {
break
}
}
@ -488,14 +454,27 @@ func (h *Handler) ServeHTTP(w http.ResponseWriter, r *http.Request, next caddyht
return statusError(proxyErr)
}
// prepareRequest modifies req so that it is ready to be proxied,
// except for directing to a specific upstream. This method mutates
// headers and other necessary properties of the request and should
// be done just once (before proxying) regardless of proxy retries.
// This assumes that no mutations of the request are performed
// by h during or after proxying.
func (h Handler) prepareRequest(req *http.Request) error {
// most of this is borrowed from the Go std lib reverse proxy
// prepareRequest clones req so that it can be safely modified without
// changing the original request or introducing data races. It then
// modifies it so that it is ready to be proxied, except for directing
// to a specific upstream. This method adjusts headers and other relevant
// properties of the cloned request and should be done just once (before
// proxying) regardless of proxy retries. This assumes that no mutations
// of the cloned request are performed by h during or after proxying.
func (h Handler) prepareRequest(req *http.Request) (*http.Request, error) {
req = cloneRequest(req)
// if enabled, buffer client request; this should only be
// enabled if the upstream requires it and does not work
// with "slow clients" (gunicorn, etc.) - this obviously
// has a perf overhead and makes the proxy at risk of
// exhausting memory and more susceptible to slowloris
// attacks, so it is strongly recommended to only use this
// feature if absolutely required, if read timeouts are
// set, and if body size is limited
if h.BufferRequests {
req.Body = h.bufferedBody(req.Body)
}
if req.ContentLength == 0 {
req.Body = nil // Issue golang/go#16036: nil Body for http.Transport retries
@ -560,7 +539,7 @@ func (h Handler) prepareRequest(req *http.Request) error {
req.Header.Set("X-Forwarded-Proto", proto)
}
return nil
return req, nil
}
// reverseProxy performs a round-trip to the given backend and processes the response with the client.
@ -807,7 +786,7 @@ func (lb LoadBalancing) tryAgain(ctx caddy.Context, start time.Time, proxyErr er
// directRequest modifies only req.URL so that it points to the upstream
// in the given DialInfo. It must modify ONLY the request URL.
func (h Handler) directRequest(req *http.Request, di DialInfo) {
func (Handler) directRequest(req *http.Request, di DialInfo) {
// we need a host, so set the upstream's host address
reqHost := di.Address
@ -845,6 +824,42 @@ func (h Handler) bufferedBody(originalBody io.ReadCloser) io.ReadCloser {
}
}
// cloneRequest makes a semi-deep clone of origReq.
//
// Most of this code is borrowed from the Go stdlib reverse proxy,
// but we make a shallow-ish clone the request (deep clone only
// the headers and URL) so we can avoid manipulating the original
// request when using it to proxy upstream. This prevents request
// corruption and data races.
func cloneRequest(origReq *http.Request) *http.Request {
req := new(http.Request)
*req = *origReq
if origReq.URL != nil {
newURL := new(url.URL)
*newURL = *origReq.URL
if origReq.URL.User != nil {
newURL.User = new(url.Userinfo)
*newURL.User = *origReq.URL.User
}
// sanitize the request URL; we expect it to not contain the
// scheme and host since those should be determined by r.TLS
// and r.Host respectively, but some clients may include it
// in the request-line, which is technically valid in HTTP,
// but breaks reverseproxy behaviour, overriding how the
// dialer will behave. See #4237 for context.
newURL.Scheme = ""
newURL.Host = ""
req.URL = newURL
}
if origReq.Header != nil {
req.Header = origReq.Header.Clone()
}
if origReq.Trailer != nil {
req.Trailer = origReq.Trailer.Clone()
}
return req
}
func copyHeader(dst, src http.Header) {
for k, vv := range src {
for _, v := range vv {