From c09e86fddc8a6c0da59fd62a186cab1482faae2a Mon Sep 17 00:00:00 2001
From: Matthew Holt <mholt@users.noreply.github.com>
Date: Fri, 13 Sep 2019 16:24:51 -0600
Subject: [PATCH] headers: Add ability to replace substrings in header fields

This will probably be useful so the proxy can rewrite header values.
---
 modules/caddyhttp/headers/headers.go | 160 +++++++++++++++++++++++----
 1 file changed, 137 insertions(+), 23 deletions(-)

diff --git a/modules/caddyhttp/headers/headers.go b/modules/caddyhttp/headers/headers.go
index ba9d89d1d..71e575dd2 100644
--- a/modules/caddyhttp/headers/headers.go
+++ b/modules/caddyhttp/headers/headers.go
@@ -15,7 +15,9 @@
 package headers
 
 import (
+	"fmt"
 	"net/http"
+	"regexp"
 	"strings"
 
 	"github.com/caddyserver/caddy/v2"
@@ -23,43 +25,61 @@ import (
 )
 
 func init() {
-	caddy.RegisterModule(Headers{})
+	caddy.RegisterModule(Handler{})
 }
 
-// Headers is a middleware which can mutate HTTP headers.
-type Headers struct {
+// Handler is a middleware which can mutate HTTP headers.
+type Handler struct {
 	Request  *HeaderOps     `json:"request,omitempty"`
 	Response *RespHeaderOps `json:"response,omitempty"`
 }
 
 // CaddyModule returns the Caddy module information.
-func (Headers) CaddyModule() caddy.ModuleInfo {
+func (Handler) CaddyModule() caddy.ModuleInfo {
 	return caddy.ModuleInfo{
 		Name: "http.handlers.headers",
-		New:  func() caddy.Module { return new(Headers) },
+		New:  func() caddy.Module { return new(Handler) },
 	}
 }
 
-// HeaderOps defines some operations to
-// perform on HTTP headers.
-type HeaderOps struct {
-	Add    http.Header `json:"add,omitempty"`
-	Set    http.Header `json:"set,omitempty"`
-	Delete []string    `json:"delete,omitempty"`
+// Provision sets up h's configuration.
+func (h *Handler) Provision(_ caddy.Context) error {
+	if h.Request != nil {
+		err := h.Request.provision()
+		if err != nil {
+			return err
+		}
+	}
+	if h.Response != nil {
+		err := h.Response.provision()
+		if err != nil {
+			return err
+		}
+	}
+	return nil
 }
 
-// RespHeaderOps is like HeaderOps, but
-// optionally deferred until response time.
-type RespHeaderOps struct {
-	*HeaderOps
-	Require  *caddyhttp.ResponseMatcher `json:"require,omitempty"`
-	Deferred bool                       `json:"deferred,omitempty"`
+// Validate ensures h's configuration is valid.
+func (h Handler) Validate() error {
+	if h.Request != nil {
+		err := h.Request.validate()
+		if err != nil {
+			return err
+		}
+	}
+	if h.Response != nil {
+		err := h.Response.validate()
+		if err != nil {
+			return err
+		}
+	}
+	return nil
 }
 
-func (h Headers) ServeHTTP(w http.ResponseWriter, r *http.Request, next caddyhttp.Handler) error {
+func (h Handler) ServeHTTP(w http.ResponseWriter, r *http.Request, next caddyhttp.Handler) error {
 	repl := r.Context().Value(caddy.ReplacerCtxKey).(caddy.Replacer)
 
-	apply(h.Request, r.Header, repl)
+	h.Request.applyTo(r.Header, repl)
 
 	// request header's Host is handled specially by the
 	// Go standard library, so if that header was changed,
@@ -79,22 +99,79 @@ func (h Headers) ServeHTTP(w http.ResponseWriter, r *http.Request, next caddyhtt
 				headerOps:             h.Response.HeaderOps,
 			}
 		} else {
-			apply(h.Response.HeaderOps, w.Header(), repl)
+			h.Response.applyTo(w.Header(), repl)
 		}
 	}
+
 	return next.ServeHTTP(w, r)
 }
 
-func apply(ops *HeaderOps, hdr http.Header, repl caddy.Replacer) {
+// HeaderOps defines some operations to
+// perform on HTTP headers.
+type HeaderOps struct {
+	Add     http.Header              `json:"add,omitempty"`
+	Set     http.Header              `json:"set,omitempty"`
+	Delete  []string                 `json:"delete,omitempty"`
+	Replace map[string][]Replacement `json:"replace,omitempty"`
+}
+
+func (ops *HeaderOps) provision() error {
+	for fieldName, replacements := range ops.Replace {
+		for i, r := range replacements {
+			if r.SearchRegexp != "" {
+				re, err := regexp.Compile(r.SearchRegexp)
+				if err != nil {
+					return fmt.Errorf("replacement %d for header field '%s': %v", i, fieldName, err)
+				}
+				replacements[i].re = re
+			}
+		}
+	}
+	return nil
+}
+
+func (ops HeaderOps) validate() error {
+	for fieldName, replacements := range ops.Replace {
+		for _, r := range replacements {
+			if r.Search != "" && r.SearchRegexp != "" {
+				return fmt.Errorf("cannot specify both a substring search and a regular expression search for field '%s'", fieldName)
+			}
+		}
+	}
+	return nil
+}
+
+// Replacement describes a string replacement,
+// either a simple and fast substring search
+// or a slower but more powerful regex search.
+type Replacement struct {
+	Search       string `json:"search,omitempty"`
+	SearchRegexp string `json:"search_regexp,omitempty"`
+	Replace      string `json:"replace,omitempty"`
+
+	re *regexp.Regexp
+}
+
+// RespHeaderOps is like HeaderOps, but
+// optionally deferred until response time.
+type RespHeaderOps struct {
+	*HeaderOps
+	Require  *caddyhttp.ResponseMatcher `json:"require,omitempty"`
+	Deferred bool                       `json:"deferred,omitempty"`
+}
+
+func (ops *HeaderOps) applyTo(hdr http.Header, repl caddy.Replacer) {
 	if ops == nil {
 		return
 	}
+
 	for fieldName, vals := range ops.Add {
 		fieldName = repl.ReplaceAll(fieldName, "")
 		for _, v := range vals {
 			hdr.Add(fieldName, repl.ReplaceAll(v, ""))
 		}
 	}
+
 	for fieldName, vals := range ops.Set {
 		fieldName = repl.ReplaceAll(fieldName, "")
 		for i := range vals {
@@ -102,9 +179,45 @@ func apply(ops *HeaderOps, hdr http.Header, repl caddy.Replacer) {
 		}
 		hdr.Set(fieldName, strings.Join(vals, ","))
 	}
+
 	for _, fieldName := range ops.Delete {
 		hdr.Del(repl.ReplaceAll(fieldName, ""))
 	}
+
+	for fieldName, replacements := range ops.Replace {
+		fieldName = repl.ReplaceAll(fieldName, "")
+
+		// perform replacements across all fields
+		if fieldName == "*" {
+			for _, r := range replacements {
+				search := repl.ReplaceAll(r.Search, "")
+				replace := repl.ReplaceAll(r.Replace, "")
+				for fieldName, vals := range hdr {
+					for i := range vals {
+						if r.re != nil {
+							hdr[fieldName][i] = r.re.ReplaceAllString(hdr[fieldName][i], replace)
+						} else {
+							hdr[fieldName][i] = strings.ReplaceAll(hdr[fieldName][i], search, replace)
+						}
+					}
+				}
+			}
+			continue
+		}
+
+		// perform replacements only with the named field
+		for _, r := range replacements {
+			search := repl.ReplaceAll(r.Search, "")
+			replace := repl.ReplaceAll(r.Replace, "")
+			for i := range hdr[fieldName] {
+				if r.re != nil {
+					hdr[fieldName][i] = r.re.ReplaceAllString(hdr[fieldName][i], replace)
+				} else {
+					hdr[fieldName][i] = strings.ReplaceAll(hdr[fieldName][i], search, replace)
+				}
+			}
+		}
+	}
 }
 
 // responseWriterWrapper defers response header
@@ -123,7 +236,7 @@ func (rww *responseWriterWrapper) WriteHeader(status int) {
 	}
 	rww.wroteHeader = true
 	if rww.require == nil || rww.require.Match(status, rww.ResponseWriterWrapper.Header()) {
-		apply(rww.headerOps, rww.ResponseWriterWrapper.Header(), rww.replacer)
+		rww.headerOps.applyTo(rww.ResponseWriterWrapper.Header(), rww.replacer)
 	}
 	rww.ResponseWriterWrapper.WriteHeader(status)
 }
@@ -137,6 +250,7 @@ func (rww *responseWriterWrapper) Write(d []byte) (int, error) {
 
 // Interface guards
 var (
-	_ caddyhttp.MiddlewareHandler = (*Headers)(nil)
+	_ caddy.Provisioner           = (*Handler)(nil)
+	_ caddyhttp.MiddlewareHandler = (*Handler)(nil)
 	_ caddyhttp.HTTPInterfaces    = (*responseWriterWrapper)(nil)
 )