From 9d4ed3a3236df06e54c80c4f6633b66d68ad3673 Mon Sep 17 00:00:00 2001
From: Matt Holt <mholt@users.noreply.github.com>
Date: Thu, 17 Jun 2021 09:59:08 -0600
Subject: [PATCH] caddyhttp: Refactor and export SanitizedPathJoin for use in
 fastcgi (#4207)

---
 modules/caddyhttp/caddyhttp.go                | 29 ++++++
 modules/caddyhttp/caddyhttp_test.go           | 94 +++++++++++++++++++
 modules/caddyhttp/fileserver/browse.go        |  2 +-
 modules/caddyhttp/fileserver/matcher.go       |  2 +-
 modules/caddyhttp/fileserver/matcher_test.go  |  4 +-
 modules/caddyhttp/fileserver/staticfiles.go   | 40 +-------
 .../caddyhttp/fileserver/staticfiles_test.go  | 84 -----------------
 .../caddyhttp/reverseproxy/fastcgi/fastcgi.go | 10 +-
 8 files changed, 131 insertions(+), 134 deletions(-)
 create mode 100644 modules/caddyhttp/caddyhttp_test.go

diff --git a/modules/caddyhttp/caddyhttp.go b/modules/caddyhttp/caddyhttp.go
index d93a5c8a2..784b2b900 100644
--- a/modules/caddyhttp/caddyhttp.go
+++ b/modules/caddyhttp/caddyhttp.go
@@ -20,7 +20,9 @@ import (
 	"io"
 	"net"
 	"net/http"
+	"path/filepath"
 	"strconv"
+	"strings"
 
 	"github.com/caddyserver/caddy/v2"
 	"github.com/caddyserver/caddy/v2/caddyconfig/caddyfile"
@@ -217,6 +219,31 @@ func StatusCodeMatches(actual, configured int) bool {
 	return false
 }
 
+// SanitizedPathJoin performs filepath.Join(root, reqPath) that
+// is safe against directory traversal attacks. It uses logic
+// similar to that in the Go standard library, specifically
+// in the implementation of http.Dir. The root is assumed to
+// be a trusted path, but reqPath is not; and the output will
+// never be outside of root. The resulting path can be used
+// with the local file system.
+func SanitizedPathJoin(root, reqPath string) string {
+	if root == "" {
+		root = "."
+	}
+
+	path := filepath.Join(root, filepath.Clean("/"+reqPath))
+
+	// filepath.Join also cleans the path, and cleaning strips
+	// the trailing slash, so we need to re-add it afterwards.
+	// if the length is 1, then it's a path to the root,
+	// and that should return ".", so we don't append the separator.
+	if strings.HasSuffix(reqPath, "/") && len(reqPath) > 1 {
+		path += separator
+	}
+
+	return path
+}
+
 // tlsPlaceholderWrapper is a no-op listener wrapper that marks
 // where the TLS listener should be in a chain of listener wrappers.
 // It should only be used if another listener wrapper must be placed
@@ -242,6 +269,8 @@ const (
 	DefaultHTTPSPort = 443
 )
 
+const separator = string(filepath.Separator)
+
 // Interface guard
 var _ caddy.ListenerWrapper = (*tlsPlaceholderWrapper)(nil)
 var _ caddyfile.Unmarshaler = (*tlsPlaceholderWrapper)(nil)
diff --git a/modules/caddyhttp/caddyhttp_test.go b/modules/caddyhttp/caddyhttp_test.go
new file mode 100644
index 000000000..09011fe94
--- /dev/null
+++ b/modules/caddyhttp/caddyhttp_test.go
@@ -0,0 +1,94 @@
+package caddyhttp
+
+import (
+	"net/url"
+	"path/filepath"
+	"testing"
+)
+
+func TestSanitizedPathJoin(t *testing.T) {
+	// For reference:
+	// %2e = .
+	// %2f = /
+	// %5c = \
+	for i, tc := range []struct {
+		inputRoot string
+		inputPath string
+		expect    string
+	}{
+		{
+			inputPath: "",
+			expect:    ".",
+		},
+		{
+			inputPath: "/",
+			expect:    ".",
+		},
+		{
+			inputPath: "/foo",
+			expect:    "foo",
+		},
+		{
+			inputPath: "/foo/",
+			expect:    "foo" + separator,
+		},
+		{
+			inputPath: "/foo/bar",
+			expect:    filepath.Join("foo", "bar"),
+		},
+		{
+			inputRoot: "/a",
+			inputPath: "/foo/bar",
+			expect:    filepath.Join("/", "a", "foo", "bar"),
+		},
+		{
+			inputPath: "/foo/../bar",
+			expect:    "bar",
+		},
+		{
+			inputRoot: "/a/b",
+			inputPath: "/foo/../bar",
+			expect:    filepath.Join("/", "a", "b", "bar"),
+		},
+		{
+			inputRoot: "/a/b",
+			inputPath: "/..%2fbar",
+			expect:    filepath.Join("/", "a", "b", "bar"),
+		},
+		{
+			inputRoot: "/a/b",
+			inputPath: "/%2e%2e%2fbar",
+			expect:    filepath.Join("/", "a", "b", "bar"),
+		},
+		{
+			inputRoot: "/a/b",
+			inputPath: "/%2e%2e%2f%2e%2e%2f",
+			expect:    filepath.Join("/", "a", "b") + separator,
+		},
+		{
+			inputRoot: "C:\\www",
+			inputPath: "/foo/bar",
+			expect:    filepath.Join("C:\\www", "foo", "bar"),
+		},
+		{
+			inputRoot: "C:\\www",
+			inputPath: "/D:\\foo\\bar",
+			expect:    filepath.Join("C:\\www", "D:\\foo\\bar"),
+		},
+	} {
+		// we don't *need* to use an actual parsed URL, but it
+		// adds some authenticity to the tests since real-world
+		// values will be coming in from URLs; thus, the test
+		// corpus can contain paths as encoded by clients, which
+		// more closely emulates the actual attack vector
+		u, err := url.Parse("http://test:9999" + tc.inputPath)
+		if err != nil {
+			t.Fatalf("Test %d: invalid URL: %v", i, err)
+		}
+		actual := SanitizedPathJoin(tc.inputRoot, u.Path)
+		if actual != tc.expect {
+			t.Errorf("Test %d: SanitizedPathJoin('%s', '%s') =>  %s (expected '%s')",
+				i, tc.inputRoot, tc.inputPath, actual, tc.expect)
+		}
+	}
+}
diff --git a/modules/caddyhttp/fileserver/browse.go b/modules/caddyhttp/fileserver/browse.go
index 3122f12b4..cd9bcbcd7 100644
--- a/modules/caddyhttp/fileserver/browse.go
+++ b/modules/caddyhttp/fileserver/browse.go
@@ -207,7 +207,7 @@ func isSymlinkTargetDir(f os.FileInfo, root, urlPath string) bool {
 	if !isSymlink(f) {
 		return false
 	}
-	target := sanitizedPathJoin(root, path.Join(urlPath, f.Name()))
+	target := caddyhttp.SanitizedPathJoin(root, path.Join(urlPath, f.Name()))
 	targetInfo, err := os.Stat(target)
 	if err != nil {
 		return false
diff --git a/modules/caddyhttp/fileserver/matcher.go b/modules/caddyhttp/fileserver/matcher.go
index 064398c67..739b9e056 100644
--- a/modules/caddyhttp/fileserver/matcher.go
+++ b/modules/caddyhttp/fileserver/matcher.go
@@ -185,7 +185,7 @@ func (m MatchFile) selectFile(r *http.Request) (matched bool) {
 		if strings.HasSuffix(file, "/") {
 			suffix += "/"
 		}
-		fullpath = sanitizedPathJoin(root, suffix)
+		fullpath = caddyhttp.SanitizedPathJoin(root, suffix)
 		return
 	}
 
diff --git a/modules/caddyhttp/fileserver/matcher_test.go b/modules/caddyhttp/fileserver/matcher_test.go
index e3199077f..746a5d785 100644
--- a/modules/caddyhttp/fileserver/matcher_test.go
+++ b/modules/caddyhttp/fileserver/matcher_test.go
@@ -94,7 +94,7 @@ func TestFileMatcher(t *testing.T) {
 			t.Fatalf("Test %d: actual path: %v, expected: %v", i, rel, tc.expectedPath)
 		}
 
-		fileType, ok := repl.Get("http.matchers.file.type")
+		fileType, _ := repl.Get("http.matchers.file.type")
 		if fileType != tc.expectedType {
 			t.Fatalf("Test %d: actual file type: %v, expected: %v", i, fileType, tc.expectedType)
 		}
@@ -197,7 +197,7 @@ func TestPHPFileMatcher(t *testing.T) {
 			t.Fatalf("Test %d: actual path: %v, expected: %v", i, rel, tc.expectedPath)
 		}
 
-		fileType, ok := repl.Get("http.matchers.file.type")
+		fileType, _ := repl.Get("http.matchers.file.type")
 		if fileType != tc.expectedType {
 			t.Fatalf("Test %d: actual file type: %v, expected: %v", i, fileType, tc.expectedType)
 		}
diff --git a/modules/caddyhttp/fileserver/staticfiles.go b/modules/caddyhttp/fileserver/staticfiles.go
index 9266332a0..45f8610dc 100644
--- a/modules/caddyhttp/fileserver/staticfiles.go
+++ b/modules/caddyhttp/fileserver/staticfiles.go
@@ -161,7 +161,7 @@ func (fsrv *FileServer) ServeHTTP(w http.ResponseWriter, r *http.Request, next c
 	filesToHide := fsrv.transformHidePaths(repl)
 
 	root := repl.ReplaceAll(fsrv.Root, ".")
-	filename := sanitizedPathJoin(root, r.URL.Path)
+	filename := caddyhttp.SanitizedPathJoin(root, r.URL.Path)
 
 	fsrv.logger.Debug("sanitized path join",
 		zap.String("site_root", root),
@@ -185,7 +185,7 @@ func (fsrv *FileServer) ServeHTTP(w http.ResponseWriter, r *http.Request, next c
 	var implicitIndexFile bool
 	if info.IsDir() && len(fsrv.IndexNames) > 0 {
 		for _, indexPage := range fsrv.IndexNames {
-			indexPath := sanitizedPathJoin(filename, indexPage)
+			indexPath := caddyhttp.SanitizedPathJoin(filename, indexPage)
 			if fileHidden(indexPath, filesToHide) {
 				// pretend this file doesn't exist
 				fsrv.logger.Debug("hiding index file",
@@ -435,42 +435,6 @@ func (fsrv *FileServer) transformHidePaths(repl *caddy.Replacer) []string {
 	return hide
 }
 
-// sanitizedPathJoin performs filepath.Join(root, reqPath) that
-// is safe against directory traversal attacks. It uses logic
-// similar to that in the Go standard library, specifically
-// in the implementation of http.Dir. The root is assumed to
-// be a trusted path, but reqPath is not.
-func sanitizedPathJoin(root, reqPath string) string {
-	// TODO: Caddy 1 uses this:
-	// prevent absolute path access on Windows, e.g. http://localhost:5000/C:\Windows\notepad.exe
-	// if runtime.GOOS == "windows" && len(reqPath) > 0 && filepath.IsAbs(reqPath[1:]) {
-	// TODO.
-	// }
-
-	// TODO: whereas std lib's http.Dir.Open() uses this:
-	// if filepath.Separator != '/' && strings.ContainsRune(name, filepath.Separator) {
-	// 	return nil, errors.New("http: invalid character in file path")
-	// }
-
-	// TODO: see https://play.golang.org/p/oh77BiVQFti for another thing to consider
-
-	if root == "" {
-		root = "."
-	}
-
-	path := filepath.Join(root, filepath.Clean("/"+reqPath))
-
-	// filepath.Join also cleans the path, and cleaning strips
-	// the trailing slash, so we need to re-add it afterwards.
-	// if the length is 1, then it's a path to the root,
-	// and that should return ".", so we don't append the separator.
-	if strings.HasSuffix(reqPath, "/") && len(reqPath) > 1 {
-		path += separator
-	}
-
-	return path
-}
-
 // fileHidden returns true if filename is hidden according to the hide list.
 // filename must be a relative or absolute file system path, not a request
 // URI path. It is expected that all the paths in the hide list are absolute
diff --git a/modules/caddyhttp/fileserver/staticfiles_test.go b/modules/caddyhttp/fileserver/staticfiles_test.go
index 690f46ac4..5d6133c73 100644
--- a/modules/caddyhttp/fileserver/staticfiles_test.go
+++ b/modules/caddyhttp/fileserver/staticfiles_test.go
@@ -15,96 +15,12 @@
 package fileserver
 
 import (
-	"net/url"
 	"path/filepath"
 	"runtime"
 	"strings"
 	"testing"
 )
 
-func TestSanitizedPathJoin(t *testing.T) {
-	// For easy reference:
-	// %2e = .
-	// %2f = /
-	// %5c = \
-	for i, tc := range []struct {
-		inputRoot string
-		inputPath string
-		expect    string
-	}{
-		{
-			inputPath: "",
-			expect:    ".",
-		},
-		{
-			inputPath: "/",
-			expect:    ".",
-		},
-		{
-			inputPath: "/foo",
-			expect:    "foo",
-		},
-		{
-			inputPath: "/foo/",
-			expect:    "foo" + separator,
-		},
-		{
-			inputPath: "/foo/bar",
-			expect:    filepath.Join("foo", "bar"),
-		},
-		{
-			inputRoot: "/a",
-			inputPath: "/foo/bar",
-			expect:    filepath.Join("/", "a", "foo", "bar"),
-		},
-		{
-			inputPath: "/foo/../bar",
-			expect:    "bar",
-		},
-		{
-			inputRoot: "/a/b",
-			inputPath: "/foo/../bar",
-			expect:    filepath.Join("/", "a", "b", "bar"),
-		},
-		{
-			inputRoot: "/a/b",
-			inputPath: "/..%2fbar",
-			expect:    filepath.Join("/", "a", "b", "bar"),
-		},
-		{
-			inputRoot: "/a/b",
-			inputPath: "/%2e%2e%2fbar",
-			expect:    filepath.Join("/", "a", "b", "bar"),
-		},
-		{
-			inputRoot: "/a/b",
-			inputPath: "/%2e%2e%2f%2e%2e%2f",
-			expect:    filepath.Join("/", "a", "b") + separator,
-		},
-		{
-			inputRoot: "C:\\www",
-			inputPath: "/foo/bar",
-			expect:    filepath.Join("C:\\www", "foo", "bar"),
-		},
-		// TODO: test more windows paths... on windows... sigh.
-	} {
-		// we don't *need* to use an actual parsed URL, but it
-		// adds some authenticity to the tests since real-world
-		// values will be coming in from URLs; thus, the test
-		// corpus can contain paths as encoded by clients, which
-		// more closely emulates the actual attack vector
-		u, err := url.Parse("http://test:9999" + tc.inputPath)
-		if err != nil {
-			t.Fatalf("Test %d: invalid URL: %v", i, err)
-		}
-		actual := sanitizedPathJoin(tc.inputRoot, u.Path)
-		if actual != tc.expect {
-			t.Errorf("Test %d: [%s %s] => %s (expected %s)",
-				i, tc.inputRoot, tc.inputPath, actual, tc.expect)
-		}
-	}
-}
-
 func TestFileHidden(t *testing.T) {
 	for i, tc := range []struct {
 		inputHide []string
diff --git a/modules/caddyhttp/reverseproxy/fastcgi/fastcgi.go b/modules/caddyhttp/reverseproxy/fastcgi/fastcgi.go
index d7a0e369b..40e0207fa 100644
--- a/modules/caddyhttp/reverseproxy/fastcgi/fastcgi.go
+++ b/modules/caddyhttp/reverseproxy/fastcgi/fastcgi.go
@@ -20,7 +20,6 @@ import (
 	"fmt"
 	"net"
 	"net/http"
-	"path"
 	"path/filepath"
 	"strconv"
 	"strings"
@@ -218,12 +217,7 @@ func (t Transport) buildEnv(r *http.Request) (map[string]string, error) {
 	}
 
 	// SCRIPT_FILENAME is the absolute path of SCRIPT_NAME
-	scriptFilename := filepath.Join(root, scriptName)
-
-	// Add vhost path prefix to scriptName. Otherwise, some PHP software will
-	// have difficulty discovering its URL.
-	pathPrefix, _ := r.Context().Value(caddy.CtxKey("path_prefix")).(string)
-	scriptName = path.Join(pathPrefix, scriptName)
+	scriptFilename := caddyhttp.SanitizedPathJoin(root, scriptName)
 
 	// Ensure the SCRIPT_NAME has a leading slash for compliance with RFC3875
 	// Info: https://tools.ietf.org/html/rfc3875#section-4.1.13
@@ -288,7 +282,7 @@ func (t Transport) buildEnv(r *http.Request) (map[string]string, error) {
 	// PATH_TRANSLATED should only exist if PATH_INFO is defined.
 	// Info: https://www.ietf.org/rfc/rfc3875 Page 14
 	if env["PATH_INFO"] != "" {
-		env["PATH_TRANSLATED"] = filepath.Join(root, pathInfo) // Info: http://www.oreilly.com/openbook/cgi/ch02_04.html
+		env["PATH_TRANSLATED"] = caddyhttp.SanitizedPathJoin(root, pathInfo) // Info: http://www.oreilly.com/openbook/cgi/ch02_04.html
 	}
 
 	// compliance with the CGI specification requires that