fileserver: Support virtual file systems (#4909)

* fileserver: Support virtual file systems (close #3720)

This change replaces the hard-coded use of os.Open() and os.Stat() with
the use of the new (Go 1.16) io/fs APIs, enabling virtual file systems.
It introduces a new module namespace, caddy.fs, for such file systems.

Also improve documentation for the file server. I realized it was one of
the first modules written for Caddy 2, and the docs hadn't really been
updated since!

* Virtualize FS for file matcher; minor tweaks

* Fix tests and rename dirFS -> osFS

(Since we do not use a root directory, it is dynamic.)
This commit is contained in:
Matt Holt 2022-07-30 13:07:44 -06:00 committed by GitHub
parent 07ed3e7c30
commit 6668271661
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 166 additions and 55 deletions

View file

@ -19,6 +19,7 @@ import (
_ "embed"
"encoding/json"
"fmt"
"io/fs"
"net/http"
"os"
"path"
@ -80,7 +81,7 @@ func (fsrv *FileServer) serveBrowse(root, dirPath string, w http.ResponseWriter,
repl := r.Context().Value(caddy.ReplacerCtxKey).(*caddy.Replacer)
// calling path.Clean here prevents weird breadcrumbs when URL paths are sketchy like /%2e%2e%2f
listing, err := fsrv.loadDirectoryContents(dir, root, path.Clean(r.URL.Path), repl)
listing, err := fsrv.loadDirectoryContents(dir.(fs.ReadDirFile), root, path.Clean(r.URL.Path), repl)
switch {
case os.IsPermission(err):
return caddyhttp.Error(http.StatusForbidden, err)
@ -133,8 +134,8 @@ func (fsrv *FileServer) serveBrowse(root, dirPath string, w http.ResponseWriter,
return nil
}
func (fsrv *FileServer) loadDirectoryContents(dir *os.File, root, urlPath string, repl *caddy.Replacer) (browseTemplateContext, error) {
files, err := dir.Readdir(-1)
func (fsrv *FileServer) loadDirectoryContents(dir fs.ReadDirFile, root, urlPath string, repl *caddy.Replacer) (browseTemplateContext, error) {
files, err := dir.ReadDir(10000) // TODO: this limit should probably be configurable
if err != nil {
return browseTemplateContext{}, err
}
@ -201,25 +202,25 @@ func (fsrv *FileServer) makeBrowseTemplate(tplCtx *templateContext) (*template.T
return tpl, nil
}
// isSymlink return true if f is a symbolic link
func isSymlink(f os.FileInfo) bool {
return f.Mode()&os.ModeSymlink != 0
}
// isSymlinkTargetDir returns true if f's symbolic link target
// is a directory.
func isSymlinkTargetDir(f os.FileInfo, root, urlPath string) bool {
func (fsrv *FileServer) isSymlinkTargetDir(f fs.FileInfo, root, urlPath string) bool {
if !isSymlink(f) {
return false
}
target := caddyhttp.SanitizedPathJoin(root, path.Join(urlPath, f.Name()))
targetInfo, err := os.Stat(target)
targetInfo, err := fsrv.fileSystem.Stat(target)
if err != nil {
return false
}
return targetInfo.IsDir()
}
// isSymlink return true if f is a symbolic link.
func isSymlink(f fs.FileInfo) bool {
return f.Mode()&os.ModeSymlink != 0
}
// templateContext powers the context used when evaluating the browse template.
// It combines browse-specific features with the standard templates handler
// features.

View file

@ -15,6 +15,7 @@
package fileserver
import (
"io/fs"
"net/url"
"os"
"path"
@ -26,22 +27,31 @@ import (
"github.com/caddyserver/caddy/v2"
"github.com/caddyserver/caddy/v2/modules/caddyhttp"
"github.com/dustin/go-humanize"
"go.uber.org/zap"
)
func (fsrv *FileServer) directoryListing(files []os.FileInfo, canGoUp bool, root, urlPath string, repl *caddy.Replacer) browseTemplateContext {
func (fsrv *FileServer) directoryListing(entries []fs.DirEntry, canGoUp bool, root, urlPath string, repl *caddy.Replacer) browseTemplateContext {
filesToHide := fsrv.transformHidePaths(repl)
var dirCount, fileCount int
fileInfos := []fileInfo{}
for _, f := range files {
name := f.Name()
for _, entry := range entries {
name := entry.Name()
if fileHidden(name, filesToHide) {
continue
}
isDir := f.IsDir() || isSymlinkTargetDir(f, root, urlPath)
info, err := entry.Info()
if err != nil {
fsrv.logger.Error("could not get info about directory entry",
zap.String("name", entry.Name()),
zap.String("root", root))
continue
}
isDir := entry.IsDir() || fsrv.isSymlinkTargetDir(info, root, urlPath)
// add the slash after the escape of path to avoid escaping the slash as well
if isDir {
@ -51,11 +61,11 @@ func (fsrv *FileServer) directoryListing(files []os.FileInfo, canGoUp bool, root
fileCount++
}
size := f.Size()
fileIsSymlink := isSymlink(f)
size := info.Size()
fileIsSymlink := isSymlink(info)
if fileIsSymlink {
path := caddyhttp.SanitizedPathJoin(root, path.Join(urlPath, f.Name()))
fileInfo, err := os.Stat(path)
path := caddyhttp.SanitizedPathJoin(root, path.Join(urlPath, info.Name()))
fileInfo, err := fsrv.fileSystem.Stat(path)
if err == nil {
size = fileInfo.Size()
}
@ -73,8 +83,8 @@ func (fsrv *FileServer) directoryListing(files []os.FileInfo, canGoUp bool, root
Name: name,
Size: size,
URL: u.String(),
ModTime: f.ModTime().UTC(),
Mode: f.Mode(),
ModTime: info.ModTime().UTC(),
Mode: info.Mode(),
})
}
name, _ := url.PathUnescape(urlPath)

View file

@ -15,7 +15,9 @@
package fileserver
import (
"encoding/json"
"fmt"
"io/fs"
"net/http"
"os"
"path"
@ -54,6 +56,11 @@ func init() {
// - `{http.matchers.file.remainder}` Set to the remainder
// of the path if the path was split by `split_path`.
type MatchFile struct {
// The file system implementation to use. By default, the
// local disk file system will be used.
FileSystemRaw json.RawMessage `json:"file_system,omitempty" caddy:"namespace=caddy.fs inline_key=backend"`
fileSystem fs.StatFS
// The root directory, used for creating absolute
// file paths, and required when working with
// relative paths; if not specified, `{http.vars.root}`
@ -241,10 +248,23 @@ func celFileMatcherMacroExpander() parser.MacroExpander {
}
// Provision sets up m's defaults.
func (m *MatchFile) Provision(_ caddy.Context) error {
func (m *MatchFile) Provision(ctx caddy.Context) error {
// establish the file system to use
if len(m.FileSystemRaw) > 0 {
mod, err := ctx.LoadModule(m, "FileSystemRaw")
if err != nil {
return fmt.Errorf("loading file system module: %v", err)
}
m.fileSystem = mod.(fs.StatFS)
}
if m.fileSystem == nil {
m.fileSystem = osFS{}
}
if m.Root == "" {
m.Root = "{http.vars.root}"
}
// if list of files to try was omitted entirely, assume URL path
// (use placeholder instead of r.URL.Path; see issue #4146)
if m.TryFiles == nil {
@ -316,7 +336,7 @@ func (m MatchFile) selectFile(r *http.Request) (matched bool) {
return
}
suffix, fullpath, remainder := prepareFilePath(f)
if info, exists := strictFileExists(fullpath); exists {
if info, exists := m.strictFileExists(fullpath); exists {
setPlaceholders(info, suffix, fullpath, remainder)
return true
}
@ -330,7 +350,7 @@ func (m MatchFile) selectFile(r *http.Request) (matched bool) {
var info os.FileInfo
for _, f := range m.TryFiles {
suffix, fullpath, splitRemainder := prepareFilePath(f)
info, err := os.Stat(fullpath)
info, err := m.fileSystem.Stat(fullpath)
if err == nil && info.Size() > largestSize {
largestSize = info.Size()
largestFilename = fullpath
@ -349,7 +369,7 @@ func (m MatchFile) selectFile(r *http.Request) (matched bool) {
var info os.FileInfo
for _, f := range m.TryFiles {
suffix, fullpath, splitRemainder := prepareFilePath(f)
info, err := os.Stat(fullpath)
info, err := m.fileSystem.Stat(fullpath)
if err == nil && (smallestSize == 0 || info.Size() < smallestSize) {
smallestSize = info.Size()
smallestFilename = fullpath
@ -368,7 +388,7 @@ func (m MatchFile) selectFile(r *http.Request) (matched bool) {
var info os.FileInfo
for _, f := range m.TryFiles {
suffix, fullpath, splitRemainder := prepareFilePath(f)
info, err := os.Stat(fullpath)
info, err := m.fileSystem.Stat(fullpath)
if err == nil &&
(recentDate.IsZero() || info.ModTime().After(recentDate)) {
recentDate = info.ModTime()
@ -404,8 +424,8 @@ func parseErrorCode(input string) error {
// the file must also be a directory; if it does
// NOT end in a forward slash, the file must NOT
// be a directory.
func strictFileExists(file string) (os.FileInfo, bool) {
stat, err := os.Stat(file)
func (m MatchFile) strictFileExists(file string) (os.FileInfo, bool) {
stat, err := m.fileSystem.Stat(file)
if err != nil {
// in reality, this can be any error
// such as permission or even obscure

View file

@ -111,8 +111,9 @@ func TestFileMatcher(t *testing.T) {
},
} {
m := &MatchFile{
Root: "./testdata",
TryFiles: []string{"{http.request.uri.path}", "{http.request.uri.path}/"},
fileSystem: osFS{},
Root: "./testdata",
TryFiles: []string{"{http.request.uri.path}", "{http.request.uri.path}/"},
}
u, err := url.Parse(tc.path)
@ -213,9 +214,10 @@ func TestPHPFileMatcher(t *testing.T) {
},
} {
m := &MatchFile{
Root: "./testdata",
TryFiles: []string{"{http.request.uri.path}", "{http.request.uri.path}/index.php"},
SplitPath: []string{".php"},
fileSystem: osFS{},
Root: "./testdata",
TryFiles: []string{"{http.request.uri.path}", "{http.request.uri.path}/index.php"},
SplitPath: []string{".php"},
}
u, err := url.Parse(tc.path)

View file

@ -15,7 +15,11 @@
package fileserver
import (
"encoding/json"
"errors"
"fmt"
"io"
"io/fs"
weakrand "math/rand"
"mime"
"net/http"
@ -39,10 +43,63 @@ func init() {
caddy.RegisterModule(FileServer{})
}
// FileServer implements a static file server responder for Caddy.
// FileServer implements a handler that serves static files.
//
// The path of the file to serve is constructed by joining the site root
// and the sanitized request path. Any and all files within the root and
// links with targets outside the site root may therefore be accessed.
// For example, with a site root of `/www`, requests to `/foo/bar.txt`
// will serve the file at `/www/foo/bar.txt`.
//
// The request path is sanitized using the Go standard library's
// path.Clean() function (https://pkg.go.dev/path#Clean) before being
// joined to the root. Request paths must be valid and well-formed.
//
// For requests that access directories instead of regular files,
// Caddy will attempt to serve an index file if present. For example,
// a request to `/dir/` will attempt to serve `/dir/index.html` if
// it exists. The index file names to try are configurable. If a
// requested directory does not have an index file, Caddy writes a
// 404 response. Alternatively, file browsing can be enabled with
// the "browse" parameter which shows a list of files when directories
// are requested if no index file is present.
//
// By default, this handler will canonicalize URIs so that requests to
// directories end with a slash, but requests to regular files do not.
// This is enforced with HTTP redirects automatically and can be disabled.
// Canonicalization redirects are not issued, however, if a URI rewrite
// modified the last component of the path (the filename).
//
// This handler sets the Etag and Last-Modified headers for static files.
// It does not perform MIME sniffing to determine Content-Type based on
// contents, but does use the extension (if known); see the Go docs for
// details: https://pkg.go.dev/mime#TypeByExtension
//
// The file server properly handles requests with If-Match,
// If-Unmodified-Since, If-Modified-Since, If-None-Match, Range, and
// If-Range headers. It includes the file's modification time in the
// Last-Modified header of the response.
type FileServer struct {
// The file system implementation to use. By default, Caddy uses the local
// disk file system.
//
// File system modules used here must adhere to the following requirements:
// - Implement fs.StatFS interface.
// - Support seeking on opened files; i.e.returned fs.File values must
// implement the io.Seeker interface. This is required for determining
// Content-Length and satisfying Range requests.
// - fs.File values that represent directories must implement the
// fs.ReadDirFile interface so that directory listings can be procured.
FileSystemRaw json.RawMessage `json:"file_system,omitempty" caddy:"namespace=caddy.fs inline_key=backend"`
fileSystem fs.StatFS
// The path to the root of the site. Default is `{http.vars.root}` if set,
// or current working directory otherwise.
// or current working directory otherwise. This should be a trusted value.
//
// Note that a site root is not a sandbox. Although the file server does
// sanitize the request URI to prevent directory traversal, files (including
// links) within the site root may be directly accessed based on the request
// path. Files and folders within the root should be secure and trustworthy.
Root string `json:"root,omitempty"`
// A list of files or folders to hide; the file server will pretend as if
@ -63,6 +120,7 @@ type FileServer struct {
Hide []string `json:"hide,omitempty"`
// The names of files to try as index files if a folder is requested.
// Default: index.html, index.txt.
IndexNames []string `json:"index_names,omitempty"`
// Enables file listings if a directory was requested and no index
@ -95,8 +153,7 @@ type FileServer struct {
// If no order specified here, the first encoding from the Accept-Encoding header
// that both client and server support is used
PrecompressedOrder []string `json:"precompressed_order,omitempty"`
precompressors map[string]encode.Precompressed
precompressors map[string]encode.Precompressed
logger *zap.Logger
}
@ -113,6 +170,18 @@ func (FileServer) CaddyModule() caddy.ModuleInfo {
func (fsrv *FileServer) Provision(ctx caddy.Context) error {
fsrv.logger = ctx.Logger(fsrv)
// establish which file system (possibly a virtual one) we'll be using
if len(fsrv.FileSystemRaw) > 0 {
mod, err := ctx.LoadModule(fsrv, "FileSystemRaw")
if err != nil {
return fmt.Errorf("loading file system module: %v", err)
}
fsrv.fileSystem = mod.(fs.StatFS)
}
if fsrv.fileSystem == nil {
fsrv.fileSystem = osFS{}
}
if fsrv.Root == "" {
fsrv.Root = "{http.vars.root}"
}
@ -131,6 +200,7 @@ func (fsrv *FileServer) Provision(ctx caddy.Context) error {
}
}
// support precompressed sidecar files
mods, err := ctx.LoadModule(fsrv, "PrecompressedRaw")
if err != nil {
return fmt.Errorf("loading encoder modules: %v", err)
@ -184,12 +254,12 @@ func (fsrv *FileServer) ServeHTTP(w http.ResponseWriter, r *http.Request, next c
zap.String("result", filename))
// get information about the file
info, err := os.Stat(filename)
info, err := fsrv.fileSystem.Stat(filename)
if err != nil {
err = mapDirOpenError(err, filename)
if os.IsNotExist(err) {
err = fsrv.mapDirOpenError(err, filename)
if errors.Is(err, fs.ErrNotExist) {
return fsrv.notFound(w, r, next)
} else if os.IsPermission(err) {
} else if errors.Is(err, fs.ErrPermission) {
return caddyhttp.Error(http.StatusForbidden, err)
}
return caddyhttp.Error(http.StatusInternalServerError, err)
@ -210,7 +280,7 @@ func (fsrv *FileServer) ServeHTTP(w http.ResponseWriter, r *http.Request, next c
continue
}
indexInfo, err := os.Stat(indexPath)
indexInfo, err := fsrv.fileSystem.Stat(indexPath)
if err != nil {
continue
}
@ -280,7 +350,7 @@ func (fsrv *FileServer) ServeHTTP(w http.ResponseWriter, r *http.Request, next c
}
}
var file *os.File
var file fs.File
// check for precompressed files
for _, ae := range encode.AcceptedEncodings(r, fsrv.PrecompressedOrder) {
@ -289,7 +359,7 @@ func (fsrv *FileServer) ServeHTTP(w http.ResponseWriter, r *http.Request, next c
continue
}
compressedFilename := filename + precompress.Suffix()
compressedInfo, err := os.Stat(compressedFilename)
compressedInfo, err := fsrv.fileSystem.Stat(compressedFilename)
if err != nil || compressedInfo.IsDir() {
fsrv.logger.Debug("precompressed file not accessible", zap.String("filename", compressedFilename), zap.Error(err))
continue
@ -328,14 +398,12 @@ func (fsrv *FileServer) ServeHTTP(w http.ResponseWriter, r *http.Request, next c
// set the ETag - note that a conditional If-None-Match request is handled
// by http.ServeContent below, which checks against this ETag value
w.Header().Set("ETag", calculateEtag(info))
w.Header().Set("Etag", calculateEtag(info))
if w.Header().Get("Content-Type") == "" {
mtyp := mime.TypeByExtension(filepath.Ext(filename))
if mtyp == "" {
// do not allow Go to sniff the content-type; see
// https://www.youtube.com/watch?v=8t8JYpt0egE
// TODO: If we want a Content-Type, consider writing a default of application/octet-stream - this is secure but violates spec
// do not allow Go to sniff the content-type; see https://www.youtube.com/watch?v=8t8JYpt0egE
w.Header()["Content-Type"] = nil
} else {
w.Header().Set("Content-Type", mtyp)
@ -375,7 +443,7 @@ func (fsrv *FileServer) ServeHTTP(w http.ResponseWriter, r *http.Request, next c
// that errors generated by ServeContent are written immediately
// to the response, so we cannot handle them (but errors there
// are rare)
http.ServeContent(w, r, info.Name(), info.ModTime(), file)
http.ServeContent(w, r, info.Name(), info.ModTime(), file.(io.ReadSeeker))
return nil
}
@ -384,10 +452,10 @@ func (fsrv *FileServer) ServeHTTP(w http.ResponseWriter, r *http.Request, next c
// the response is configured to inform the client how to best handle it
// and a well-described handler error is returned (do not wrap the
// returned error value).
func (fsrv *FileServer) openFile(filename string, w http.ResponseWriter) (*os.File, error) {
file, err := os.Open(filename)
func (fsrv *FileServer) openFile(filename string, w http.ResponseWriter) (fs.File, error) {
file, err := fsrv.fileSystem.Open(filename)
if err != nil {
err = mapDirOpenError(err, filename)
err = fsrv.mapDirOpenError(err, filename)
if os.IsNotExist(err) {
fsrv.logger.Debug("file not found", zap.String("filename", filename), zap.Error(err))
return nil, caddyhttp.Error(http.StatusNotFound, err)
@ -412,8 +480,8 @@ func (fsrv *FileServer) openFile(filename string, w http.ResponseWriter) (*os.Fi
// Adapted from the Go standard library; originally written by Nathaniel Caza.
// https://go-review.googlesource.com/c/go/+/36635/
// https://go-review.googlesource.com/c/go/+/36804/
func mapDirOpenError(originalErr error, name string) error {
if os.IsNotExist(originalErr) || os.IsPermission(originalErr) {
func (fsrv *FileServer) mapDirOpenError(originalErr error, name string) error {
if errors.Is(originalErr, fs.ErrNotExist) || errors.Is(originalErr, fs.ErrPermission) {
return originalErr
}
@ -422,12 +490,12 @@ func mapDirOpenError(originalErr error, name string) error {
if parts[i] == "" {
continue
}
fi, err := os.Stat(strings.Join(parts[:i+1], separator))
fi, err := fsrv.fileSystem.Stat(strings.Join(parts[:i+1], separator))
if err != nil {
return originalErr
}
if !fi.IsDir() {
return os.ErrNotExist
return fs.ErrNotExist
}
}
@ -545,6 +613,16 @@ func (wr statusOverrideResponseWriter) WriteHeader(int) {
wr.ResponseWriter.WriteHeader(wr.code)
}
// osFS is a simple fs.StatFS implementation that uses the local
// file system. (We do not use os.DirFS because we do our own
// rooting or path prefixing without being constrained to a single
// root folder. The standard os.DirFS implementation is problematic
// since roots can be dynamic in our application.)
type osFS struct{}
func (osFS) Open(name string) (fs.File, error) { return os.Open(name) }
func (osFS) Stat(name string) (fs.FileInfo, error) { return os.Stat(name) }
var defaultIndexNames = []string{"index.html", "index.txt"}
const (