From 3903642aa7cf711cb20f27958e7ee5b15fe2bbb3 Mon Sep 17 00:00:00 2001
From: Mohammed Al Sahaf <msaa1990@gmail.com>
Date: Fri, 9 Apr 2021 18:06:25 +0000
Subject: [PATCH] caddyfile: reject cyclic imports (#4022)

* caddyfile: reject recursive self-imports

* caddyfile: detect and reject cyclic imports of snippets and files

* caddyfile: do not be stickler about connected nodes not being connected already

* caddyfile: include missing test artifacts of cyclic imports

* address review comments
---
 caddyconfig/caddyfile/importgraph.go          | 127 ++++++++++++++++++
 caddyconfig/caddyfile/lexer.go                |   8 +-
 caddyconfig/caddyfile/parse.go                |  37 ++++-
 caddyconfig/caddyfile/parse_test.go           |  22 +++
 .../caddyfile/testdata/import_recursive0.txt  |   1 +
 .../caddyfile/testdata/import_recursive1.txt  |   1 +
 .../caddyfile/testdata/import_recursive2.txt  |   1 +
 .../caddyfile/testdata/import_recursive3.txt  |   1 +
 8 files changed, 193 insertions(+), 5 deletions(-)
 create mode 100644 caddyconfig/caddyfile/importgraph.go
 create mode 100644 caddyconfig/caddyfile/testdata/import_recursive0.txt
 create mode 100644 caddyconfig/caddyfile/testdata/import_recursive1.txt
 create mode 100644 caddyconfig/caddyfile/testdata/import_recursive2.txt
 create mode 100644 caddyconfig/caddyfile/testdata/import_recursive3.txt

diff --git a/caddyconfig/caddyfile/importgraph.go b/caddyconfig/caddyfile/importgraph.go
new file mode 100644
index 000000000..659c36800
--- /dev/null
+++ b/caddyconfig/caddyfile/importgraph.go
@@ -0,0 +1,127 @@
+// Copyright 2015 Matthew Holt and The Caddy Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package caddyfile
+
+import (
+	"fmt"
+)
+
+type adjacency map[string][]string
+
+type importGraph struct {
+	nodes map[string]bool
+	edges adjacency
+}
+
+func (i *importGraph) addNode(name string) {
+	if i.nodes == nil {
+		i.nodes = make(map[string]bool)
+	}
+	if _, exists := i.nodes[name]; exists {
+		return
+	}
+	i.nodes[name] = true
+}
+func (i *importGraph) addNodes(names []string) {
+	for _, name := range names {
+		i.addNode(name)
+	}
+}
+
+func (i *importGraph) removeNode(name string) {
+	delete(i.nodes, name)
+}
+func (i *importGraph) removeNodes(names []string) {
+	for _, name := range names {
+		i.removeNode(name)
+	}
+}
+
+func (i *importGraph) addEdge(from, to string) error {
+	if !i.exists(from) || !i.exists(to) {
+		return fmt.Errorf("one of the nodes does not exist")
+	}
+
+	if i.willCycle(to, from) {
+		return fmt.Errorf("a cycle of imports exists between %s and %s", from, to)
+	}
+
+	if i.areConnected(from, to) {
+		// if connected, there's nothing to do
+		return nil
+	}
+
+	if i.nodes == nil {
+		i.nodes = make(map[string]bool)
+	}
+	if i.edges == nil {
+		i.edges = make(adjacency)
+	}
+
+	i.edges[from] = append(i.edges[from], to)
+	return nil
+}
+func (i *importGraph) addEdges(from string, tos []string) error {
+	for _, to := range tos {
+		err := i.addEdge(from, to)
+		if err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+func (i *importGraph) areConnected(from, to string) bool {
+	al, ok := i.edges[from]
+	if !ok {
+		return false
+	}
+	for _, v := range al {
+		if v == to {
+			return true
+		}
+	}
+	return false
+}
+
+func (i *importGraph) willCycle(from, to string) bool {
+	collector := make(map[string]bool)
+
+	var visit func(string)
+	visit = func(start string) {
+		if !collector[start] {
+			collector[start] = true
+			for _, v := range i.edges[start] {
+				visit(v)
+			}
+		}
+	}
+
+	for _, v := range i.edges[from] {
+		visit(v)
+	}
+	for k := range collector {
+		if to == k {
+			return true
+		}
+	}
+
+	return false
+}
+
+func (i *importGraph) exists(key string) bool {
+	_, exists := i.nodes[key]
+	return exists
+}
diff --git a/caddyconfig/caddyfile/lexer.go b/caddyconfig/caddyfile/lexer.go
index 188ef0649..f4da2391e 100755
--- a/caddyconfig/caddyfile/lexer.go
+++ b/caddyconfig/caddyfile/lexer.go
@@ -35,9 +35,11 @@ type (
 
 	// Token represents a single parsable unit.
 	Token struct {
-		File string
-		Line int
-		Text string
+		File        string
+		Line        int
+		Text        string
+		inSnippet   bool
+		snippetName string
 	}
 )
 
diff --git a/caddyconfig/caddyfile/parse.go b/caddyconfig/caddyfile/parse.go
index 50e3244be..52abf4786 100755
--- a/caddyconfig/caddyfile/parse.go
+++ b/caddyconfig/caddyfile/parse.go
@@ -16,6 +16,7 @@ package caddyfile
 
 import (
 	"bytes"
+	"fmt"
 	"io/ioutil"
 	"log"
 	"os"
@@ -40,7 +41,13 @@ func Parse(filename string, input []byte) ([]ServerBlock, error) {
 	if err != nil {
 		return nil, err
 	}
-	p := parser{Dispenser: NewDispenser(tokens)}
+	p := parser{
+		Dispenser: NewDispenser(tokens),
+		importGraph: importGraph{
+			nodes: make(map[string]bool),
+			edges: make(adjacency),
+		},
+	}
 	return p.parseAll()
 }
 
@@ -110,6 +117,7 @@ type parser struct {
 	eof             bool        // if we encounter a valid EOF in a hard place
 	definedSnippets map[string][]Token
 	nesting         int
+	importGraph     importGraph
 }
 
 func (p *parser) parseAll() ([]ServerBlock, error) {
@@ -165,6 +173,15 @@ func (p *parser) begin() error {
 		if err != nil {
 			return err
 		}
+		// Just as we need to track which file the token comes from, we need to
+		// keep track of which snippets do the tokens come from. This is helpful
+		// in tracking import cycles across files/snippets by namespacing them. Without
+		// this we end up with false-positives in cycle-detection.
+		for k, v := range tokens {
+			v.inSnippet = true
+			v.snippetName = name
+			tokens[k] = v
+		}
 		p.definedSnippets[name] = tokens
 		// empty block keys so we don't save this block as a real server.
 		p.block.Keys = nil
@@ -314,10 +331,15 @@ func (p *parser) doImport() error {
 	tokensBefore := p.tokens[:p.cursor-1-len(args)]
 	tokensAfter := p.tokens[p.cursor+1:]
 	var importedTokens []Token
+	var nodes []string
 
 	// first check snippets. That is a simple, non-recursive replacement
 	if p.definedSnippets != nil && p.definedSnippets[importPattern] != nil {
 		importedTokens = p.definedSnippets[importPattern]
+		if len(importedTokens) > 0 {
+			// just grab the first one
+			nodes = append(nodes, fmt.Sprintf("%s:%s", importedTokens[0].File, importedTokens[0].snippetName))
+		}
 	} else {
 		// make path relative to the file of the _token_ being processed rather
 		// than current working directory (issue #867) and then use glob to get
@@ -353,7 +375,6 @@ func (p *parser) doImport() error {
 		}
 
 		// collect all the imported tokens
-
 		for _, importFile := range matches {
 			newTokens, err := p.doSingleImport(importFile)
 			if err != nil {
@@ -361,6 +382,18 @@ func (p *parser) doImport() error {
 			}
 			importedTokens = append(importedTokens, newTokens...)
 		}
+		nodes = matches
+	}
+
+	nodeName := p.File()
+	if p.Token().inSnippet {
+		nodeName += fmt.Sprintf(":%s", p.Token().snippetName)
+	}
+	p.importGraph.addNode(nodeName)
+	p.importGraph.addNodes(nodes)
+	if err := p.importGraph.addEdges(nodeName, nodes); err != nil {
+		p.importGraph.removeNodes(nodes)
+		return err
 	}
 
 	// copy the tokens so we don't overwrite p.definedSnippets
diff --git a/caddyconfig/caddyfile/parse_test.go b/caddyconfig/caddyfile/parse_test.go
index 94a69a418..3c7db79bd 100755
--- a/caddyconfig/caddyfile/parse_test.go
+++ b/caddyconfig/caddyfile/parse_test.go
@@ -444,6 +444,28 @@ func TestParseAll(t *testing.T) {
 
 		{`import notfound/*`, false, [][]string{}},        // glob needn't error with no matches
 		{`import notfound/file.conf`, true, [][]string{}}, // but a specific file should
+
+		// recursive self-import
+		{`import testdata/import_recursive0.txt`, true, [][]string{}},
+		{`import testdata/import_recursive3.txt
+		import testdata/import_recursive1.txt`, true, [][]string{}},
+
+		// cyclic imports
+		{`(A) {
+			import A
+		}
+		:80
+		import A
+		`, true, [][]string{}},
+		{`(A) {
+			import B
+		}
+		(B) {
+			import A
+		}
+		:80
+		import A
+		`, true, [][]string{}},
 	} {
 		p := testParser(test.input)
 		blocks, err := p.parseAll()
diff --git a/caddyconfig/caddyfile/testdata/import_recursive0.txt b/caddyconfig/caddyfile/testdata/import_recursive0.txt
new file mode 100644
index 000000000..4d827b33e
--- /dev/null
+++ b/caddyconfig/caddyfile/testdata/import_recursive0.txt
@@ -0,0 +1 @@
+import import_recursive0.txt
\ No newline at end of file
diff --git a/caddyconfig/caddyfile/testdata/import_recursive1.txt b/caddyconfig/caddyfile/testdata/import_recursive1.txt
new file mode 100644
index 000000000..9b6102ed7
--- /dev/null
+++ b/caddyconfig/caddyfile/testdata/import_recursive1.txt
@@ -0,0 +1 @@
+import import_recursive2.txt
\ No newline at end of file
diff --git a/caddyconfig/caddyfile/testdata/import_recursive2.txt b/caddyconfig/caddyfile/testdata/import_recursive2.txt
new file mode 100644
index 000000000..5553dea38
--- /dev/null
+++ b/caddyconfig/caddyfile/testdata/import_recursive2.txt
@@ -0,0 +1 @@
+import import_recursive3.txt
\ No newline at end of file
diff --git a/caddyconfig/caddyfile/testdata/import_recursive3.txt b/caddyconfig/caddyfile/testdata/import_recursive3.txt
new file mode 100644
index 000000000..fcf0237f6
--- /dev/null
+++ b/caddyconfig/caddyfile/testdata/import_recursive3.txt
@@ -0,0 +1 @@
+import import_recursive1.txt
\ No newline at end of file