From d409d3ab57894de853bbc5fbacf32628b4d8fa1e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kim=20=22BKC=22=20Carlb=C3=A4cker?=
 <kim.carlbacker@gmail.com>
Date: Thu, 13 Apr 2017 04:52:24 +0200
Subject: [PATCH] Sanitation fix from Gogs (#1461)

* Santiation fix from Gogs

* Linting

* Fix build-errors

* still not working

* Fix all the things!

* gofmt

* Add code-injection checks
---
 models/repo.go                     |  6 +--
 models/user.go                     |  4 +-
 modules/markdown/markdown.go       | 21 +---------
 modules/markdown/sanitizer.go      | 66 ++++++++++++++++++++++++++++++
 modules/markdown/sanitizer_test.go | 44 ++++++++++++++++++++
 modules/templates/helper.go        |  2 +-
 routers/init.go                    |  2 +-
 7 files changed, 118 insertions(+), 27 deletions(-)
 create mode 100644 modules/markdown/sanitizer.go
 create mode 100644 modules/markdown/sanitizer_test.go

diff --git a/models/repo.go b/models/repo.go
index a35a7597f4..682dbf65a7 100644
--- a/models/repo.go
+++ b/models/repo.go
@@ -595,7 +595,7 @@ func (repo *Repository) DescriptionHTML() template.HTML {
 	sanitize := func(s string) string {
 		return fmt.Sprintf(`<a href="%[1]s" target="_blank" rel="noopener">%[1]s</a>`, s)
 	}
-	return template.HTML(descPattern.ReplaceAllStringFunc(markdown.Sanitizer.Sanitize(repo.Description), sanitize))
+	return template.HTML(descPattern.ReplaceAllStringFunc(markdown.Sanitize(repo.Description), sanitize))
 }
 
 // LocalCopyPath returns the local repository copy path
@@ -861,8 +861,8 @@ func cleanUpMigrateGitConfig(configPath string) error {
 // createDelegateHooks creates all the hooks scripts for the repo
 func createDelegateHooks(repoPath string) (err error) {
 	var (
-		hookNames = []string{"pre-receive", "update", "post-receive"}
-		hookTpl = fmt.Sprintf("#!/usr/bin/env %s\ndata=$(cat)\nexitcodes=\"\"\nhookname=$(basename $0)\nGIT_DIR=${GIT_DIR:-$(dirname $0)}\n\nfor hook in ${GIT_DIR}/hooks/${hookname}.d/*; do\ntest -x \"${hook}\" || continue\necho \"${data}\" | \"${hook}\"\nexitcodes=\"${exitcodes} $?\"\ndone\n\nfor i in ${exitcodes}; do\n[ ${i} -eq 0 ] || exit ${i}\ndone\n", setting.ScriptType)
+		hookNames     = []string{"pre-receive", "update", "post-receive"}
+		hookTpl       = fmt.Sprintf("#!/usr/bin/env %s\ndata=$(cat)\nexitcodes=\"\"\nhookname=$(basename $0)\nGIT_DIR=${GIT_DIR:-$(dirname $0)}\n\nfor hook in ${GIT_DIR}/hooks/${hookname}.d/*; do\ntest -x \"${hook}\" || continue\necho \"${data}\" | \"${hook}\"\nexitcodes=\"${exitcodes} $?\"\ndone\n\nfor i in ${exitcodes}; do\n[ ${i} -eq 0 ] || exit ${i}\ndone\n", setting.ScriptType)
 		giteaHookTpls = []string{
 			fmt.Sprintf("#!/usr/bin/env %s\n\"%s\" hook --config='%s' pre-receive\n", setting.ScriptType, setting.AppPath, setting.CustomConf),
 			fmt.Sprintf("#!/usr/bin/env %s\n\"%s\" hook --config='%s' update $1 $2 $3\n", setting.ScriptType, setting.AppPath, setting.CustomConf),
diff --git a/models/user.go b/models/user.go
index 72c21f4369..59ce631172 100644
--- a/models/user.go
+++ b/models/user.go
@@ -163,7 +163,7 @@ func (u *User) UpdateDiffViewStyle(style string) error {
 func (u *User) AfterSet(colName string, _ xorm.Cell) {
 	switch colName {
 	case "full_name":
-		u.FullName = markdown.Sanitizer.Sanitize(u.FullName)
+		u.FullName = markdown.Sanitize(u.FullName)
 	case "created_unix":
 		u.Created = time.Unix(u.CreatedUnix, 0).Local()
 	case "updated_unix":
@@ -867,7 +867,7 @@ func updateUser(e Engine, u *User) error {
 	u.Website = base.TruncateString(u.Website, 255)
 	u.Description = base.TruncateString(u.Description, 255)
 
-	u.FullName = markdown.Sanitizer.Sanitize(u.FullName)
+	u.FullName = markdown.Sanitize(u.FullName)
 	_, err := e.Id(u.ID).AllCols().Update(u)
 	return err
 }
diff --git a/modules/markdown/markdown.go b/modules/markdown/markdown.go
index 52459e360e..813fabe178 100644
--- a/modules/markdown/markdown.go
+++ b/modules/markdown/markdown.go
@@ -15,7 +15,6 @@ import (
 	"strings"
 
 	"github.com/Unknwon/com"
-	"github.com/microcosm-cc/bluemonday"
 	"github.com/russross/blackfriday"
 	"golang.org/x/net/html"
 
@@ -29,24 +28,6 @@ const (
 	IssueNameStyleAlphanumeric = "alphanumeric"
 )
 
-// Sanitizer markdown sanitizer
-var Sanitizer = bluemonday.UGCPolicy()
-
-// BuildSanitizer initializes sanitizer with allowed attributes based on settings.
-// This function should only be called once during entire application lifecycle.
-func BuildSanitizer() {
-	// Normal markdown-stuff
-	Sanitizer.AllowAttrs("class").Matching(regexp.MustCompile(`[\p{L}\p{N}\s\-_',:\[\]!\./\\\(\)&]*`)).OnElements("code", "div", "ul", "ol", "dl")
-
-	// Checkboxes
-	Sanitizer.AllowAttrs("type").Matching(regexp.MustCompile(`^checkbox$`)).OnElements("input")
-	Sanitizer.AllowAttrs("checked", "disabled").OnElements("input")
-	Sanitizer.AllowNoAttrs().OnElements("label")
-
-	// Custom URL-Schemes
-	Sanitizer.AllowURLSchemes(setting.Markdown.CustomURLSchemes...)
-}
-
 // IsMarkdownFile reports whether name looks like a Markdown file
 // based on its extension.
 func IsMarkdownFile(name string) bool {
@@ -708,7 +689,7 @@ func render(rawBytes []byte, urlPrefix string, metas map[string]string, isWikiMa
 	urlPrefix = strings.Replace(urlPrefix, " ", "+", -1)
 	result := RenderRaw(rawBytes, urlPrefix, isWikiMarkdown)
 	result = PostProcess(result, urlPrefix, metas, isWikiMarkdown)
-	result = Sanitizer.SanitizeBytes(result)
+	result = SanitizeBytes(result)
 	return result
 }
 
diff --git a/modules/markdown/sanitizer.go b/modules/markdown/sanitizer.go
new file mode 100644
index 0000000000..14e8fc1b22
--- /dev/null
+++ b/modules/markdown/sanitizer.go
@@ -0,0 +1,66 @@
+// Copyright 2017 The Gitea Authors. All rights reserved.
+// Copyright 2017 The Gogs Authors. All rights reserved.
+// Use of this source code is governed by a MIT-style
+// license that can be found in the LICENSE file.
+
+package markdown
+
+import (
+	"regexp"
+	"sync"
+
+	"code.gitea.io/gitea/modules/log"
+	"code.gitea.io/gitea/modules/setting"
+
+	"github.com/microcosm-cc/bluemonday"
+)
+
+// Sanitizer is a protection wrapper of *bluemonday.Policy which does not allow
+// any modification to the underlying policies once it's been created.
+type Sanitizer struct {
+	policy *bluemonday.Policy
+	init   sync.Once
+}
+
+var sanitizer = &Sanitizer{}
+
+// NewSanitizer initializes sanitizer with allowed attributes based on settings.
+// Multiple calls to this function will only create one instance of Sanitizer during
+// entire application lifecycle.
+func NewSanitizer() {
+	log.Trace("Markdown: sanitizer initialization requested")
+	sanitizer.init.Do(func() {
+		sanitizer.policy = bluemonday.UGCPolicy()
+		// We only want to allow HighlightJS specific classes for code blocks
+		sanitizer.policy.AllowAttrs("class").Matching(regexp.MustCompile(`^language-\w+$`)).OnElements("code")
+
+		// Checkboxes
+		sanitizer.policy.AllowAttrs("type").Matching(regexp.MustCompile(`^checkbox$`)).OnElements("input")
+		sanitizer.policy.AllowAttrs("checked", "disabled").OnElements("input")
+
+		// Custom URL-Schemes
+		sanitizer.policy.AllowURLSchemes(setting.Markdown.CustomURLSchemes...)
+
+		log.Trace("Markdown: sanitizer initialized")
+	})
+}
+
+// Sanitize takes a string that contains a HTML fragment or document and applies policy whitelist.
+func Sanitize(s string) string {
+	if sanitizer.policy == nil {
+		NewSanitizer()
+	}
+	return sanitizer.policy.Sanitize(s)
+}
+
+// SanitizeBytes takes a []byte slice that contains a HTML fragment or document and applies policy whitelist.
+func SanitizeBytes(b []byte) []byte {
+	if len(b) == 0 {
+		// nothing to sanitize
+		return b
+	}
+	if sanitizer.policy == nil {
+		NewSanitizer()
+	}
+	return sanitizer.policy.SanitizeBytes(b)
+}
diff --git a/modules/markdown/sanitizer_test.go b/modules/markdown/sanitizer_test.go
new file mode 100644
index 0000000000..77a4b33c84
--- /dev/null
+++ b/modules/markdown/sanitizer_test.go
@@ -0,0 +1,44 @@
+// Copyright 2017 The Gitea Authors. All rights reserved.
+// Copyright 2017 The Gogs Authors. All rights reserved.
+// Use of this source code is governed by a MIT-style
+// license that can be found in the LICENSE file.
+
+package markdown
+
+import (
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+)
+
+func Test_Sanitizer(t *testing.T) {
+	NewSanitizer()
+	testCases := []string{
+		// Regular
+		`<a onblur="alert(secret)" href="http://www.google.com">Google</a>`, `<a href="http://www.google.com" rel="nofollow">Google</a>`,
+
+		// Code highlighting class
+		`<code class="random string"></code>`, `<code></code>`,
+		`<code class="language-random ui tab active menu attached animating sidebar following bar center"></code>`, `<code></code>`,
+		`<code class="language-go"></code>`, `<code class="language-go"></code>`,
+
+		// Input checkbox
+		`<input type="hidden">`, ``,
+		`<input type="checkbox">`, `<input type="checkbox">`,
+		`<input checked disabled autofocus>`, `<input checked="" disabled="">`,
+
+		// Code highlight injection
+		`<code class="language-random&#32;ui&#32;tab&#32;active&#32;menu&#32;attached&#32;animating&#32;sidebar&#32;following&#32;bar&#32;center"></code>`, `<code></code>`,
+		`<code class="language-lol&#32;ui&#32;tab&#32;active&#32;menu&#32;attached&#32;animating&#32;sidebar&#32;following&#32;bar&#32;center">
+<code class="language-lol&#32;ui&#32;container&#32;input&#32;huge&#32;basic&#32;segment&#32;center">&nbsp;</code>
+<img src="https://try.gogs.io/img/favicon.png" width="200" height="200">
+<code class="language-lol&#32;ui&#32;container&#32;input&#32;massive&#32;basic&#32;segment">Hello there! Something has gone wrong, we are working on it.</code>
+<code class="language-lol&#32;ui&#32;container&#32;input&#32;huge&#32;basic&#32;segment">In the meantime, play a game with us at&nbsp;<a href="http://example.com/">example.com</a>.</code>
+</code>`, "<code>\n<code>\u00a0</code>\n<img src=\"https://try.gogs.io/img/favicon.png\" width=\"200\" height=\"200\">\n<code>Hello there! Something has gone wrong, we are working on it.</code>\n<code>In the meantime, play a game with us at\u00a0<a href=\"http://example.com/\" rel=\"nofollow\">example.com</a>.</code>\n</code>",
+	}
+
+	for i := 0; i < len(testCases); i += 2 {
+		assert.Equal(t, testCases[i+1], Sanitize(testCases[i]))
+		assert.Equal(t, testCases[i+1], string(SanitizeBytes([]byte(testCases[i]))))
+	}
+}
diff --git a/modules/templates/helper.go b/modules/templates/helper.go
index 51877f8039..235b649547 100644
--- a/modules/templates/helper.go
+++ b/modules/templates/helper.go
@@ -164,7 +164,7 @@ func Safe(raw string) template.HTML {
 
 // Str2html render Markdown text to HTML
 func Str2html(raw string) template.HTML {
-	return template.HTML(markdown.Sanitizer.Sanitize(raw))
+	return template.HTML(markdown.Sanitize(raw))
 }
 
 // List traversings the list
diff --git a/routers/init.go b/routers/init.go
index 3b1618a383..dec7f1818f 100644
--- a/routers/init.go
+++ b/routers/init.go
@@ -49,7 +49,7 @@ func GlobalInit() {
 
 	if setting.InstallLock {
 		highlight.NewContext()
-		markdown.BuildSanitizer()
+		markdown.NewSanitizer()
 		if err := models.NewEngine(); err != nil {
 			log.Fatal(4, "Failed to initialize ORM engine: %v", err)
 		}