From 7b97e045557788efee6803261cf612eaf975c6be Mon Sep 17 00:00:00 2001
From: guillep2k <18600385+guillep2k@users.noreply.github.com>
Date: Tue, 12 Nov 2019 23:27:11 -0300
Subject: [PATCH] Convert EOL to UNIX-style to render MD properly (#8925)

* Convert EOL to UNIX-style to render MD properly

* Update modules/markup/markdown/markdown.go

Co-Authored-By: zeripath <art27@cantab.net>

* Fix lint optimization

* Check for empty content before conversion

* Update modules/util/util.go

Co-Authored-By: zeripath <art27@cantab.net>

* Improved checks and tests

* Add paragraph render test

* Improve speed even more, improve tests

* Small improvement by @gary-kim

* Fix test for DOS

* More improvements

* Restart CI
---
 modules/markup/markdown/markdown.go      |  3 +-
 modules/markup/markdown/markdown_test.go | 22 +++++++++
 modules/util/util.go                     | 37 +++++++++++++++
 modules/util/util_test.go                | 59 ++++++++++++++++++++++++
 4 files changed, 120 insertions(+), 1 deletion(-)

diff --git a/modules/markup/markdown/markdown.go b/modules/markup/markdown/markdown.go
index ff78d7ea3a..fc704243e2 100644
--- a/modules/markup/markdown/markdown.go
+++ b/modules/markup/markdown/markdown.go
@@ -157,7 +157,8 @@ func RenderRaw(body []byte, urlPrefix string, wikiMarkdown bool) []byte {
 		exts |= blackfriday.HardLineBreak
 	}
 
-	body = blackfriday.Run(body, blackfriday.WithRenderer(renderer), blackfriday.WithExtensions(exts))
+	// Need to normalize EOL to UNIX LF to have consistent results in rendering
+	body = blackfriday.Run(util.NormalizeEOL(body), blackfriday.WithRenderer(renderer), blackfriday.WithExtensions(exts))
 	return markup.SanitizeBytes(body)
 }
 
diff --git a/modules/markup/markdown/markdown_test.go b/modules/markup/markdown/markdown_test.go
index b29f870ce5..e80173c6cf 100644
--- a/modules/markup/markdown/markdown_test.go
+++ b/modules/markup/markdown/markdown_test.go
@@ -294,3 +294,25 @@ func TestTotal_RenderString(t *testing.T) {
 		assert.Equal(t, testCases[i+1], line)
 	}
 }
+
+func TestRender_RenderParagraphs(t *testing.T) { // paragraph count must not depend on the EOL style
+	test := func(t *testing.T, str string, cnt int) { // str uses LF; cnt is the expected number of "<p" occurrences
+		unix := []byte(str)
+		res := string(RenderRaw(unix, "", false))
+		assert.Equal(t, cnt, strings.Count(res, "<p")) // testify takes (expected, actual)
+
+		mac := []byte(strings.ReplaceAll(str, "\n", "\r")) // same text with bare CR line endings
+		res = string(RenderRaw(mac, "", false))
+		assert.Equal(t, cnt, strings.Count(res, "<p"))
+
+		dos := []byte(strings.ReplaceAll(str, "\n", "\r\n")) // same text with CRLF line endings
+		res = string(RenderRaw(dos, "", false))
+		assert.Equal(t, cnt, strings.Count(res, "<p"))
+	}
+
+	test(t, "\nOne\nTwo\nThree", 1)
+	test(t, "\n\nOne\nTwo\nThree", 1)
+	test(t, "\n\nOne\nTwo\nThree\n\n\n", 1)
+	test(t, "A\n\nB\nC\n", 2)
+	test(t, "A\n\n\nB\nC\n", 2)
+}
diff --git a/modules/util/util.go b/modules/util/util.go
index 4203b5eb51..6d02b5f52f 100644
--- a/modules/util/util.go
+++ b/modules/util/util.go
@@ -5,6 +5,7 @@
 package util
 
 import (
+	"bytes"
 	"strings"
 )
 
@@ -63,3 +64,39 @@ func Min(a, b int) int {
 func IsEmptyString(s string) bool {
 	return len(strings.TrimSpace(s)) == 0
 }
+
+// NormalizeEOL will convert Windows (CRLF) and Mac (CR) EOLs to UNIX (LF). Input containing no CR is returned as-is, without copying.
+func NormalizeEOL(input []byte) []byte {
+	var right, left, pos int // right: offset of the next CR relative to left; left: read index in input; pos: write index in tmp
+	if right = bytes.IndexByte(input, '\r'); right == -1 {
+		return input
+	}
+	length := len(input)
+	tmp := make([]byte, length) // the output is never longer than the input: every EOL becomes a single LF
+
+	// Handle the first CR found above: copy the bytes before it and write LF in its place (left is still 0, so right is an absolute index).
+	copy(tmp[pos:pos+right], input[left:left+right])
+	pos += right
+	tmp[pos] = '\n'
+	left += right + 1
+	pos++
+
+	for left < length {
+		if input[left] == '\n' { // left is just past a CR here; skip the LF of a CRLF pair
+			left++
+		}
+
+		right = bytes.IndexByte(input[left:], '\r')
+		if right == -1 { // no CR left: copy the remaining bytes verbatim and stop
+			copy(tmp[pos:], input[left:])
+			pos += length - left
+			break
+		}
+		copy(tmp[pos:pos+right], input[left:left+right])
+		pos += right
+		tmp[pos] = '\n'
+		left += right + 1
+		pos++
+	}
+	return tmp[:pos] // only the first pos bytes of tmp were written
+}
diff --git a/modules/util/util_test.go b/modules/util/util_test.go
index 2475065059..04ab42f292 100644
--- a/modules/util/util_test.go
+++ b/modules/util/util_test.go
@@ -5,6 +5,7 @@
 package util
 
 import (
+	"strings"
 	"testing"
 
 	"code.gitea.io/gitea/modules/setting"
@@ -94,3 +95,61 @@ func TestIsEmptyString(t *testing.T) {
 		assert.Equal(t, v.expected, IsEmptyString(v.s))
 	}
 }
+
+func Test_NormalizeEOL(t *testing.T) {
+	data1 := []string{ // lines of a text that starts and ends with empty lines
+		"",
+		"This text starts with empty lines",
+		"another",
+		"",
+		"",
+		"",
+		"Some other empty lines in the middle",
+		"more.",
+		"And more.",
+		"Ends with empty lines too.",
+		"",
+		"",
+		"",
+	}
+
+	data2 := []string{ // lines of a text with no leading or trailing empty lines
+		"This text does not start with empty lines",
+		"another",
+		"",
+		"",
+		"",
+		"Some other empty lines in the middle",
+		"more.",
+		"And more.",
+		"Ends without EOLtoo.",
+	}
+
+	buildEOLData := func(data []string, eol string) []byte { // joins the lines using eol as the separator
+		return []byte(strings.Join(data, eol))
+	}
+
+	dos := buildEOLData(data1, "\r\n")
+	unix := buildEOLData(data1, "\n")
+	mac := buildEOLData(data1, "\r")
+
+	assert.Equal(t, unix, NormalizeEOL(dos))
+	assert.Equal(t, unix, NormalizeEOL(mac))
+	assert.Equal(t, unix, NormalizeEOL(unix)) // LF-only input must come back unchanged
+
+	dos = buildEOLData(data2, "\r\n")
+	unix = buildEOLData(data2, "\n")
+	mac = buildEOLData(data2, "\r")
+
+	assert.Equal(t, unix, NormalizeEOL(dos))
+	assert.Equal(t, unix, NormalizeEOL(mac))
+	assert.Equal(t, unix, NormalizeEOL(unix))
+
+	assert.Equal(t, []byte("one liner"), NormalizeEOL([]byte("one liner"))) // inputs without any CR
+	assert.Equal(t, []byte("\n"), NormalizeEOL([]byte("\n")))
+	assert.Equal(t, []byte("\ntwo liner"), NormalizeEOL([]byte("\ntwo liner")))
+	assert.Equal(t, []byte("two liner\n"), NormalizeEOL([]byte("two liner\n")))
+	assert.Equal(t, []byte{}, NormalizeEOL([]byte{})) // empty input
+
+	assert.Equal(t, []byte("mix\nand\nmatch\n."), NormalizeEOL([]byte("mix\r\nand\rmatch\n."))) // CRLF, CR and LF mixed in one input
+}