gohugoio · Traumflug · Apr 18, 2021 · Apr 21, 2021 · Apr 21, 2021 · Apr 18, 2021
diff --git a/publisher/htmlElementsCollector.go b/publisher/htmlElementsCollector.go
@@ -135,8 +135,11 @@ func (w *htmlElementsCollectorWriter) Write(p []byte) (n int, err error) {
 				w.buff.WriteByte(b)
 
 				if !w.inQuote && b == '>' {
-					w.endCollecting()
-					break
+					s := w.buff.String()
+					if w.inPreTag == "" || w.parseEndTag(s) {
+						w.endCollecting()
+						break
+					}
 				}
 			}
 		}
@@ -151,31 +154,18 @@ func (w *htmlElementsCollectorWriter) Write(p []byte) (n int, err error) {
 				continue
 			}
 
-			if w.inPreTag != "" { // within preformatted code block
-				s := w.buff.String()
+			// Ignore everything inside a 'preformatted' tag. The tag its self is
+			// recorded already.
+			if w.inPreTag != "" {
+				w.inPreTag = ""
 				w.buff.Reset()
-				if tagName, isEnd := parseEndTag(s); isEnd && w.inPreTag == tagName {
-					w.inPreTag = ""
-				}
 				continue
 			}
 
-			// First check if we have processed this element before.
-			w.collector.mu.RLock()
-
 			// Work with the bytes slice as long as it's practical,
 			// to save memory allocations.
 			b := w.buff.Bytes()
 
-			// See https://github.com/dominikh/go-tools/issues/723
-			//lint:ignore S1030 This construct avoids memory allocation for the string.
-			seen := w.collector.elementSet[string(b)]
-			w.collector.mu.RUnlock()
-			if seen {
-				w.buff.Reset()
-				continue
-			}
-
 			// Filter out unwanted tags
 			// if within preformatted code blocks <pre>, <textarea>, <script>, <style>
 			// comments and doctype tags
@@ -242,28 +232,26 @@ func parseStartTag(s string) (string, bool) {
 	s = strings.TrimPrefix(s, "<")
 	s = strings.TrimSuffix(s, ">")
 
-	spaceIndex := strings.Index(s, " ")
-	if spaceIndex != -1 {
-		s = s[:spaceIndex]
-	}
+	fields := strings.Fields(s);
+	tag := fields[0];
 
-	return strings.ToLower(strings.TrimSpace(s)), true
+	return strings.ToLower(tag), true
 }
 
-func parseEndTag(s string) (string, bool) {
-	if !strings.HasPrefix(s, "</") {
-		return "", false
+func (c *htmlElementsCollectorWriter) parseEndTag(s string) bool {
+	slen := len(s)
+	if slen > 83 {
+		// Avoid processing big amounts with ToLower().
+		s = s[slen - 83:]
 	}
-
-	s = strings.TrimPrefix(s, "</")
-	s = strings.TrimSuffix(s, ">")
-
-	return strings.ToLower(strings.TrimSpace(s)), true
+	s = strings.ToLower(s)
+	return strings.HasSuffix(s, "</"+c.inPreTag+">")
 }
 
-// No need to look inside these for HTML elements.
+// No need to look inside these for HTML elements. These can contain arbitrary
+// code which could be misunderstood as HTML.
 func isPreFormatted(s string) bool {
-	return s == "pre" || s == "textarea" || s == "script" || s == "style"
+	return s == "pre" || s == "textarea" || s == "script" || s == "style" || s == "title" || s == "canvas"
 }
 
 type htmlElement struct {
@@ -299,7 +287,10 @@ func parseHTMLElement(elStr string) (el htmlElement, err error) {
 	// and pretend it's a <div>.
 	if exceptionList[tagName] {
 		tagBuffer = tagName
-		elStr = strings.Replace(elStr, tagName, "div", 1)
+
+		fields := strings.Fields(elStr);
+		fields[0] = "<div"
+		elStr = strings.Join(fields, " ")
 	}
 
 	n, err := html.Parse(strings.NewReader(elStr))
@@ -308,7 +299,7 @@ func parseHTMLElement(elStr string) (el htmlElement, err error) {
 	}
 	var walk func(*html.Node)
 	walk = func(n *html.Node) {
-		if n.Type == html.ElementNode && strings.Contains(elStr, n.Data) {
+		if n.Type == html.ElementNode && strings.Contains(strings.ToLower(elStr), n.Data) {
 			el.Tag = n.Data
 
 			for _, a := range n.Attr {

diff --git a/publisher/htmlElementsCollector_test.go b/publisher/htmlElementsCollector_test.go
@@ -65,6 +65,10 @@ func TestClassCollector(t *testing.T) {
     <thead class="cl2"><tr class="cl3"><td class="cl4"></td></tr></thead>
     <tbody class="cl5"><tr class="cl6"><td class="cl7"></td></tr></tbody>
 </table>`, f("table tbody td thead tr", "cl1 cl2 cl3 cl4 cl5 cl6 cl7", "")},
+		{"thead uppercase", `<TABLE class="CL1">
+    <THEAD class="CL2"><TR class="CL3"><TD class="CL4"></TD></TR></THEAD>
+    <TBODY class="CL5"><TR class="CL6"><TD class="CL7"></TD></TR></TBODY>
+</TABLE>`, f("table tbody td thead tr", "CL1 CL2 CL3 CL4 CL5 CL6 CL7", "")},
 		// https://github.com/gohugoio/hugo/issues/7161
 		{"minified a href", `<a class="b a" href=/></a>`, f("a", "a b", "")},
 		{"AlpineJS bind 1", `<body>
@@ -78,7 +82,7 @@ func TestClassCollector(t *testing.T) {
 			f("div", "bg-black bg-gray-300 inline-block mb-2 mr-1 px-2 py-2 rounded", ""),
 		},
 		{"AlpineJS bind 3", `<div x-bind:class="{ 'text-gray-800':  !checked, 'text-white': checked }"></div>`, f("div", "text-gray-800 text-white", "")},
-		{"AlpineJS bind 4", `<div x-bind:class="{ 'text-gray-800':  !checked, 
+		{"AlpineJS bind 4", `<div x-bind:class="{ 'text-gray-800':  !checked,
 					 'text-white': checked }"></div>`, f("div", "text-gray-800 text-white", "")},
 		{"AlpineJS bind 5", `<a x-bind:class="{
                 'text-a': a && b,
@@ -89,14 +93,19 @@ func TestClassCollector(t *testing.T) {
                 }" class="block w-36 cursor-pointer pr-3 no-underline capitalize"></a>`, f("a", "block capitalize cursor-pointer no-underline pl-2 pl-3 pr-3 text-a text-b text-gray-600 w-36", "")},
 		{"AlpineJS transition 1", `<div x-transition:enter-start="opacity-0 transform mobile:-translate-x-8 sm:-translate-y-8">`, f("div", "mobile:-translate-x-8 opacity-0 sm:-translate-y-8 transform", "")},
 		{"Vue bind", `<div v-bind:class="{ active: isActive }"></div>`, f("div", "active", "")},
+		{"Uppercase tags", `<DIV></DIV>`, f("div", "", "")},
+		{"Predefined tags with distinct casing", `<script>if (a < b) { nothing(); }</SCRIPT><div></div>`, f("div script", "", "")},
 		// Issue #7746
 		{"Apostrophe inside attribute value", `<a class="missingclass" title="Plus d'information">my text</a><div></div>`, f("a div", "missingclass", "")},
 		// Issue #7567
 		{"Script tags content should be skipped", `<script><span>foo</span><span>bar</span></script><div class="foo"></div>`, f("div script", "foo", "")},
 		{"Style tags content should be skipped", `<style>p{color: red;font-size: 20px;}</style><div class="foo"></div>`, f("div style", "foo", "")},
 		{"Pre tags content should be skipped", `<pre class="preclass"><span>foo</span><span>bar</span></pre><div class="foo"></div>`, f("div pre", "foo preclass", "")},
+		{"A < in a preformatted tag content shouldn't mess up", `<script>if (a < b) { doNothing(); }</script><div class="foo"></div>`, f("div script", "foo", "")},
+		{"Preformatted content with <, twice", `<script>r = a < b;</script><div class="foo"></div><script>r = a < b;</script><div class="foo"></div>`, f("div script", "foo", "")},
+		{"JS text named like a tag content shouldn't mess up", `<script>r = a < b; //script>2</script><div class="foo"></div>`, f("div script", "foo", "")},
 		{"Textarea tags content should be skipped", `<textarea class="textareaclass"><span>foo</span><span>bar</span></textarea><div class="foo"></div>`, f("div textarea", "foo textareaclass", "")},
-		{"DOCTYPE should beskipped", `<!DOCTYPE html>`, f("", "", "")},
+		{"DOCTYPE should be skipped", `<!DOCTYPE html>`, f("", "", "")},
 		{"Comments should be skipped", `<!-- example comment -->`, f("", "", "")},
 		// Issue #8417
 		{"Tabs inline", `<hr	id="a" class="foo"><div class="bar">d</div>`, f("div hr", "bar foo", "a")},