Skip to content

Commit

Permalink
html: properly handle exclamation marks in comments
Browse files Browse the repository at this point in the history
Properly handle the case where HTML comments begin with exclamation
marks and have no other content, i.e. "<!--!-->". Previously these
comments would cause the tokenizer to treat everything following them
as part of the comment.

Fixes golang/go#37771

Change-Id: I78ea310debc3846f145d62cba017055abc7fa4e0
Reviewed-on: https://go-review.googlesource.com/c/net/+/442496
Run-TryBot: Roland Shoemaker <[email protected]>
TryBot-Result: Gopher Robot <[email protected]>
Reviewed-by: Damien Neil <[email protected]>
  • Loading branch information
rolandshoemaker committed Oct 20, 2022
1 parent da05058 commit 430a433
Show file tree
Hide file tree
Showing 2 changed files with 34 additions and 19 deletions.
8 changes: 6 additions & 2 deletions html/token.go
Original file line number Diff line number Diff line change
Expand Up @@ -605,7 +605,10 @@ func (z *Tokenizer) readComment() {
z.data.end = z.data.start
}
}()
for dashCount := 2; ; {

var dashCount int
beginning := true
for {
c := z.readByte()
if z.err != nil {
// Ignore up to two dashes at EOF.
Expand All @@ -620,7 +623,7 @@ func (z *Tokenizer) readComment() {
dashCount++
continue
case '>':
if dashCount >= 2 {
if dashCount >= 2 || beginning {
z.data.end = z.raw.end - len("-->")
return
}
Expand All @@ -638,6 +641,7 @@ func (z *Tokenizer) readComment() {
}
}
dashCount = 0
beginning = false
}
}

Expand Down
45 changes: 28 additions & 17 deletions html/token_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -366,6 +366,16 @@ var tokenTests = []tokenTest{
"a<!--x--!>z",
"a$<!--x-->$z",
},
{
"comment14",
"a<!--!-->z",
"a$<!--!-->$z",
},
{
"comment15",
"a<!-- !-->z",
"a$<!-- !-->$z",
},
// An attribute with a backslash.
{
"backslash",
Expand Down Expand Up @@ -456,26 +466,27 @@ var tokenTests = []tokenTest{
}

func TestTokenizer(t *testing.T) {
loop:
for _, tt := range tokenTests {
z := NewTokenizer(strings.NewReader(tt.html))
if tt.golden != "" {
for i, s := range strings.Split(tt.golden, "$") {
if z.Next() == ErrorToken {
t.Errorf("%s token %d: want %q got error %v", tt.desc, i, s, z.Err())
continue loop
}
actual := z.Token().String()
if s != actual {
t.Errorf("%s token %d: want %q got %q", tt.desc, i, s, actual)
continue loop
t.Run(tt.desc, func(t *testing.T) {
z := NewTokenizer(strings.NewReader(tt.html))
if tt.golden != "" {
for i, s := range strings.Split(tt.golden, "$") {
if z.Next() == ErrorToken {
t.Errorf("%s token %d: want %q got error %v", tt.desc, i, s, z.Err())
return
}
actual := z.Token().String()
if s != actual {
t.Errorf("%s token %d: want %q got %q", tt.desc, i, s, actual)
return
}
}
}
}
z.Next()
if z.Err() != io.EOF {
t.Errorf("%s: want EOF got %q", tt.desc, z.Err())
}
z.Next()
if z.Err() != io.EOF {
t.Errorf("%s: want EOF got %q", tt.desc, z.Err())
}
})
}
}

Expand Down

0 comments on commit 430a433

Please sign in to comment.