Fix escaping of HTML attributes
Attribute values should have been escaped according to
https://html.spec.whatwg.org/multipage/parsing.html#escapingString
and, because they were not, it was possible to achieve an XSS by
overloading a known attribute such as href. This can be seen in the test
added in this commit and is recognised as a vulnerability that existed
prior to this commit.

Additionally, update the versions of the modules we depend upon.
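As an illustration of the behaviour the new test exercises, here is a minimal usage sketch with the library's UGCPolicy; the input and the commented output are taken from the expected values in the test added below, so treat it as a sketch rather than part of the commit itself:

package main

import (
	"fmt"

	"github.com/microcosm-cc/bluemonday"
)

func main() {
	p := bluemonday.UGCPolicy()

	// Crafted href from the new test: the attribute value tries to break out
	// of the quoted attribute and smuggle in a <script> element.
	in := `abc<a href="https://abc&quot;&gt;<script&gt;alert(1)<&#x2f;script/">CLICK`

	// With the attribute escaping in place the payload stays inert inside the
	// href, and the output is:
	// abc<a href="https://abc&amp;quot;&gt;&lt;script&gt;alert(1)&lt;/script/" rel="nofollow">CLICK
	fmt.Println(p.Sanitize(in))
}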
buro9 committed Jul 7, 2021
1 parent c0a6f20 commit f0057e2
Showing 4 changed files with 77 additions and 64 deletions.

go.mod: 2 additions & 1 deletion

@@ -3,7 +3,8 @@ module github.com/microcosm-cc/bluemonday
go 1.16

require (
+	github.com/asaskevich/govalidator v0.0.0-20210307081110-f21760c49a8d

@faulkner commented on Jul 8, 2021

Did you intend to use this somewhere? Running go mod tidy would remove this and clean up some old entries in go.sum.

@buro9 (author, member) commented on Jul 8, 2021

Good catch... I'm still getting used to go mod (no longer an IC). Have now run it and pushed another commit.

Question: Is this enough to trigger another release? In my mind no, but feel free to tell me if you think it does.

@faulkner commented on Jul 8, 2021

> Question: Is this enough to trigger another release? In my mind no, but feel free to tell me if you think it does.

I don't think another release is needed. If someone really doesn't want that extra dep, they could go get github.com/microcosm-cc/bluemonday@1a86fcd43b1df85051df5533c0a325b86596a2d2 until a new version is released. Thanks for the quick update!

	github.com/aymerick/douceur v0.2.0
	github.com/gorilla/css v1.0.0 // indirect
-	golang.org/x/net v0.0.0-20210610132358-84b48f89b13b
+	golang.org/x/net v0.0.0-20210614182718-04defd469f4e
)

go.sum: 4 additions & 0 deletions

@@ -1,3 +1,5 @@
+github.com/asaskevich/govalidator v0.0.0-20210307081110-f21760c49a8d h1:Byv0BzEl3/e6D5CLfI0j/7hiIEtvGVFPCZ7Ei2oq8iQ=
+github.com/asaskevich/govalidator v0.0.0-20210307081110-f21760c49a8d/go.mod h1:WaHUgvxTVq04UNunO+XhnAqY/wQc+bxr74GqbsZ/Jqw=
github.com/aymerick/douceur v0.2.0 h1:Mv+mAeH1Q+n9Fr+oyamOlAkUNPWPlA8PPGR0QAaYuPk=
github.com/aymerick/douceur v0.2.0/go.mod h1:wlT5vV2O3h55X9m7iVYN0TBM0NH/MmbLnd30/FjWUq4=
github.com/gorilla/css v1.0.0 h1:BQqNyPTi50JCFMTw/b67hByjMVXZRwGha6wxVGkeihY=
@@ -6,6 +8,8 @@ golang.org/x/net v0.0.0-20210421230115-4e50805a0758 h1:aEpZnXcAmXkd6AvLb2OPt+EN1
golang.org/x/net v0.0.0-20210421230115-4e50805a0758/go.mod h1:72T/g9IO56b78aLF+1Kcs5dz7/ng1VjMUvfKvpfy+jM=
golang.org/x/net v0.0.0-20210610132358-84b48f89b13b h1:k+E048sYJHyVnsr1GDrRZWQ32D2C7lWs9JRc0bel53A=
golang.org/x/net v0.0.0-20210610132358-84b48f89b13b/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
+golang.org/x/net v0.0.0-20210614182718-04defd469f4e h1:XpT3nA5TvE525Ne3hInMh6+GETgn27Zfm9dxsThnX2Q=
+golang.org/x/net v0.0.0-20210614182718-04defd469f4e/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210420072515-93ed5bcd2bfe/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=

sanitize.go: 30 additions & 58 deletions

@@ -130,14 +130,18 @@ func escapeUrlComponent(w stringWriterWriter, val string) error {
	return err
}

-// Query represents a query
+// Query represents a single part of the query string, a query param
type Query struct {
	Key      string
	Value    string
	HasValue bool
}

func parseQuery(query string) (values []Query, err error) {
+	// This is essentially a copy of parseQuery from
+	// https://golang.org/src/net/url/url.go but adjusted to build our values
+	// based on our type, which we need to preserve the ordering of the query
+	// string
	for query != "" {
		key := query
		if i := strings.IndexAny(key, "&;"); i >= 0 {
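As an aside (illustrative only, not part of this commit): the reason parseQuery builds a []Query rather than reusing the standard library directly is that url.ParseQuery returns url.Values, a map, so the order of the query parameters is lost:

package main

import (
	"fmt"
	"net/url"
)

func main() {
	// url.Values is a map[string][]string, so ranging over it does not
	// guarantee the parameters come back in the order they were written;
	// the []Query slice above is what preserves that order.
	values, _ := url.ParseQuery("b=2&a=1&c=3")
	for key, vals := range values {
		fmt.Println(key, vals) // iteration order is not guaranteed
	}
}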
@@ -213,43 +217,6 @@ func sanitizedURL(val string) (string, error) {
	return u.String(), nil
}

-func (p *Policy) writeLinkableBuf(buff stringWriterWriter, token *html.Token) (int, error) {
-	// do not escape multiple query parameters
-	tokenBuff := bytes.NewBuffer(make([]byte, 0, 1024)) // This should stay on the stack unless it gets too big
-
-	tokenBuff.WriteByte('<')
-	tokenBuff.WriteString(token.Data)
-	for _, attr := range token.Attr {
-		tokenBuff.WriteByte(' ')
-		tokenBuff.WriteString(attr.Key)
-		tokenBuff.Write([]byte{'=', '"'})
-		switch attr.Key {
-		case "href", "src":
-			u, ok := p.validURL(attr.Val)
-			if !ok {
-				tokenBuff.WriteString(html.EscapeString(attr.Val))
-				continue
-			}
-			u, err := sanitizedURL(u)
-			if err == nil {
-				tokenBuff.WriteString(u)
-			} else {
-				// fallthrough
-				tokenBuff.WriteString(html.EscapeString(attr.Val))
-			}
-		default:
-			// re-apply
-			tokenBuff.WriteString(html.EscapeString(attr.Val))
-		}
-		tokenBuff.WriteByte('"')
-	}
-	if token.Type == html.SelfClosingTagToken {
-		tokenBuff.WriteString("/")
-	}
-	tokenBuff.WriteString(">")
-	return buff.Write(tokenBuff.Bytes())
-}
-
// Performs the actual sanitization process.
func (p *Policy) sanitizeWithBuff(r io.Reader) *bytes.Buffer {
	var buff bytes.Buffer
@@ -344,7 +311,9 @@ func (p *Policy) sanitize(r io.Reader, w io.Writer) error {
				aps = aa
			}
			if len(token.Attr) != 0 {
-				token.Attr = p.sanitizeAttrs(token.Data, token.Attr, aps)
+				token.Attr = escapeAttributes(
+					p.sanitizeAttrs(token.Data, token.Attr, aps),
+				)

@zeripath (contributor) commented on Jun 3, 2022

I don't fully understand why we need to escape on top of sanitize?

@buro9 (author, member) commented on Jul 1, 2022

I think you're right here... just looking through and it looks like this introduced double-escaping in non-URL attributes.

Further, it didn't add anything to the URL attributes, which were already being escaped sufficiently.

@zeripath (contributor) commented on Jul 1, 2022

I mean, I guess it prevents an https://vulnerable/?query=<script>alert();</script> being sent to a vulnerable URL that might insert that <script>alert();</script> directly into HTML - but that doesn't seem like it's really the job of bluemonday or its downstream users to prevent.

Similarly, it might help prevent stupid JS that reads the attributes of a sanitized page from accidentally inserting HTML, but again - that doesn't seem like the job of bluemonday.

			}

			if len(token.Attr) == 0 {
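To make the double-escaping discussed above concrete, a small sketch; html.EscapeString is used here only as a stand-in for the escaping x/net/html applies when a token is serialised, so this is an approximation of the code path rather than the library's exact behaviour:

package main

import (
	"fmt"
	"html"
	"strings"
)

func main() {
	// A hypothetical attribute value containing a double quote.
	val := `https://abc"><script>alert(1)</script/`

	// First pass: escapeAttribute (added in this commit) turns `"` into `&quot;`.
	once := strings.Replace(val, `"`, `&quot;`, -1)

	// Second pass: serialising the token escapes the value again, so the `&`
	// introduced above becomes `&amp;`, giving the `&amp;quot;` seen in the
	// expected output of TestHrefSanitization below.
	fmt.Println(html.EscapeString(once))
	// https://abc&amp;quot;&gt;&lt;script&gt;alert(1)&lt;/script/
}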
@@ -361,15 +330,8 @@ func (p *Policy) sanitize(r io.Reader, w io.Writer) error {
			}

			if !skipElementContent {
-				// do not escape multiple query parameters
-				if linkable(token.Data) {
-					if _, err := p.writeLinkableBuf(buff, &token); err != nil {
-						return err
-					}
-				} else {
-					if _, err := buff.WriteString(token.String()); err != nil {
-						return err
-					}
+				if _, err := buff.WriteString(token.String()); err != nil {
+					return err
				}
			}

@@ -439,7 +401,7 @@ func (p *Policy) sanitize(r io.Reader, w io.Writer) error {
			}

			if len(token.Attr) != 0 {
-				token.Attr = p.sanitizeAttrs(token.Data, token.Attr, aps)
+				token.Attr = escapeAttributes(p.sanitizeAttrs(token.Data, token.Attr, aps))
			}

			if len(token.Attr) == 0 && !p.allowNoAttrs(token.Data) {
@@ -451,15 +413,8 @@ func (p *Policy) sanitize(r io.Reader, w io.Writer) error {
				}
			}
			if !skipElementContent {
-				// do not escape multiple query parameters
-				if linkable(token.Data) {
-					if _, err := p.writeLinkableBuf(buff, &token); err != nil {
-						return err
-					}
-				} else {
-					if _, err := buff.WriteString(token.String()); err != nil {
-						return err
-					}
+				if _, err := buff.WriteString(token.String()); err != nil {
+					return err
				}
			}

@@ -569,9 +524,11 @@ attrsLoop:
			for _, ap := range apl {
				if ap.regexp != nil {
					if ap.regexp.MatchString(htmlAttr.Val) {
+						htmlAttr.Val = escapeAttribute(htmlAttr.Val)
						cleanAttrs = append(cleanAttrs, htmlAttr)
					}
				} else {
+					htmlAttr.Val = escapeAttribute(htmlAttr.Val)
					cleanAttrs = append(cleanAttrs, htmlAttr)
				}
			}
@@ -1087,3 +1044,18 @@ func normaliseElementName(str string) string {
		`"`,
	)
}
+
+func escapeAttributes(attrs []html.Attribute) []html.Attribute {
+	escapedAttrs := []html.Attribute{}
+	for _, attr := range attrs {
+		attr.Val = escapeAttribute(attr.Val)
+		escapedAttrs = append(escapedAttrs, attr)
+	}
+	return escapedAttrs
+}
+
+func escapeAttribute(val string) string {
+	val = strings.Replace(val, string([]rune{'\u00A0'}), `&nbsp;`, -1)
+	val = strings.Replace(val, `"`, `&quot;`, -1)
+	return val
+}
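For reference, a standalone sketch of what the new escapeAttribute helper does to a hypothetical attribute value that tries to close the surrounding quote (the function body mirrors the one added above; the input is invented for illustration):

package main

import (
	"fmt"
	"strings"
)

// escapeAttribute mirrors the helper added in this commit: a non-breaking
// space becomes the &nbsp; entity and a double quote becomes &quot;, so the
// value can no longer terminate the attribute it sits in.
func escapeAttribute(val string) string {
	val = strings.Replace(val, string([]rune{'\u00A0'}), `&nbsp;`, -1)
	val = strings.Replace(val, `"`, `&quot;`, -1)
	return val
}

func main() {
	// Hypothetical value attempting to break out of a quoted attribute.
	fmt.Println(escapeAttribute(`x" onmouseover="alert(1)`))
	// x&quot; onmouseover=&quot;alert(1)
}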

sanitize_test.go: 41 additions & 5 deletions

@@ -129,19 +129,19 @@ func TestLinks(t *testing.T) {
		},
		{
			in:       `<a href="?q=1&r=2">`,
-			expected: `<a href="?q=1&r=2" rel="nofollow">`,
+			expected: `<a href="?q=1&amp;r=2" rel="nofollow">`,
		},
		{
			in:       `<a href="?q=1&q=2">`,
-			expected: `<a href="?q=1&q=2" rel="nofollow">`,
+			expected: `<a href="?q=1&amp;q=2" rel="nofollow">`,
		},
		{
			in:       `<a href="?q=%7B%22value%22%3A%22a%22%7D">`,
			expected: `<a href="?q=%7B%22value%22%3A%22a%22%7D" rel="nofollow">`,
		},
		{
			in:       `<a href="?q=1&r=2&s=:foo@">`,
-			expected: `<a href="?q=1&r=2&s=%3Afoo%40" rel="nofollow">`,
+			expected: `<a href="?q=1&amp;r=2&amp;s=:foo@" rel="nofollow">`,
		},
		{
			in:       `<img src="data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAUAAAAFCAYAAACNbyblAAAAHElEQVQI12P4//8/w38GIAXDIBKE0DHxgljNBAAO9TXL0Y4OHwAAAABJRU5ErkJggg==" alt="Red dot" />`,
@@ -152,8 +152,8 @@ func TestLinks(t *testing.T) {
			expected: `<img src="giraffe.gif"/>`,
		},
		{
-			in:       `<img src="giraffe.gif?height=500&width=500&flag" />`,
-			expected: `<img src="giraffe.gif?height=500&width=500&flag"/>`,
+			in:       `<img src="giraffe.gif?height=500&amp;width=500&amp;flag" />`,
+			expected: `<img src="giraffe.gif?height=500&amp;width=500&amp;flag"/>`,
		},
	}

@@ -3624,3 +3624,39 @@ func TestAdditivePolicies(t *testing.T) {
		})
	})
}
+
+func TestHrefSanitization(t *testing.T) {
+	tests := []test{
+		{
+			in:       `abc<a href="https://abc&quot;&gt;<script&gt;alert(1)<&#x2f;script/">CLICK`,
+			expected: `abc<a href="https://abc&amp;quot;&gt;&lt;script&gt;alert(1)&lt;/script/" rel="nofollow">CLICK`,
+		},
+		{
+			in:       `<a href="https://abc&quot;&gt;<script&gt;alert(1)<&#x2f;script/">`,
+			expected: `<a href="https://abc&amp;quot;&gt;&lt;script&gt;alert(1)&lt;/script/" rel="nofollow">`,
+		},
+	}
+
+	p := UGCPolicy()
+
+	// These tests are run concurrently to enable the race detector to pick up
+	// potential issues
+	wg := sync.WaitGroup{}
+	wg.Add(len(tests))
+	for ii, tt := range tests {
+		go func(ii int, tt test) {
+			out := p.Sanitize(tt.in)
+			if out != tt.expected {
+				t.Errorf(
+					"test %d failed;\ninput : %s\noutput : %s\nexpected: %s",
+					ii,
+					tt.in,
+					out,
+					tt.expected,
+				)
+			}
+			wg.Done()
+		}(ii, tt)
+	}
+	wg.Wait()
+}
