Skip to content

Commit

Permalink
tpl/transform: Add transform.XMLEscape template function
Browse files Browse the repository at this point in the history
Fixes #3268
  • Loading branch information
jmooring authored and bep committed Nov 27, 2023
1 parent ef12d16 commit b4c5df4
Show file tree
Hide file tree
Showing 6 changed files with 105 additions and 2 deletions.
38 changes: 38 additions & 0 deletions docs/content/en/functions/transform/XMLEscape.md
@@ -0,0 +1,38 @@
---
title: transform.XMLEscape
description: Returns the given string, removing disallowed characters then escaping the result to its XML equivalent.
categories: []
keywords: []
action:
aliases: []
related: []
returnType: string
signatures: [transform.XMLEscape INPUT]
---

The `transform.XMLEscape` function removes [disallowed characters] as defined in the XML specification, then escapes the result by replacing the following characters with [HTML entities]:

- `"` → `&#34;`
- `'` → `&#39;`
- `&` → `&amp;`
- `<` → `&lt;`
- `>` → `&gt;`
- `\t` → `&#x9;`
- `\n` → `&#xA;`
- `\r` → `&#xD;`

For example:

```go-html-template
transform.XMLEscape "<p>abc</p>" → &lt;p&gt;abc&lt;/p&gt;
```

When using `transform.XMLEscape` in a template rendered by Go's [html/template] package, declare the string to be safe HTML to avoid double escaping. For example, in an RSS template:

{{< code file="layouts/_default/rss.xml" >}}
<description>{{ .Summary | transform.XMLEscape | safeHTML }}</description>
{{< /code >}}

[disallowed characters]: https://www.w3.org/TR/xml/#charsets
[html entities]: https://developer.mozilla.org/en-us/docs/glossary/entity
[html/template]: https://pkg.go.dev/html/template
2 changes: 1 addition & 1 deletion tpl/tplimpl/embedded/templates/_default/rss.xml
Expand Up @@ -64,7 +64,7 @@
<pubDate>{{ .Date.Format "Mon, 02 Jan 2006 15:04:05 -0700" | safeHTML }}</pubDate>
{{- with $authorEmail }}<author>{{ . }}{{ with $authorName }} ({{ . }}){{ end }}</author>{{ end }}
<guid>{{ .Permalink }}</guid>
<description>{{ .Summary | html }}</description>
<description>{{ .Summary | transform.XMLEscape | safeHTML }}</description>
</item>
{{- end }}
</channel>
Expand Down
6 changes: 5 additions & 1 deletion tpl/tplimpl/template_funcs_test.go
Expand Up @@ -37,7 +37,7 @@ home=["HTML"]
-- files/README.txt --
Hugo Rocks!
-- content/blog/hugo-rocks.md --
---
---
title: "**BatMan**"
---
`
Expand Down Expand Up @@ -65,6 +65,10 @@ title: "**BatMan**"
// This will fail the build, so skip for now.
continue
}
if strings.Contains(example[0], "transform.XMLEscape") {
// This will fail the build, so skip for now.
continue
}
templates = append(templates, example[0])
expected = append(expected, example[1])
}
Expand Down
10 changes: 10 additions & 0 deletions tpl/transform/init.go
Expand Up @@ -112,6 +112,16 @@ func init() {
},
)

ns.AddMethodMapping(ctx.XMLEscape,
nil,
[][2]string{
{
`{{ transform.XMLEscape "<p>abc</p>" }}`,
`&lt;p&gt;abc&lt;/p&gt;`,
},
},
)

return ns
}

Expand Down
20 changes: 20 additions & 0 deletions tpl/transform/integration_test.go
Expand Up @@ -65,3 +65,23 @@ foo
b.AssertFileContent("public/p3/index.html", "_<h2 id=\"foo\">foo</h2>\n<p>bar</p>\n_")
b.AssertFileContent("public/p4/index.html", "_<p id=\"bar\">foo</p>\n_")
}

// TestXMLEscape builds a single-page site and checks that the page summary
// written to the <description> element of public/index.xml has its HTML
// markup escaped.
func TestXMLEscape(t *testing.T) {
	t.Parallel()

	// Site fixture: one page whose summary contains inline Markdown emphasis.
	siteFiles := `
-- config.toml --
disableKinds = ['section','sitemap','taxonomy','term']
-- content/p1.md --
---
title: p1
---
a **b** c
<!--more-->
`

	// The rendered summary is expected to appear with its tags escaped.
	wantDescription := `
<description>&lt;p&gt;a &lt;strong&gt;b&lt;/strong&gt; c&lt;/p&gt;</description>
`

	builder := hugolib.Test(t, siteFiles)
	builder.AssertFileContent("public/index.xml", wantDescription)
}
31 changes: 31 additions & 0 deletions tpl/transform/transform.go
Expand Up @@ -15,9 +15,12 @@
package transform

import (
"bytes"
"context"
"encoding/xml"
"html"
"html/template"
"strings"

"github.com/gohugoio/hugo/cache/namedmemcache"
"github.com/gohugoio/hugo/markup/converter/hooks"
Expand Down Expand Up @@ -118,6 +121,34 @@ func (ns *Namespace) HTMLUnescape(s any) (string, error) {
return html.UnescapeString(ss), nil
}

// XMLEscape strips from s every character that the XML specification does
// not allow, then escapes what remains to its XML equivalent.
// It returns an error if s cannot be converted to a string.
func (ns *Namespace) XMLEscape(s any) (string, error) {
	input, err := cast.ToStringE(s)
	if err != nil {
		return "", err
	}

	// keepXMLChar returns r unchanged when it lies inside one of the ranges
	// listed at https://www.w3.org/TR/xml/#NT-Char; otherwise it returns a
	// negative value, which tells strings.Map to drop the rune.
	keepXMLChar := func(r rune) rune {
		switch {
		case r == 0x9, r == 0xA, r == 0xD:
		case r >= 0x20 && r <= 0xD7FF:
		case r >= 0xE000 && r <= 0xFFFD:
		case r >= 0x10000 && r <= 0x10FFFF:
		default:
			return -1
		}
		return r
	}

	var escaped bytes.Buffer
	if err := xml.EscapeText(&escaped, []byte(strings.Map(keepXMLChar, input))); err != nil {
		return "", err
	}

	return escaped.String(), nil
}

// Markdownify renders s from Markdown to HTML.
func (ns *Namespace) Markdownify(ctx context.Context, s any) (template.HTML, error) {

Expand Down

0 comments on commit b4c5df4

Please sign in to comment.