Documentation
¶
Index ¶
Examples ¶
Constants ¶
This section is empty.
Variables ¶
// DefaultAllowList is the default allowlist for the HTML filter.
//
// It contains most tags listed in
// https://developer.mozilla.org/en-US/docs/Web/HTML/Element, grouped below
// in the same order as that reference. Each entry is
// {Name, Attr, URLAttr}.
//
// It is not recommended to modify the default list directly; use .Clone()
// and then modify the new one instead.
var DefaultAllowList = &AllowList{
	Tags: []*Tag{
		// Content sectioning.
		{"address", []string{}, []string{}},
		{"article", []string{}, []string{}},
		{"aside", []string{}, []string{}},
		{"footer", []string{}, []string{}},
		{"header", []string{}, []string{}},
		{"h1", []string{}, []string{}},
		{"h2", []string{}, []string{}},
		{"h3", []string{}, []string{}},
		{"h4", []string{}, []string{}},
		{"h5", []string{}, []string{}},
		{"h6", []string{}, []string{}},
		{"hgroup", []string{}, []string{}},
		{"main", []string{}, []string{}},
		{"nav", []string{}, []string{}},
		{"section", []string{}, []string{}},

		// Text content. "main" is intentionally listed only once (above);
		// a second entry here would be a redundant duplicate.
		{"blockquote", []string{}, []string{"cite"}},
		{"dd", []string{}, []string{}},
		{"div", []string{}, []string{}},
		{"dl", []string{}, []string{}},
		{"dt", []string{}, []string{}},
		{"figcaption", []string{}, []string{}},
		{"figure", []string{}, []string{}},
		{"hr", []string{}, []string{}},
		{"li", []string{}, []string{}},
		{"ol", []string{}, []string{}},
		{"p", []string{}, []string{}},
		{"pre", []string{}, []string{}},
		{"ul", []string{}, []string{}},

		// Inline text semantics.
		{"a", []string{"rel", "target", "referrerpolicy"}, []string{"href"}},
		{"abbr", []string{"title"}, []string{}},
		{"b", []string{}, []string{}},
		{"bdi", []string{}, []string{}},
		{"bdo", []string{}, []string{}},
		{"br", []string{}, []string{}},
		{"cite", []string{}, []string{}},
		{"code", []string{}, []string{}},
		{"data", []string{"value"}, []string{}},
		{"em", []string{}, []string{}},
		{"i", []string{}, []string{}},
		{"kbd", []string{}, []string{}},
		{"mark", []string{}, []string{}},
		{"q", []string{}, []string{"cite"}},
		{"s", []string{}, []string{}},
		{"small", []string{}, []string{}},
		{"span", []string{}, []string{}},
		{"strong", []string{}, []string{}},
		{"sub", []string{}, []string{}},
		{"sup", []string{}, []string{}},
		{"time", []string{"datetime"}, []string{}},
		{"u", []string{}, []string{}},

		// Image and multimedia.
		{"area", []string{"alt", "coords", "shape", "target", "rel", "referrerpolicy"}, []string{"href"}},
		{"audio", []string{"autoplay", "controls", "crossorigin", "duration", "loop", "muted", "preload"}, []string{"src"}},
		{"img", []string{"alt", "crossorigin", "height", "width", "loading", "referrerpolicy"}, []string{"src"}},
		{"map", []string{"name"}, []string{}},
		{"track", []string{"default", "kind", "label", "srclang"}, []string{"src"}},
		{"video", []string{"autoplay", "buffered", "controls", "crossorigin", "duration", "loop", "muted", "preload", "height", "width"}, []string{"src", "poster"}},

		// Embedded content.
		{"picture", []string{}, []string{}},
		{"source", []string{"type"}, []string{"src"}},

		// Demarcating edits.
		{"del", []string{}, []string{}},
		{"ins", []string{}, []string{}},

		// Table content.
		{"caption", []string{}, []string{}},
		{"col", []string{"span"}, []string{}},
		{"colgroup", []string{}, []string{}},
		{"table", []string{}, []string{}},
		{"tbody", []string{}, []string{}},
		{"td", []string{"colspan", "rowspan"}, []string{}},
		{"tfoot", []string{}, []string{}},
		{"th", []string{"colspan", "rowspan", "scope"}, []string{}},
		{"thead", []string{}, []string{}},
		{"tr", []string{}, []string{}},

		// Interactive elements.
		{"details", []string{"open"}, []string{}},
		{"summary", []string{}, []string{}},
	},
	// Attributes accepted on every allowed tag.
	GlobalAttr: []string{
		"class",
		"id",
	},
	// Tags whose content is not HTML and is handled as a single element.
	NonHTMLTags: []*Tag{
		{Name: "script"},
		{Name: "style"},
		{Name: "object"},
	},
}
DefaultAllowList for HTML filter.
The allowlist contains most tags listed in https://developer.mozilla.org/en-US/docs/Web/HTML/Element . It is not recommended to modify the default list directly; use .Clone() and then modify the new one instead.
Functions ¶
func DefaultURLSanitizer ¶
DefaultURLSanitizer is a default and strict sanitizer. It only accepts
- URL with scheme http or https
- relative URL, such as abc, abc?xxx=1, abc#123
- absolute URL, such as /abc, /abc?xxx=1, /abc#123
func NewWriter ¶
NewWriter returns a new Writer, with DefaultAllowList, writing sanitized HTML content to w.
Example ¶
package main
import (
"bytes"
"fmt"
"io"
"strings"
"github.com/sym01/htmlsanitizer"
)
func main() {
	const rounds = 1024

	// One unit of demo input: a javascript: link, an unquoted href and an
	// embedded <script>, wrapped in comment delimiters.
	const dirtyChunk = `abc-->
<a href="javascript:alert(1)">link1</a>
<a href=http://example.com>link2<script>xxx</script></a>
<!--`

	// What a single unit is expected to look like after sanitizing.
	const cleanChunk = `
<a>link1</a>
<a href="http://example.com">link2</a>
`

	// source reader for demo
	src := bytes.NewBufferString(strings.Repeat(dirtyChunk, rounds))
	want := "abc-->" + strings.Repeat(cleanChunk, rounds)

	// underlying writer for demo; everything copied into the sanitizing
	// writer ends up here in sanitized form
	var sink bytes.Buffer
	_, _ = io.Copy(htmlsanitizer.NewWriter(&sink), src)

	// check the result, for demo only
	fmt.Print(sink.String() == want)
}
Output: true
func SanitizeString ¶
SanitizeString uses the DefaultAllowList to sanitize the HTML string.
Types ¶
type AllowList ¶ added in v1.0.1
// AllowList specifies all the allowed HTML tags and their attributes for
// the filter.
type AllowList struct {
	// Tags specifies all the allowed tags.
	Tags []*Tag
	// GlobalAttr specifies the attributes that are allowed on every tag.
	// It's very useful for some common attributes, such as `class`, `id`.
	// For security reasons, it's not recommended to set a global attr for
	// any URL-related attribute.
	GlobalAttr []string
	// NonHTMLTags defines a set of special tags, such as <script> and <style>.
	// The content of these kinds of tags is actually not real HTML content,
	// so each one should be treated as a single element, without any child
	// elements.
	// TODO: rename this one
	NonHTMLTags []*Tag
}
AllowList specifies all the allowed HTML tags and their attributes for the filter.
func (*AllowList) FindTag ¶ added in v1.0.1
FindTag finds and returns tag by its name, case insensitive.
func (*AllowList) RemoveTag ¶ added in v1.0.1
RemoveTag removes all tags named `name`, which must be lowercase. It is not recommended to modify the default list directly; use .Clone() and then modify the new one instead.
Example ¶
package main
import (
"fmt"
"github.com/sym01/htmlsanitizer"
)
func main() {
	// Markup that contains a link we do not want to let through.
	const input = `
<h1 ClaSs="h1">hello</h1>
<p>
Hello, world<br>
Welcome to use <a href="https://github.com/sym01/htmlsanitizer">htmlsanitizer</a>
</p>`

	// sometimes we don't want user to pass HTML with <a> tag, so drop it
	// from this sanitizer's own allowlist
	s := htmlsanitizer.NewHTMLSanitizer()
	s.RemoveTag("a")

	cleaned, _ := s.SanitizeString(input)
	fmt.Print(cleaned)
}
Output: <h1 class="h1">hello</h1> <p> Hello, world<br> Welcome to use htmlsanitizer </p>
type HTMLSanitizer ¶
// HTMLSanitizer is an allowlist-based sanitizer for arbitrary HTML content.
type HTMLSanitizer struct {
	// AllowList is embedded, so its fields and methods are reachable
	// directly on the sanitizer.
	*AllowList
	// URLSanitizer is a func used to sanitize all the URLAttr.
	// URLSanitizer returns a sanitized URL and a bool indicating
	// whether the current attribute is acceptable. If not acceptable,
	// the current attribute will be ignored.
	// If the func is nil, then DefaultURLSanitizer will be used.
	URLSanitizer func(rawURL string) (sanitzed string, ok bool)
}
HTMLSanitizer is a super fast HTML sanitizer for arbitrary HTML content. It is an allowlist-based sanitizer whose time complexity is O(n).
Example (CustomURLSanitizer) ¶
package main
import (
"fmt"
"net/url"
"github.com/sym01/htmlsanitizer"
)
func main() {
	const allowedHost = "example.com"

	s := htmlsanitizer.NewHTMLSanitizer()

	// only links with domain name example.com are allowed.
	// The default sanitizer runs first; its URL is returned in every case
	// and only the accept/reject flag is tightened by the host check.
	s.URLSanitizer = func(rawURL string) (string, bool) {
		cleaned, accepted := htmlsanitizer.DefaultURLSanitizer(rawURL)
		if !accepted {
			return cleaned, false
		}

		parsed, err := url.Parse(cleaned)
		if err != nil {
			return cleaned, false
		}

		return cleaned, parsed.Host == allowedHost
	}

	const input = `
<a href="http://others.com">Link</a>
<a href="https://example.com/xxx">Link with example.com</a>
`

	result, _ := s.SanitizeString(input)
	fmt.Print(result)
}
Output: <a>Link</a> <a href="https://example.com/xxx">Link with example.com</a>
Example (KeepStyleSheet) ¶
package main
import (
"fmt"
"github.com/sym01/htmlsanitizer"
)
func main() {
	s := htmlsanitizer.NewHTMLSanitizer()

	// Allow the document-level tags (and <style>) on top of the defaults.
	// No attributes are allowed on any of them.
	for _, name := range []string{"style", "head", "body", "html"} {
		s.AllowList.Tags = append(s.AllowList.Tags, &htmlsanitizer.Tag{Name: name})
	}

	// A full document whose stylesheet contains text that looks like tags.
	const page = `<!doctype html>
<html>
<head>
<style type="text/css">
body {
background-color: #f0f0f2;
margin: 0;
padding: 0;
bad-attr: <body></body>;
bad-attr: <body></body >;
bad-attr: <body></ body>;
font-family: -apple-system, system-ui, BlinkMacSystemFont, "Segoe UI", "Open Sans", "Helvetica Neue", Helvetica, Arial, sans-serif;
}
</style>
</head>
<body>
<div>
<h1>Example Domain</h1>
<p><a href="https://www.iana.org/domains/example">More information...</a></p>
</div>
</body>
</html>`

	result, _ := s.SanitizeString(page)
	fmt.Print(result)
}
Output: <html> <head> <style> body { background-color: #f0f0f2; margin: 0; padding: 0; bad-attr: <body></body>; bad-attr: <body></body >; bad-attr: <body></ body>; font-family: -apple-system, system-ui, BlinkMacSystemFont, "Segoe UI", "Open Sans", "Helvetica Neue", Helvetica, Arial, sans-serif; } </style> </head> <body> <div> <h1>Example Domain</h1> <p><a href="https://www.iana.org/domains/example">More information...</a></p> </div> </body> </html>
Example (NoTagsAllowed) ¶
package main
import (
"fmt"
"github.com/sym01/htmlsanitizer"
)
func main() {
	const input = `
<a href="http://others.com">Link</a>
<a href="https://example.com/xxx">Link with example.com</a>
`

	s := htmlsanitizer.NewHTMLSanitizer()

	// just set AllowList to nil to disable all tags
	s.AllowList = nil

	// of course nothing will happen here
	s.RemoveTag("a")

	result, _ := s.SanitizeString(input)
	fmt.Print(result)
}
Output: Link Link with example.com
Example (OnlyAllowHrefTag) ¶
package main
import (
"fmt"
"github.com/sym01/htmlsanitizer"
)
func main() {
	sanitizer := htmlsanitizer.NewHTMLSanitizer()

	// Replace the default tag list with a single entry: <a>, with href as
	// its only (URL) attribute. Keyed fields are used because Tag belongs to
	// another package: unkeyed literals of imported structs are reported by
	// `go vet` and break if the struct's fields are ever reordered.
	sanitizer.AllowList.Tags = []*htmlsanitizer.Tag{
		{Name: "a", URLAttr: []string{"href"}},
	}

	data := `
<details/open/ontoggle=alert(1)></details>
<a href="http://others.com" target="_blank">Link</a>
<a href="https://example.com/xxx">Link with example.com</a>
`

	// The error is ignored for brevity in this example.
	output, _ := sanitizer.SanitizeString(data)
	fmt.Print(output)
}
Output: <a href="http://others.com">Link</a> <a href="https://example.com/xxx">Link with example.com</a>
func NewHTMLSanitizer ¶
func NewHTMLSanitizer() *HTMLSanitizer
NewHTMLSanitizer creates a new HTMLSanitizer with the clone of the DefaultAllowList.
func (*HTMLSanitizer) NewWriter ¶
func (f *HTMLSanitizer) NewWriter(w io.Writer) io.Writer
NewWriter returns a new Writer writing sanitized HTML content to w.
func (*HTMLSanitizer) Sanitize ¶
func (f *HTMLSanitizer) Sanitize(data []byte) ([]byte, error)
Sanitize sanitizes the HTML data and returns the sanitized HTML.
func (*HTMLSanitizer) SanitizeString ¶
func (f *HTMLSanitizer) SanitizeString(data string) (string, error)
SanitizeString sanitizes the HTML string and returns the sanitized HTML.
type Tag ¶
// Tag describes one allowed HTML tag together with its allowed attributes.
type Tag struct {
	// Name for current tag, must be lowercase.
	Name string
	// Attr specifies the allowed attributes for current tag,
	// must be lowercase.
	//
	// e.g. colspan, rowspan
	Attr []string
	// URLAttr specifies the allowed, URL-related attributes for current tag,
	// must be lowercase.
	//
	// e.g. src, href
	URLAttr []string
}
Tag with its attributes.