Skip to content

Commit

Permalink
Merge pull request #175 from yardenshoham/url-regexp
Browse files Browse the repository at this point in the history
Allow custom URL schemes by matching regex
  • Loading branch information
buro9 committed May 18, 2023
2 parents c506024 + e8dc3b7 commit f6787c6
Show file tree
Hide file tree
Showing 3 changed files with 40 additions and 0 deletions.
12 changes: 12 additions & 0 deletions policy.go
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,10 @@ type Policy struct {
// returning true are allowed.
allowURLSchemes map[string][]urlPolicy

// These regexps are used to match allowed URL schemes. For example, to
// allow all URL schemes, one would add `.+`.
allowURLSchemeRegexps []*regexp.Regexp

// If an element has had all attributes removed as a result of a policy
// being applied, then the element would be removed from the output.
//
Expand Down Expand Up @@ -221,6 +225,7 @@ func (p *Policy) init() {
p.elsMatchingAndStyles = make(map[*regexp.Regexp]map[string][]stylePolicy)
p.globalStyles = make(map[string][]stylePolicy)
p.allowURLSchemes = make(map[string][]urlPolicy)
p.allowURLSchemeRegexps = make([]*regexp.Regexp, 0)
p.setOfElementsAllowedWithoutAttrs = make(map[string]struct{})
p.setOfElementsToSkipContent = make(map[string]struct{})
p.initialized = true
Expand Down Expand Up @@ -563,6 +568,13 @@ func (p *Policy) AllowElementsMatching(regex *regexp.Regexp) *Policy {
return p
}

// AllowURLSchemesMatching adds a regexp to the set of patterns used to allow
// URL schemes. When a URL is validated, its scheme is tested against every
// registered regexp, and the URL is accepted as soon as one of them matches.
//
// Two things are worth knowing before using this:
//
//   - Matching uses (*regexp.Regexp).MatchString, which is unanchored: the
//     pattern `http` also matches the scheme `xhttp`. Anchor the expression
//     with ^ and $ when an exact scheme name is intended.
//   - A regexp match is checked before the per-scheme allowlist, so a scheme
//     accepted here is not subject to any custom URL policy registered for
//     that scheme.
//
// A nil regexp is ignored, so it can never allow anything. The policy is
// returned to permit call chaining.
func (p *Policy) AllowURLSchemesMatching(r *regexp.Regexp) *Policy {
	// Initialise the policy first. On a zero-value Policy a later init()
	// call would otherwise re-make the slice and drop this regexp.
	p.init()

	// A nil regexp would panic when the scheme is matched during
	// sanitization; skipping it fails closed instead.
	if r == nil {
		return p
	}

	p.allowURLSchemeRegexps = append(p.allowURLSchemeRegexps, r)
	return p
}

// RequireNoFollowOnLinks will result in all a, area, link tags having a
// rel="nofollow" added to them if one does not already exist
//
Expand Down
5 changes: 5 additions & 0 deletions sanitize.go
Original file line number Diff line number Diff line change
Expand Up @@ -970,6 +970,11 @@ func (p *Policy) validURL(rawurl string) (string, bool) {
}

if u.Scheme != "" {
for _, r := range p.allowURLSchemeRegexps {
if r.MatchString(u.Scheme) {
return u.String(), true
}
}

urlPolicies, ok := p.allowURLSchemes[u.Scheme]
if !ok {
Expand Down
23 changes: 23 additions & 0 deletions sanitize_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3985,3 +3985,26 @@ func TestIssue171(t *testing.T) {
expected)
}
}

// TestIssue174 checks that a policy configured with a match-everything scheme
// regexp keeps hrefs that use custom URL schemes as well as https.
//
// See https://github.com/microcosm-cc/bluemonday/issues/174
func TestIssue174(t *testing.T) {
	const input = `<a href="cbthunderlink://somebase64string"></a>
<a href="matrix:roomid/psumPMeAfzgAeQpXMG:feneas.org?action=join"></a>
<a href="https://github.com"></a>`

	// Every href is expected to survive, each with rel="nofollow" added.
	const expected = `<a href="cbthunderlink://somebase64string" rel="nofollow"></a>
<a href="matrix:roomid/psumPMeAfzgAeQpXMG:feneas.org?action=join" rel="nofollow"></a>
<a href="https://github.com" rel="nofollow"></a>`

	// Start from the UGC policy and allow any non-empty URL scheme.
	policy := UGCPolicy()
	policy.AllowURLSchemesMatching(regexp.MustCompile(`.+`))

	if got := policy.Sanitize(input); got != expected {
		t.Errorf(
			"test failed;\ninput : %s\noutput : %s\nexpected: %s",
			input,
			got,
			expected)
	}
}

0 comments on commit f6787c6

Please sign in to comment.