Skip to content

Commit

Permalink
resolves asciidoctor#4468 treat bare URL enclosed in angle brackets a…
Browse files Browse the repository at this point in the history
…s unconstrained syntax
  • Loading branch information
mojavelinux committed Feb 20, 2024
1 parent 31af659 commit c690c8c
Show file tree
Hide file tree
Showing 4 changed files with 95 additions and 74 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.adoc
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ Bug Fixes::
* Move abstract inside info tag in DocBook output (#3602)
* Honor secondary and tertiary terms on `indexterm` macro when primary term is quoted and contains an equals sign (#3652)
* Remove extra border below doctitle when sidebar toc is collapsed into main content area (#4523)
* Treat a bare URL enclosed in angle brackets as unconstrained syntax; match it using a separate regular expression to avoid quirks (#4468)

== 2.0.20 (2023-05-18) - @mojavelinux

Expand Down
2 changes: 1 addition & 1 deletion lib/asciidoctor/rx.rb
Original file line number Diff line number Diff line change
Expand Up @@ -519,7 +519,7 @@ module Rx; end
# "https://github.com[]"
# (https://github.com) <= parenthesis not included in autolink
#
InlineLinkRx = %r((^|link:|#{CG_BLANK}|&lt;|[>\(\)\[\];"'])(\\?(?:https?|file|ftp|irc)://)(?:([^\s\[\]]+)\[(|#{CC_ALL}*?[^\\])\]|([^\s\[\]<]*([^\s,.?!\[\]<\)]))))m
InlineLinkRx = %r(\\?&lt;(\\?(?:https?|file|ftp|irc)://)[^\s]*?&gt;|(^|link:|#{CG_BLANK}|[>\(\)\[\];"'])(\\?(?:https?|file|ftp|irc)://)(?:([^\s\[\]]+)\[(|#{CC_ALL}*?[^\\])\]|([^\s\[\]<]*([^\s,.?!\[\]<\)]))))m

# Match a link or e-mail inline macro.
#
Expand Down
150 changes: 77 additions & 73 deletions lib/asciidoctor/substitutors.rb
Original file line number Diff line number Diff line change
Expand Up @@ -532,97 +532,101 @@ def sub_macros text
end

if found_colon && (text.include? '://')
# inline urls, target[text] (optionally prefixed with link: and optionally surrounded by <>)
# inline urls, target[text] (optionally prefixed with link: or enclosed in <>)
text = text.gsub InlineLinkRx do
if (target = $2 + ($3 || $5)).start_with? RS
if $1
# honor the escape
next ($&.slice 0, (rs_idx = $1.length)) + ($&.slice rs_idx + 1, $&.length)
end

prefix, suffix = $1, ''
# NOTE if $4 is set, we're looking at a formal macro (e.g., https://example.org[])
if $4
prefix = '' if prefix == 'link:'
link_text = nil if (link_text = $4).empty?
next $&.slice 1, $&.length if $&.start_with? RS
next %(&lt;#{$&.slice 5, $&.length}) if $1.start_with? RS
target = $&.slice 4, $&.length - 8
next $& if target == $1
link_text = (doc_attrs.key? 'hide-uri-scheme') ? (target.sub UriSniffRx, '') : target
doc.register :links, target
(Inline.new self, :anchor, link_text, type: :link, target: target, attributes: { 'role' => 'bare' }).convert
else
# invalid macro syntax (link: prefix w/o trailing square brackets or enclosed in double quotes)
# FIXME we probably shouldn't even get here when the link: prefix is present; the regex is doing too much
case prefix
when 'link:', ?", ?'
next $&
end
case $6
when ';'
if prefix == '&lt;' && (target.end_with? '&gt;')
# move surrounding <> out of URL
prefix = ''
target = target.slice 0, target.length - 4
elsif (target = target.chop).end_with? ')'
# move trailing ); out of URL
target = target.chop
suffix = ');'
else
# move trailing ; out of URL
suffix = ';'
# honor the escape
next %(#{$2}#{$&.slice $2.length + 1, $&.length}) if $3.start_with? RS
prefix, target, suffix = $2, $3 + ($4 || $6), ''
# NOTE if $5 is set (the attrlist), we're looking at a formal macro (e.g., https://example.org[])
if $5
prefix = '' if prefix == 'link:'
link_text = nil if (link_text = $5).empty?
else
case prefix
# invalid macro syntax (link: prefix w/o trailing square brackets or URL enclosed in quotes)
# FIXME we probably shouldn't even get here when the link: prefix is present; the regex is doing too much
when 'link:', ?", ?'
next $&
end
# NOTE handle case when modified target is a URI scheme (e.g., http://)
next $& if target.end_with? '://'
when ':'
if (target = target.chop).end_with? ')'
# move trailing ): out of URL
target = target.chop
suffix = '):'
else
# move trailing : out of URL
suffix = ':'
case $7
when ';'
if (target = target.chop).end_with? ')'
# move trailing ); out of URL
target = target.chop
suffix = ');'
else
# move trailing ; out of URL
suffix = ';'
end
# NOTE handle case when modified target is a URI scheme (e.g., http://)
next $& if target == $3
when ':'
if (target = target.chop).end_with? ')'
# move trailing ): out of URL
target = target.chop
suffix = '):'
else
# move trailing : out of URL
suffix = ':'
end
# NOTE handle case when modified target is a URI scheme (e.g., http://)
next $& if target == $3
end
# NOTE handle case when modified target is a URI scheme (e.g., http://)
next $& if target.end_with? '://'
end
end

attrs, link_opts = nil, { type: :link }
link_opts = { type: :link }

if link_text
new_link_text = link_text = link_text.gsub ESC_R_SB, R_SB if link_text.include? R_SB
if !doc.compat_mode && (link_text.include? '=')
# NOTE if an equals sign (=) is present, extract attributes from link text
link_text, attrs = extract_attributes_from_text link_text, ''
new_link_text = link_text
link_opts[:id] = attrs['id']
end
if link_text
new_link_text = link_text = link_text.gsub ESC_R_SB, R_SB if link_text.include? R_SB
if !doc.compat_mode && (link_text.include? '=')
# NOTE if an equals sign (=) is present, extract attributes from link text
link_text, attrs = extract_attributes_from_text link_text, ''
new_link_text = link_text
link_opts[:id] = attrs['id']
end

if link_text.end_with? '^'
new_link_text = link_text = link_text.chop
if attrs
attrs['window'] ||= '_blank'
else
attrs = { 'window' => '_blank' }
if link_text.end_with? '^'
new_link_text = link_text = link_text.chop
if attrs
attrs['window'] ||= '_blank'
else
attrs = { 'window' => '_blank' }
end
end
end

if new_link_text && new_link_text.empty?
if new_link_text && new_link_text.empty?
# NOTE it's not possible for the URI scheme to be bare in this case
link_text = (doc_attrs.key? 'hide-uri-scheme') ? (target.sub UriSniffRx, '') : target
bare = true
end
else
# NOTE it's not possible for the URI scheme to be bare in this case
link_text = (doc_attrs.key? 'hide-uri-scheme') ? (target.sub UriSniffRx, '') : target
bare = true
end
else
# NOTE it's not possible for the URI scheme to be bare in this case
link_text = (doc_attrs.key? 'hide-uri-scheme') ? (target.sub UriSniffRx, '') : target
bare = true
end

if bare
if attrs
attrs['role'] = (attrs.key? 'role') ? %(bare #{attrs['role']}) : 'bare'
else
attrs = { 'role' => 'bare' }
if bare
if attrs
attrs['role'] = (attrs.key? 'role') ? %(bare #{attrs['role']}) : 'bare'
else
attrs = { 'role' => 'bare' }
end
end
end

doc.register :links, (link_opts[:target] = target)
link_opts[:attributes] = attrs if attrs
%(#{prefix}#{(Inline.new self, :anchor, link_text, link_opts).convert}#{suffix})
doc.register :links, (link_opts[:target] = target)
link_opts[:attributes] = attrs if attrs
%(#{prefix}#{(Inline.new self, :anchor, link_text, link_opts).convert}#{suffix})
end
end
end

Expand Down
16 changes: 16 additions & 0 deletions test/links_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,22 @@
assert_xpath '//a[@href="http://asciidoc.org"][text()="http://asciidoc.org"]', convert_string('<http://asciidoc.org> is the project page for AsciiDoc.'), 1
end

# A URL enclosed in angle brackets should become a link even when the brackets
# directly abut other characters (no whitespace on either side).
test 'qualified url surrounded by angled brackets in unconstrained context' do
  input = 'URLは<http://asciidoc.org>。fin'
  # anchor whose href and text are both the bare URL (brackets excluded)
  link_xpath = '//a[@href="http://asciidoc.org"][text()="http://asciidoc.org"]'
  assert_xpath link_xpath, convert_string(input), 1
end

# Two angle-bracketed URLs on the same line, each abutting other characters,
# should both be converted to links.
test 'multiple qualified urls surrounded by angled brackets in unconstrained context' do
  input = 'URLは<http://asciidoc.org>。URLは<http://asciidoc.org>。'
  # anchor whose href and text are both the bare URL (brackets excluded)
  link_xpath = '//a[@href="http://asciidoc.org"][text()="http://asciidoc.org"]'
  assert_xpath link_xpath, convert_string(input), 2
end

# A backslash placed before the opening angle bracket escapes the autolink:
# the paragraph should contain the bracketed URL as plain text, minus the backslash.
test 'qualified url surrounded by escaped angled brackets' do
  # the single-quoted literal yields one backslash before the opening bracket
  input = '\\<http://asciidoc.org>'
  plain_text_xpath = '//p[text()="<http://asciidoc.org>"]'
  assert_xpath plain_text_xpath, convert_string(input), 1
end

# A backslash placed before the URL scheme (inside the angle brackets) also
# escapes the autolink: the bracketed URL is output as plain text, minus the backslash.
test 'escaped qualified url surrounded by angled brackets' do
  # the single-quoted literal yields one backslash between the bracket and the scheme
  input = '<\\http://asciidoc.org>'
  plain_text_xpath = '//p[text()="<http://asciidoc.org>"]'
  assert_xpath plain_text_xpath, convert_string(input), 1
end

# A URL wrapped in parentheses should be linked, with the parentheses left
# outside of the link's href and text.
test 'qualified url surrounded by round brackets' do
  input = '(http://asciidoc.org) is the project page for AsciiDoc.'
  link_xpath = '//a[@href="http://asciidoc.org"][text()="http://asciidoc.org"]'
  assert_xpath link_xpath, convert_string(input), 1
end
Expand Down

0 comments on commit c690c8c

Please sign in to comment.