Skip to content

Commit

Permalink
Improve performance of 'parseWithNormalisedSpaces' that is unconditionally invoked for each HTML text element
Browse files Browse the repository at this point in the history

Addresses #2729
  • Loading branch information
qwwdfsad committed Oct 26, 2022
1 parent 4c122a4 commit cb67ef1
Showing 1 changed file with 10 additions and 4 deletions.
Expand Up @@ -44,7 +44,13 @@ internal fun String.parseHtmlEncodedWithNormalisedSpaces(
*/
internal fun String.parseWithNormalisedSpaces(
renderWhiteCharactersAsSpaces: Boolean
// Previous implementation (the lines deleted by this commit): always round-trips through Jsoup.
): List<DocTag> =
// Parsing with Jsoup is required to get code points; otherwise they are interpreted separately, as chars.
// But we don't need to do it for Java, as it is already parsed with Jsoup.
Jsoup.parseBodyFragment(this).body().wholeText().parseHtmlEncodedWithNormalisedSpaces(renderWhiteCharactersAsSpaces)
// New implementation (the lines added by this commit): skips Jsoup when the string has no '&'.
): List<DocTag> {
// Fast path: requiresHtmlEncoding() only checks for '&', so a string without it is handed
// directly to parseHtmlEncodedWithNormalisedSpaces and the Jsoup parse is avoided.
// NOTE(review): on this path the input never goes through parseBodyFragment/wholeText, so any
// markup in it is passed along as-is — presumably callers only pass plain text here; confirm.
if (!requiresHtmlEncoding()) {
return parseHtmlEncodedWithNormalisedSpaces(renderWhiteCharactersAsSpaces)
}
// Slow path: the string contains '&'. Parsing with Jsoup is required to get code points;
// otherwise they are interpreted separately, as chars.
// But we don't need to do it for Java, as it is already parsed with Jsoup.
return Jsoup.parseBodyFragment(this).body().wholeText().parseHtmlEncodedWithNormalisedSpaces(renderWhiteCharactersAsSpaces)
}

/**
 * Reports whether this string contains at least one ampersand (`&`).
 *
 * @return `true` if `&` occurs anywhere in the receiver, `false` otherwise (including for an empty string).
 */
private fun String.requiresHtmlEncoding(): Boolean = '&' in this

0 comments on commit cb67ef1

Please sign in to comment.