Skip to content

Commit

Permalink
Rewrite default class extractor (#8204)
Browse files Browse the repository at this point in the history
* Rewrite default extractor

* Eliminate lookbehind assertions in expand apply at rules

* Update changelog
  • Loading branch information
thecrypticace committed May 4, 2022
1 parent bb0ab67 commit d676086
Show file tree
Hide file tree
Showing 8 changed files with 293 additions and 47 deletions.
2 changes: 1 addition & 1 deletion CHANGELOG.md
Expand Up @@ -19,7 +19,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Support PostCSS config options in config file in CLI ([#8226](https://github.com/tailwindlabs/tailwindcss/pull/8226))
- Remove default `[hidden]` style in preflight ([#8248](https://github.com/tailwindlabs/tailwindcss/pull/8248))
- Only check selectors containing base apply candidates for circular dependencies ([#8222](https://github.com/tailwindlabs/tailwindcss/pull/8222))
- Handle utilities with multiple and/or grouped selectors better ([#8262](https://github.com/tailwindlabs/tailwindcss/pull/8262))
- Rewrite default class extractor ([#8204](https://github.com/tailwindlabs/tailwindcss/pull/8204))

### Added

Expand Down
184 changes: 151 additions & 33 deletions src/lib/defaultExtractor.js
@@ -1,42 +1,160 @@
// Candidate-matching patterns. Each entry is the `.source` of a regex containing a single
// capture group; the entries are joined with `|` into one alternation string, so earlier
// entries are tried first at any given position.
// NOTE(review): this listing appears to interleave removed and added lines of a commit diff —
// confirm whether these constants are still referenced once the extractor rewrite is applied.
const PATTERNS = [
/(?:\['([^'\s]+[^<>"'`\s:\\])')/.source, // ['text-lg' -> text-lg
/(?:\["([^"\s]+[^<>"'`\s:\\])")/.source, // ["text-lg" -> text-lg
/(?:\[`([^`\s]+[^<>"'`\s:\\])`)/.source, // [`text-lg` -> text-lg
/([^${(<>"'`\s]*\[\w*'[^"`\s]*'?\])/.source, // font-['some_font',sans-serif]
/([^${(<>"'`\s]*\[\w*"[^'`\s]*"?\])/.source, // font-["some_font",sans-serif]
/([^<>"'`\s]*\[\w*\('[^"'`\s]*'\)\])/.source, // bg-[url('...')]
/([^<>"'`\s]*\[\w*\("[^"'`\s]*"\)\])/.source, // bg-[url("...")]
/([^<>"'`\s]*\[\w*\('[^"`\s]*'\)\])/.source, // bg-[url('...'),url('...')]
/([^<>"'`\s]*\[\w*\("[^'`\s]*"\)\])/.source, // bg-[url("..."),url("...")]
/([^<>"'`\s]*\[[^<>"'`\s]*\('[^"`\s]*'\)+\])/.source, // h-[calc(100%-theme('spacing.1'))]
/([^<>"'`\s]*\[[^<>"'`\s]*\("[^'`\s]*"\)+\])/.source, // h-[calc(100%-theme("spacing.1"))]
/([^${(<>"'`\s]*\['[^"'`\s]*'\])/.source, // `content-['hello']` but not `content-['hello']']`
/([^${(<>"'`\s]*\["[^"'`\s]*"\])/.source, // `content-["hello"]` but not `content-["hello"]"]`
/([^<>"'`\s]*\[[^<>"'`\s]*:[^\]\s]*\])/.source, // `[attr:value]`
/([^<>"'`\s]*\[[^<>"'`\s]*:'[^"'`\s]*'\])/.source, // `[content:'hello']` but not `[content:"hello"]`
/([^<>"'`\s]*\[[^<>"'`\s]*:"[^"'`\s]*"\])/.source, // `[content:"hello"]` but not `[content:'hello']`
/([^<>"'`\s]*\[[^"'`\s]+\][^<>"'`\s]*)/.source, // `fill-[#bada55]`, `fill-[#bada55]/50`
/([^"'`\s]*[^<>"'`\s:\\])/.source, // `<sm:underline`, `md>:font-bold`
/([^<>"'`\s]*[^"'`\s:\\])/.source, // `px-1.5`, `uppercase` but not `uppercase:`

// Arbitrary properties
// /([^"\s]*\[[^\s]+?\][^"\s]*)/.source,
// /([^'\s]*\[[^\s]+?\][^'\s]*)/.source,
// /([^`\s]*\[[^\s]+?\][^`\s]*)/.source,
].join('|')

// The full alternation compiled with the global flag so every occurrence in a string is found
const BROAD_MATCH_GLOBAL_REGEXP = new RegExp(PATTERNS, 'g')
// Narrower pass: runs of characters that exclude quotes, whitespace, angle brackets and
// `. ( ) { } [ ] # = % $`, and whose final character is additionally not `:`
const INNER_MATCH_GLOBAL_REGEXP = /[^<>"'`\s.(){}[\]#=%$]*[^<>"'`\s.(){}[\]#=%:$]/g
import * as regex from './regex'

// Drain the `buildRegExps` generator once at module load so the resulting regexes
// can be iterated repeatedly by `defaultExtractor`
let patterns = Array.from(buildRegExps())

/**
 * Collects every candidate class name found in `content`.
 *
 * Each regex in `patterns` is run against the whole string; every match is then passed
 * through `clipAtBalancedParens` before being returned. Duplicates are not removed.
 *
 * @param {string} content
 * @returns {string[]}
 */
export function defaultExtractor(content) {
  // `String.prototype.match` returns `null` when a global pattern finds nothing.
  // `flatMap` is used instead of `push(...matches)` so a very large match list is never
  // spread into function arguments.
  /** @type {string[]} */
  let results = patterns.flatMap((pattern) => content.match(pattern) ?? [])

  return results.filter((v) => v !== undefined).map(clipAtBalancedParens)
}

/**
 * Yields the regular expressions used to find class candidates.
 *
 * A single pattern is produced, assembled from named fragments:
 * optional variants, an optional `!`, then either an arbitrary property
 * or a utility with an optional value.
 */
function* buildRegExps() {
  // Variants: an optional run of characters (no whitespace, quotes, backslash or `[`)
  // ending in `:`. The run is captured inside a lookahead and replayed with `\2`.
  let variants = /((?=([^\s"'\\\[]+:))\2)?/

  // Important (optional)
  let important = /!?/

  // Arbitrary properties, e.g. `[attr:value]`
  let arbitraryProperty = /\[[^\s:'"]+:[^\s\]]+\]/

  // Utility Name / Group Name
  let utilityName = /-?(?:\w+)/

  // Intended to reject a match that is immediately followed by an `{[(`
  // NOTE(review): as written the class `[{([]` is followed by a literal `]`, so this only
  // rejects an opener that is itself directly followed by `]` — confirm this is intended
  let notBeforeOpener = /(?![{([]])/

  // Optional opacity modifier following an arbitrary value
  let opacityModifier = /(?:\/[^\s'"\\$]*)?/

  // Arbitrary values that contain no `:`
  let arbitraryValueWithoutColon = regex.pattern([/-\[[^\s:]+\]/, notBeforeOpener, opacityModifier])

  // Arbitrary values that may contain `:`
  let arbitraryValue = regex.pattern([/-\[[^\s]+\]/, notBeforeOpener, opacityModifier])

  // Normal values w/o quotes — may include an opacity modifier
  let normalValue = /[-\/][^\s'"\\$={]*/

  // Alternatives are tried in this order
  let value = regex.optional(regex.any([arbitraryValueWithoutColon, arbitraryValue, normalValue]))

  let utility = regex.pattern([utilityName, value])

  yield regex.pattern([variants, important, regex.any([arbitraryProperty, utility])])

  // 5. Inner matches (currently disabled)
  // yield /[^<>"'`\s.(){}[\]#=%$]*[^<>"'`\s.(){}[\]#=%:$]/g
}

// We want to capture any "special" characters (square brackets and quotes)
// AND the character immediately following them (if there is one and it is not itself special)
let SPECIALS = /([\[\]'"`])([^\[\]'"`])?/g

// Characters allowed to follow a balanced arbitrary value and still belong to the class
let ALLOWED_CLASS_CHARACTERS = /[^"'`\s<>\]]+/

/**
 * Clips a candidate so that its square brackets are balanced.
 * Used for arbitrary values only: input without `-[` is returned untouched.
 *
 * Only `[` and `]` that appear outside of a quoted string change the depth; anything
 * between a pair of matching quotes is ignored. Despite the name, round parentheses are
 * not counted.
 *
 * We go past the end of the balanced brackets until we find a non-class character, so
 * `text-[#336699]/[.35]` is kept whole.
 *
 * Examples:
 *   text-[#bada55]]                                      -> text-[#bada55]
 *   content-['hello']']                                  -> content-['hello']
 *   w-[calc(100%-theme('spacing[some_key][1.5]'))]']     -> w-[calc(100%-theme('spacing[some_key][1.5]'))]
 *
 * This is naive and will treat mismatched brackets as balanced.
 * This shouldn't be a problem in practice though.
 *
 * @param {string} input
 * @returns {string} `input`, possibly truncated
 */
function clipAtBalancedParens(input) {
  // We only care about this for arbitrary values
  if (!input.includes('-[')) {
    return input
  }

  let depth = 0
  let openStringTypes = []

  // We can't use lookbehind assertions because we have to support Safari
  // So, instead, each match carries the special character and (optionally) the character
  // right after it, and we visit both in order
  for (let match of input.matchAll(SPECIALS)) {
    let [, special, following] = match

    let visits = [[special, match.index]]
    if (following !== undefined) {
      visits.push([following, match.index + 1])
    }

    for (let [char, index] of visits) {
      let inStringType = openStringTypes[openStringTypes.length - 1]

      if (char === inStringType) {
        openStringTypes.pop()
      } else if (char === "'" || char === '"' || char === '`') {
        openStringTypes.push(char)
      }

      // Everything inside a string, including the quote that closes it, is skipped
      if (inStringType) {
        continue
      }

      if (char === '[') {
        depth++
        continue
      }

      if (char === ']') {
        depth--

        // There was an extra closing `]` — clip to just before it
        if (depth < 0) {
          return input.substring(0, index)
        }

        continue
      }

      // We've finished balancing the brackets but there still may be characters that can be included
      // For example in the class `text-[#336699]/[.35]`
      // The depth goes to `0` at the closing `]` but goes up again at the `[`

      // If we're at zero and encounter a non-class character then we clip the class there
      if (depth === 0 && !ALLOWED_CLASS_CHARACTERS.test(char)) {
        return input.substring(0, index)
      }
    }
  }

  return input
}

// Regular utilities
Expand Down
14 changes: 11 additions & 3 deletions src/lib/expandApplyAtRules.js
Expand Up @@ -34,6 +34,15 @@ function extractClasses(node) {
return Object.assign(classes, { groups: normalizedGroups })
}

// Processor whose transform returns the string form of every top-level node of the parsed root.
// NOTE(review): `parser` is presumably postcss-selector-parser — confirm against this file's imports.
let selectorExtractor = parser((root) => {
  return root.nodes.map((topLevelNode) => topLevelNode.toString())
})

/**
 * Runs `selectorExtractor` synchronously over a selector string and returns its result.
 *
 * @param {string} ruleSelectors
 */
function extractSelectors(ruleSelectors) {
  let selectors = selectorExtractor.transformSync(ruleSelectors)

  return selectors
}

function extractBaseCandidates(candidates, separator) {
let baseClasses = new Set()

Expand Down Expand Up @@ -295,10 +304,9 @@ function processApply(root, context, localCache) {
function replaceSelector(selector, utilitySelectors, candidate) {
let needle = `.${escapeClassName(candidate)}`
let needles = [...new Set([needle, needle.replace(/\\2c /g, '\\,')])]
let utilitySelectorsList = utilitySelectors.split(/\s*(?<!\\)\,(?![^(]*\))\s*/g)
let utilitySelectorsList = extractSelectors(utilitySelectors)

return selector
.split(/\s*(?<!\\)\,(?![^(]*\))\s*/g)
return extractSelectors(selector)
.map((s) => {
let replaced = []

Expand Down
6 changes: 3 additions & 3 deletions src/lib/expandTailwindAtRules.js
Expand Up @@ -17,8 +17,8 @@ const builtInTransformers = {
svelte: (content) => content.replace(/(?:^|\s)class:/g, ' '),
}

function getExtractor(tailwindConfig, fileExtension) {
let extractors = tailwindConfig.content.extract
function getExtractor(context, fileExtension) {
let extractors = context.tailwindConfig.content.extract

return (
extractors[fileExtension] ||
Expand Down Expand Up @@ -165,7 +165,7 @@ export default function expandTailwindAtRules(context) {

for (let { content, extension } of context.changedContent) {
let transformer = getTransformer(context.tailwindConfig, extension)
let extractor = getExtractor(context.tailwindConfig, extension)
let extractor = getExtractor(context, extension)
getClassCandidates(transformer(content), extractor, candidates, seen)
}

Expand Down
74 changes: 74 additions & 0 deletions src/lib/regex.js
@@ -0,0 +1,74 @@
// Characters that carry special meaning in a regular expression source (global, used for replacing)
const REGEX_SPECIAL = /[\\^$.*+?()[\]{}|]/g
// Same character class compiled without flags, so `.test()` keeps no `lastIndex` state between calls
const REGEX_HAS_SPECIAL = RegExp(REGEX_SPECIAL.source)

/**
 * Flattens one or more pattern fragments into a single regex source string.
 * RegExp fragments contribute their `.source` (flags are dropped); any other
 * fragment is joined as-is.
 *
 * @param {string|RegExp|Array<string|RegExp>} source
 * @returns {string}
 */
function toSource(source) {
  const fragments = Array.isArray(source) ? source : [source]
  const pieces = []

  for (const fragment of fragments) {
    pieces.push(fragment instanceof RegExp ? fragment.source : fragment)
  }

  return pieces.join('')
}

/**
 * Concatenates the given fragments and compiles them into a global RegExp.
 *
 * @param {string|RegExp|Array<string|RegExp>} source
 * @returns {RegExp}
 */
export function pattern(source) {
  const combined = toSource(source)

  return new RegExp(combined, 'g')
}

/**
 * Concatenates the given fragments, wraps them in a non-capturing group
 * and compiles the result into a global RegExp.
 *
 * @param {string|RegExp|Array<string|RegExp>} source
 * @returns {RegExp}
 */
export function withoutCapturing(source) {
  const inner = toSource(source)

  return new RegExp(`(?:${inner})`, 'g')
}

/**
 * Builds the source of a non-capturing group that matches any one of the given
 * alternatives, tried in the order provided.
 *
 * @param {Array<string|RegExp>} sources
 * @returns {string}
 */
export function any(sources) {
  const alternatives = sources.map((candidate) => toSource(candidate))

  return `(?:${alternatives.join('|')})`
}

/**
 * Builds the source of a non-capturing group that matches the given fragment
 * zero or one time.
 *
 * @param {string|RegExp} source
 * @returns {string}
 */
export function optional(source) {
  const inner = toSource(source)

  return `(?:${inner})?`
}

/**
 * Builds the source of a non-capturing group that matches the given fragment
 * any number of times, including none.
 *
 * @param {string|RegExp|Array<string|RegExp>} source
 * @returns {string}
 */
export function zeroOrMore(source) {
  const inner = toSource(source)

  return `(?:${inner})*`
}

/**
 * Generate a RegExp that matches balanced brackets for a given depth
 * We have to specify a depth because JS doesn't support recursive groups using ?R
 *
 * Based on https://stackoverflow.com/questions/17759004/how-to-match-string-within-parentheses-nested-in-java/17759264#17759264
 *
 * @param {string} open Opening bracket; escaped before being placed in the pattern
 * @param {string} close Closing bracket; escaped before being placed in the pattern
 * @param {number} [depth=1] Number of nesting levels to support; values below 1 are treated as 1
 * @returns {RegExp}
 */
export function nestedBrackets(open, close, depth = 1) {
  // This fragment is a string, not a regex literal, so the backslash has to be doubled:
  // inside a template literal `\s` evaluates to a plain `s`
  const notBracketOrSpace = `[^${escape(open)}${escape(close)}\\s]*`

  return withoutCapturing([
    escape(open),
    /[^\s]*/,
    // `<= 1` (rather than `=== 1`) stops the recursion for a depth of 0 or less
    depth <= 1
      ? notBracketOrSpace
      : any([notBracketOrSpace, nestedBrackets(open, close, depth - 1)]),
    /[^\s]*/,
    escape(close),
  ])
}

/**
 * Backslash-escapes every regex-special character in `string`.
 * Falsy input yields an empty string; input without special characters is returned unchanged.
 *
 * @param {string} string
 */
export function escape(string) {
  if (!string) {
    return ''
  }

  if (!REGEX_HAS_SPECIAL.test(string)) {
    return string
  }

  return string.replace(REGEX_SPECIAL, '\\$&')
}
3 changes: 3 additions & 0 deletions tests/arbitrary-values.test.css
Expand Up @@ -316,6 +316,9 @@
.cursor-\[url\(hand\.cur\)_2_2\2c pointer\] {
cursor: url(hand.cur) 2 2, pointer;
}
.cursor-\[url\(\'\.\/path_to_hand\.cur\'\)_2_2\2c pointer\] {
cursor: url("./path_to_hand.cur") 2 2, pointer;
}
.cursor-\[var\(--value\)\] {
cursor: var(--value);
}
Expand Down
29 changes: 29 additions & 0 deletions tests/basic-usage.test.js
Expand Up @@ -401,3 +401,32 @@ it('should generate styles using :not(.unknown-class) even if `.unknown-class` d
`)
})
})

// Regression test: one arbitrary value that mixes a quoted `url('...')` with an unquoted
// `linear-gradient(...)` must produce a single `background-image` utility
it('supports multiple backgrounds as arbitrary values even if only some are quoted', () => {
let config = {
content: [
{
raw: html`<div
class="bg-[url('/images/one-two-three.png'),linear-gradient(to_right,_#eeeeee,_#000000)]"
></div>`,
},
],
// Preflight is disabled so the output contains only the generated utility
corePlugins: { preflight: false },
}

let input = css`
@tailwind utilities;
`

return run(input, config).then((result) => {
// The expected selector is the escaped class name; underscores in the class become spaces in the value
expect(result.css).toMatchFormattedCss(css`
.bg-\[url\(\'\/images\/one-two-three\.png\'\)\2c
linear-gradient\(to_right\2c
_\#eeeeee\2c
_\#000000\)\] {
background-image: url('/images/one-two-three.png'),
linear-gradient(to right, #eeeeee, #000000);
}
`)
})
})

0 comments on commit d676086

Please sign in to comment.