Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Rewrite default class extractor #8204

Merged
merged 3 commits into from May 4, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 1 addition & 1 deletion CHANGELOG.md
Expand Up @@ -19,7 +19,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Support PostCSS config options in config file in CLI ([#8226](https://github.com/tailwindlabs/tailwindcss/pull/8226))
- Remove default `[hidden]` style in preflight ([#8248](https://github.com/tailwindlabs/tailwindcss/pull/8248))
- Only check selectors containing base apply candidates for circular dependencies ([#8222](https://github.com/tailwindlabs/tailwindcss/pull/8222))
- Handle utilities with multiple and/or grouped selectors better ([#8262](https://github.com/tailwindlabs/tailwindcss/pull/8262))
- Rewrite default class extractor ([#8204](https://github.com/tailwindlabs/tailwindcss/pull/8204))

### Added

Expand Down
184 changes: 151 additions & 33 deletions src/lib/defaultExtractor.js
@@ -1,42 +1,160 @@
// Table of candidate-matching patterns. Each entry is the `.source` string of
// one regular expression (the trailing comment shows an example it targets);
// the entries are joined with `|` into a single alternation string.
// Order matters: in an alternation, earlier alternatives are tried first.
// NOTE(review): these constants look like the pre-rewrite extractor that the
// `buildRegExps()`-based implementation replaces — confirm whether they are
// still needed.
const PATTERNS = [
/(?:\['([^'\s]+[^<>"'`\s:\\])')/.source, // ['text-lg' -> text-lg
/(?:\["([^"\s]+[^<>"'`\s:\\])")/.source, // ["text-lg" -> text-lg
/(?:\[`([^`\s]+[^<>"'`\s:\\])`)/.source, // [`text-lg` -> text-lg
/([^${(<>"'`\s]*\[\w*'[^"`\s]*'?\])/.source, // font-['some_font',sans-serif]
/([^${(<>"'`\s]*\[\w*"[^'`\s]*"?\])/.source, // font-["some_font",sans-serif]
/([^<>"'`\s]*\[\w*\('[^"'`\s]*'\)\])/.source, // bg-[url('...')]
/([^<>"'`\s]*\[\w*\("[^"'`\s]*"\)\])/.source, // bg-[url("...")]
/([^<>"'`\s]*\[\w*\('[^"`\s]*'\)\])/.source, // bg-[url('...'),url('...')]
/([^<>"'`\s]*\[\w*\("[^'`\s]*"\)\])/.source, // bg-[url("..."),url("...")]
/([^<>"'`\s]*\[[^<>"'`\s]*\('[^"`\s]*'\)+\])/.source, // h-[calc(100%-theme('spacing.1'))]
/([^<>"'`\s]*\[[^<>"'`\s]*\("[^'`\s]*"\)+\])/.source, // h-[calc(100%-theme("spacing.1"))]
/([^${(<>"'`\s]*\['[^"'`\s]*'\])/.source, // `content-['hello']` but not `content-['hello']']`
/([^${(<>"'`\s]*\["[^"'`\s]*"\])/.source, // `content-["hello"]` but not `content-["hello"]"]`
/([^<>"'`\s]*\[[^<>"'`\s]*:[^\]\s]*\])/.source, // `[attr:value]`
/([^<>"'`\s]*\[[^<>"'`\s]*:'[^"'`\s]*'\])/.source, // `[content:'hello']` but not `[content:"hello"]`
/([^<>"'`\s]*\[[^<>"'`\s]*:"[^"'`\s]*"\])/.source, // `[content:"hello"]` but not `[content:'hello']`
/([^<>"'`\s]*\[[^"'`\s]+\][^<>"'`\s]*)/.source, // `fill-[#bada55]`, `fill-[#bada55]/50`
/([^"'`\s]*[^<>"'`\s:\\])/.source, // `<sm:underline`, `md>:font-bold`
/([^<>"'`\s]*[^"'`\s:\\])/.source, // `px-1.5`, `uppercase` but not `uppercase:`

// Arbitrary properties
// /([^"\s]*\[[^\s]+?\][^"\s]*)/.source,
// /([^'\s]*\[[^\s]+?\][^'\s]*)/.source,
// /([^`\s]*\[[^\s]+?\][^`\s]*)/.source,
].join('|')

// The joined alternation compiled once as a global regex.
const BROAD_MATCH_GLOBAL_REGEXP = new RegExp(PATTERNS, 'g')
// Global regex matching runs that contain no quotes, whitespace, angle
// brackets, dots, brackets/braces/parens, `#`, `=`, `%` or `$`, and whose last
// character is additionally not `:`.
const INNER_MATCH_GLOBAL_REGEXP = /[^<>"'`\s.(){}[\]#=%$]*[^<>"'`\s.(){}[\]#=%:$]/g
import * as regex from './regex'

// Materialise the generator once at module load so the compiled patterns are
// reused for every piece of content.
let patterns = Array.from(buildRegExps())

/**
 * Collect class-name candidates from raw content.
 *
 * Every pattern produced by `buildRegExps()` is matched against `content`,
 * in pattern order, and each match is passed through `clipAtBalancedParens`.
 *
 * @param {string} content
 * @returns {string[]}
 */
export function defaultExtractor(content) {
  /** @type {string[]} */
  let results = []

  for (let pattern of patterns) {
    // Push one match at a time: spreading a very large match array into
    // `push(...)` can exceed the engine's argument-count limit.
    for (let candidate of content.match(pattern) ?? []) {
      if (candidate !== undefined) {
        results.push(clipAtBalancedParens(candidate))
      }
    }
  }

  return results
}

/**
 * Yield the regular expressions used to find class candidates.
 *
 * A single pattern is produced, assembled from named pieces via the `regex`
 * helpers: optional variant prefix, optional `!`, then either an arbitrary
 * property or a utility with an optional value.
 */
function* buildRegExps() {
  // Variants: a lookahead captures the `…:` prefix and the backreference then
  // consumes exactly what was captured.
  const variantPrefix = /((?=([^\s"'\\\[]+:))\2)?/

  // Important (optional)
  const importantFlag = /!?/

  // Arbitrary properties, e.g. `[attr:value]`
  const arbitraryProperty = /\[[^\s:'"]+:[^\s\]]+\]/

  // Utility name / group name, with an optional leading `-`
  const utilityName = /-?(?:\w+)/

  // Commented in the original as "not immediately followed by an `{[(`".
  // NOTE(review): as written this is the class `[{([]` followed by a literal
  // `]`, so it only rejects one of `{`, `(`, `[` directly followed by `]` —
  // confirm this is the intended lookahead.
  const bracketLookahead = /(?![{([]])/

  // Optional opacity modifier
  const opacityModifier = /(?:\/[^\s'"\\$]*)?/

  // Arbitrary value that contains no `:`
  const arbitraryValueWithoutColon = regex.pattern([
    /-\[[^\s:]+\]/,
    bracketLookahead,
    opacityModifier,
  ])

  // Arbitrary value made of any non-whitespace characters
  const arbitraryValue = regex.pattern([/-\[[^\s]+\]/, bracketLookahead, opacityModifier])

  // Normal values w/o quotes — may include an opacity modifier
  const plainValue = /[-\/][^\s'"\\$={]*/

  // The alternatives are tried in this order
  const utilityValue = regex.optional(
    regex.any([arbitraryValueWithoutColon, arbitraryValue, plainValue])
  )

  const utility = regex.pattern([utilityName, utilityValue])

  yield regex.pattern([variantPrefix, importantFlag, regex.any([arbitraryProperty, utility])])

  // 5. Inner matches
  // yield /[^<>"'`\s.(){}[\]#=%$]*[^<>"'`\s.(){}[\]#=%:$]/g
}

// We want to capture any "special" characters (brackets and quotes)
// AND the character immediately following them (if there is one and it is
// not itself special)
let SPECIALS = /([\[\]'"`])([^\[\]'"`])?/g

// Characters that may still belong to a class once the brackets are balanced.
// Deliberately non-global so that `.test()` carries no `lastIndex` state.
let ALLOWED_CLASS_CHARACTERS = /[^"'`\s<>\]]+/

/**
 * Clips a string so that square brackets are balanced, ignoring any brackets
 * that appear inside quotes. Used for arbitrary values only: input without
 * `-[` is returned untouched.
 *
 * Scanning continues past the point where the brackets balance until a
 * non-class character is found, so `text-[#336699]/[.35]` is kept whole.
 *
 * Only "special" characters (`[`, `]`, quotes) and the character directly
 * after each of them are inspected.
 *
 * Examples:
 *   w-[calc(100%-theme('spacing[some_key][1.5]'))]']
 *     -> w-[calc(100%-theme('spacing[some_key][1.5]'))]
 *   w-[10px]]
 *     -> w-[10px]
 *
 * @param {string} input
 * @returns {string} `input`, possibly shortened
 */
function clipAtBalancedParens(input) {
  // We only care about this for arbitrary values
  if (!input.includes('-[')) {
    return input
  }

  let depth = 0
  let openStringTypes = []

  // We can't use lookbehind assertions because we have to support Safari.
  // Instead each match is split into one entry per capture group: the special
  // character itself and the character after it. The second entry has
  // `char === undefined` when the special character is followed by another
  // special character or by the end of the input; it is kept on purpose so
  // that the checks below still run once right after every special character.
  let positions = Array.from(input.matchAll(SPECIALS)).flatMap((match) => {
    const [, ...groups] = match

    return groups.map((char, offset) => ({ char, index: match.index + offset }))
  })

  // This is naive and will treat mismatched brackets as balanced.
  // This shouldn't be a problem in practice though.
  for (let { char, index } of positions) {
    let inStringType = openStringTypes[openStringTypes.length - 1]

    if (char !== undefined && char === inStringType) {
      openStringTypes.pop()
    } else if (char === "'" || char === '"' || char === '`') {
      openStringTypes.push(char)
    }

    // Everything inside quotes (including the closing quote) is skipped
    if (inStringType) {
      continue
    } else if (char === '[') {
      depth++
      continue
    } else if (char === ']') {
      depth--
      continue
    }

    // We've gone one character past the point where we should stop.
    // This means that there was an extra closing `]`, which sits at
    // `index - 1`, so we clip to just before it.
    if (depth < 0) {
      return input.substring(0, index - 1)
    }

    // We've finished balancing the brackets but there still may be characters that can be included
    // For example in the class `text-[#336699]/[.35]`
    // The depth goes to `0` at the closing `]` but goes up again at the `[`

    // If we're at zero and encounter a non-class character then we clip the class there
    if (depth === 0 && char !== undefined && !ALLOWED_CLASS_CHARACTERS.test(char)) {
      return input.substring(0, index)
    }
  }

  return input
}

// Regular utilities
Expand Down
14 changes: 11 additions & 3 deletions src/lib/expandApplyAtRules.js
Expand Up @@ -34,6 +34,15 @@ function extractClasses(node) {
return Object.assign(classes, { groups: normalizedGroups })
}

// Processor whose callback converts every top-level node of the parsed root
// into its string form.
// NOTE(review): `parser` is imported outside this excerpt — presumably
// postcss-selector-parser; confirm against the file's imports.
let selectorExtractor = parser((root) => {
  return root.nodes.map((selectorNode) => selectorNode.toString())
})

/**
 * Run a rule's selector string through `selectorExtractor` synchronously and
 * return the result of the transform.
 *
 * @param {string} ruleSelectors
 */
function extractSelectors(ruleSelectors) {
  let extracted = selectorExtractor.transformSync(ruleSelectors)

  return extracted
}

function extractBaseCandidates(candidates, separator) {
let baseClasses = new Set()

Expand Down Expand Up @@ -295,10 +304,9 @@ function processApply(root, context, localCache) {
function replaceSelector(selector, utilitySelectors, candidate) {
let needle = `.${escapeClassName(candidate)}`
let needles = [...new Set([needle, needle.replace(/\\2c /g, '\\,')])]
let utilitySelectorsList = utilitySelectors.split(/\s*(?<!\\)\,(?![^(]*\))\s*/g)
let utilitySelectorsList = extractSelectors(utilitySelectors)

return selector
.split(/\s*(?<!\\)\,(?![^(]*\))\s*/g)
return extractSelectors(selector)
.map((s) => {
let replaced = []

Expand Down
6 changes: 3 additions & 3 deletions src/lib/expandTailwindAtRules.js
Expand Up @@ -17,8 +17,8 @@ const builtInTransformers = {
svelte: (content) => content.replace(/(?:^|\s)class:/g, ' '),
}

function getExtractor(tailwindConfig, fileExtension) {
let extractors = tailwindConfig.content.extract
function getExtractor(context, fileExtension) {
let extractors = context.tailwindConfig.content.extract

return (
extractors[fileExtension] ||
Expand Down Expand Up @@ -165,7 +165,7 @@ export default function expandTailwindAtRules(context) {

for (let { content, extension } of context.changedContent) {
let transformer = getTransformer(context.tailwindConfig, extension)
let extractor = getExtractor(context.tailwindConfig, extension)
let extractor = getExtractor(context, extension)
getClassCandidates(transformer(content), extractor, candidates, seen)
}

Expand Down
74 changes: 74 additions & 0 deletions src/lib/regex.js
@@ -0,0 +1,74 @@
// Characters that carry special meaning in a regular expression source
const REGEX_SPECIAL = /[\\^$.*+?()[\]{}|]/g
// Non-global twin of REGEX_SPECIAL: `.test()` on a `g` regex is stateful
// (`lastIndex`), so detection uses a fresh flag-less copy.
const REGEX_HAS_SPECIAL = RegExp(REGEX_SPECIAL.source)

/**
 * Concatenate one or more pattern fragments into a single source string.
 * RegExp fragments contribute their `.source` (flags are dropped); strings
 * are used verbatim.
 *
 * @param {string|RegExp|Array<string|RegExp>} source
 * @returns {string}
 */
function toSource(source) {
  source = Array.isArray(source) ? source : [source]

  source = source.map((item) => (item instanceof RegExp ? item.source : item))

  return source.join('')
}

/**
 * Build a global RegExp from the concatenated fragments.
 *
 * @param {string|RegExp|Array<string|RegExp>} source
 * @returns {RegExp}
 */
export function pattern(source) {
  return new RegExp(toSource(source), 'g')
}

/**
 * Build a global RegExp that wraps the concatenated fragments in a
 * non-capturing group.
 *
 * @param {string|RegExp|Array<string|RegExp>} source
 * @returns {RegExp}
 */
export function withoutCapturing(source) {
  return new RegExp(`(?:${toSource(source)})`, 'g')
}

/**
 * Source string for a non-capturing alternation of the given fragments.
 *
 * @param {Array<string|RegExp>} sources
 * @returns {string}
 */
export function any(sources) {
  return `(?:${sources.map(toSource).join('|')})`
}

/**
 * Source string that makes the fragment optional (`(?:…)?`).
 *
 * @param {string|RegExp} source
 * @returns {string}
 */
export function optional(source) {
  return `(?:${toSource(source)})?`
}

/**
 * Source string that repeats the fragment zero or more times (`(?:…)*`).
 *
 * @param {string|RegExp|Array<string|RegExp>} source
 * @returns {string}
 */
export function zeroOrMore(source) {
  return `(?:${toSource(source)})*`
}

/**
 * Generate a RegExp that matches balanced brackets for a given depth
 * We have to specify a depth because JS doesn't support recursive groups using ?R
 *
 * Based on https://stackoverflow.com/questions/17759004/how-to-match-string-within-parentheses-nested-in-java/17759264#17759264
 *
 * @param {string} open Opening bracket; escaped before use
 * @param {string} close Closing bracket; escaped before use
 * @param {number} [depth=1] How many levels of nesting to generate
 * @returns {RegExp}
 */
export function nestedBrackets(open, close, depth = 1) {
  // `\\s`, not `\s`: inside a template literal a bare `\s` collapses to the
  // letter "s", which would exclude "s" and let whitespace through.
  let withoutBrackets = `[^${escape(open)}${escape(close)}\\s]*`

  return withoutCapturing([
    escape(open),
    /[^\s]*/,
    depth === 1
      ? withoutBrackets
      : any([withoutBrackets, nestedBrackets(open, close, depth - 1)]),
    /[^\s]*/,
    escape(close),
  ])
}

/**
 * Backslash-escape every regex-special character in `string`.
 * Falsy input yields an empty string.
 *
 * @param {string} [string]
 * @returns {string}
 */
export function escape(string) {
  return string && REGEX_HAS_SPECIAL.test(string)
    ? string.replace(REGEX_SPECIAL, '\\$&')
    : string || ''
}
3 changes: 3 additions & 0 deletions tests/arbitrary-values.test.css
Expand Up @@ -316,6 +316,9 @@
.cursor-\[url\(hand\.cur\)_2_2\2c pointer\] {
cursor: url(hand.cur) 2 2, pointer;
}
.cursor-\[url\(\'\.\/path_to_hand\.cur\'\)_2_2\2c pointer\] {
cursor: url("./path_to_hand.cur") 2 2, pointer;
}
.cursor-\[var\(--value\)\] {
cursor: var(--value);
}
Expand Down
29 changes: 29 additions & 0 deletions tests/basic-usage.test.js
Expand Up @@ -401,3 +401,32 @@ it('should generate styles using :not(.unknown-class) even if `.unknown-class` d
`)
})
})

// Regression test: a single arbitrary `bg-[...]` value that mixes a quoted
// `url('...')` with an unquoted `linear-gradient(...)` must be extracted from
// the raw HTML as one candidate and produce one `background-image` utility.
it('supports multiple backgrounds as arbitrary values even if only some are quoted', () => {
// Inline raw content holding the class under test; preflight is disabled so
// the output contains only the generated utility.
let config = {
content: [
{
raw: html`<div
class="bg-[url('/images/one-two-three.png'),linear-gradient(to_right,_#eeeeee,_#000000)]"
></div>`,
},
],
corePlugins: { preflight: false },
}

let input = css`
@tailwind utilities;
`

// The promise is returned so the test runner waits for the assertion.
return run(input, config).then((result) => {
// Expected selector is the escaped class name (`\2c ` is the escaped comma);
// underscores in the arbitrary value appear as spaces in the declaration.
expect(result.css).toMatchFormattedCss(css`
.bg-\[url\(\'\/images\/one-two-three\.png\'\)\2c
linear-gradient\(to_right\2c
_\#eeeeee\2c
_\#000000\)\] {
background-image: url('/images/one-two-three.png'),
linear-gradient(to right, #eeeeee, #000000);
}
`)
})
})