fix(@angular-devkit/build-angular): use url function lexer to rebase Sass URLs

When rebasing URLs found within Sass files (sass/scss), the previous regular expression based searching has been replaced with a lexer that scans the Sass files for CSS url() functions and extracts URL values. This change allows for more accurate discovery of URLs and reduces the number of content traversals per file. The lexer logic is based on CSS Syntax Module Level 3 (https://www.w3.org/TR/css-syntax-3/).

(cherry picked from commit 84dc05d)
angular · Nov 23, 2022 · 2220a90 · 2220a90
1 parent fc0d924
commit 2220a90
Showing 1 changed file with 187 additions and 37 deletions.
diff --git a/packages/angular_devkit/build_angular/src/sass/rebasing-importer.ts b/packages/angular_devkit/build_angular/src/sass/rebasing-importer.ts
@@ -13,12 +13,6 @@ import { basename, dirname, extname, join, relative } from 'node:path';
 import { fileURLToPath, pathToFileURL } from 'node:url';
 import type { FileImporter, Importer, ImporterResult, Syntax } from 'sass';
 
-/**
- * A Regular expression used to find all `url()` functions within a stylesheet.
- * From packages/angular_devkit/build_angular/src/webpack/plugins/postcss-cli-resources.ts
- */
-const URL_REGEXP = /url(?:\(\s*(['"]?))(.*?)(?:\1\s*\))/g;
-
 /**
  * A Sass Importer base class that provides the load logic to rebase all `url()` functions
  * within a stylesheet. The rebasing will ensure that the URLs in the output of the Sass compiler
@@ -45,44 +39,42 @@ abstract class UrlRebasingImporter implements Importer<'sync'> {
 
   load(canonicalUrl: URL): ImporterResult | null {
     const stylesheetPath = fileURLToPath(canonicalUrl);
+    const stylesheetDirectory = dirname(stylesheetPath);
     let contents = readFileSync(stylesheetPath, 'utf-8');
 
     // Rebase any URLs that are found
-    if (contents.includes('url(')) {
-      const stylesheetDirectory = dirname(stylesheetPath);
-
-      let match;
-      URL_REGEXP.lastIndex = 0;
-      let updatedContents;
-      while ((match = URL_REGEXP.exec(contents))) {
-        const originalUrl = match[2];
-
-        // If root-relative, absolute or protocol relative url, leave as-is
-        if (/^((?:\w+:)?\/\/|data:|chrome:|#|\/)/.test(originalUrl)) {
-          continue;
-        }
+    let updatedContents;
+    for (const { start, end, value } of findUrls(contents)) {
+      // Skip if value is empty or a Sass variable
+      if (value.length === 0 || value.startsWith('$')) {
+        continue;
+      }
 
-        const rebasedPath = relative(this.entryDirectory, join(stylesheetDirectory, originalUrl));
+      // Skip if root-relative, absolute or protocol relative url
+      if (/^((?:\w+:)?\/\/|data:|chrome:|#|\/)/.test(value)) {
+        continue;
+      }
 
-        // Normalize path separators and escape characters
-        // https://developer.mozilla.org/en-US/docs/Web/CSS/url#syntax
-        const rebasedUrl = './' + rebasedPath.replace(/\\/g, '/').replace(/[()\s'"]/g, '\\$&');
+      const rebasedPath = relative(this.entryDirectory, join(stylesheetDirectory, value));
 
-        updatedContents ??= new MagicString(contents);
-        updatedContents.update(match.index, match.index + match[0].length, `url(${rebasedUrl})`);
-      }
+      // Normalize path separators and escape characters
+      // https://developer.mozilla.org/en-US/docs/Web/CSS/url#syntax
+      const rebasedUrl = './' + rebasedPath.replace(/\\/g, '/').replace(/[()\s'"]/g, '\\$&');
 
-      if (updatedContents) {
-        contents = updatedContents.toString();
-        if (this.rebaseSourceMaps) {
-          // Generate an intermediate source map for the rebasing changes
-          const map = updatedContents.generateMap({
-            hires: true,
-            includeContent: true,
-            source: canonicalUrl.href,
-          });
-          this.rebaseSourceMaps.set(canonicalUrl.href, map as RawSourceMap);
-        }
+      updatedContents ??= new MagicString(contents);
+      updatedContents.update(start, end, rebasedUrl);
+    }
+
+    if (updatedContents) {
+      contents = updatedContents.toString();
+      if (this.rebaseSourceMaps) {
+        // Generate an intermediate source map for the rebasing changes
+        const map = updatedContents.generateMap({
+          hires: true,
+          includeContent: true,
+          source: canonicalUrl.href,
+        });
+        this.rebaseSourceMaps.set(canonicalUrl.href, map as RawSourceMap);
       }
     }
 
@@ -107,6 +99,164 @@ abstract class UrlRebasingImporter implements Importer<'sync'> {
   }
 }
 
+/**
+ * Determines if a unicode code point is a CSS whitespace character.
+ * Only tab, space, line feed, form feed, and carriage return are matched; every other value,
+ * including the -1 end-of-input sentinel produced by `findUrls`, results in false.
+ * @param code The unicode code point to test.
+ * @returns true, if the code point is CSS whitespace; false, otherwise.
+ */
+function isWhitespace(code: number): boolean {
+  // Based on https://www.w3.org/TR/css-syntax-3/#whitespace
+  switch (code) {
+    case 0x0009: // tab
+    case 0x0020: // space
+    case 0x000a: // line feed
+    case 0x000c: // form feed
+    case 0x000d: // carriage return
+      return true;
+    default:
+      return false;
+  }
+}
+
+/**
+ * Scans a CSS or Sass file and locates all valid url function values as defined by the CSS
+ * syntax specification.
+ *
+ * The scan jumps to each literal `url(` occurrence, skips whitespace after the opening
+ * parenthesis, and then reads either a quoted string or an unquoted URL. A backslash escape
+ * contributes the escaped character itself to the value; hex escape sequences are not decoded.
+ * Values that are invalid (for example an unescaped newline inside a quoted string, or a quote,
+ * `(`, or interior whitespace inside an unquoted URL) are skipped without being yielded.
+ * Reaching the end of the input in the middle of a URL ends the iteration.
+ *
+ * For a quoted URL, `start` is the index of the opening quote and `end` is one past the closing
+ * quote. For an unquoted URL, `start` is the index of the first non-whitespace character after
+ * `(` and `end` is the index of the closing `)`. Both are UTF-16 string indices into `contents`.
+ * The yielded `value` may be an empty string (e.g. for `url()`).
+ *
+ * NOTE(review): the `url(` search does not check the preceding character, so a match inside a
+ * longer identifier (e.g. `my-url(`) or inside a comment is treated as a url function.
+ * @param contents A string containing a CSS or Sass file to scan.
+ * @returns An iterable that yields each CSS url function value found.
+ */
+function* findUrls(contents: string): Iterable<{ start: number; end: number; value: string }> {
+  let pos = 0; // current scan position (UTF-16 index into contents)
+  let width = 1; // number of UTF-16 units to step over on the next advance
+  let current = -1; // last code point read by next(); -1 means none or end of input
+  const next = () => { // advances past the current code point and reads the one that follows
+    pos += width;
+    current = contents.codePointAt(pos) ?? -1;
+    width = current > 0xffff ? 2 : 1; // code points above U+FFFF span two UTF-16 units
+
+    return current;
+  };
+
+  // Based on https://www.w3.org/TR/css-syntax-3/#consume-ident-like-token
+  while ((pos = contents.indexOf('url(', pos)) !== -1) {
+    // Set to position of the (
+    pos += 3;
+    width = 1; // so the next() call below steps to the character right after the (
+
+    // Consume all leading whitespace
+    while (isWhitespace(next())) {
+      /* empty */
+    }
+
+    // Initialize URL state
+    const url = { start: pos, end: -1, value: '' }; // end stays -1 until a valid end is found
+    let complete = false;
+
+    // If " or ', then consume the value as a string
+    if (current === 0x0022 || current === 0x0027) {
+      const ending = current; // the closing quote must match the opening quote
+      // Based on https://www.w3.org/TR/css-syntax-3/#consume-string-token
+      while (!complete) {
+        switch (next()) {
+          case -1: // EOF
+            return;
+          case 0x000a: // line feed
+          case 0x000c: // form feed
+          case 0x000d: // carriage return
+            // Invalid -- an unescaped newline ends the string and nothing is yielded
+            complete = true;
+            break;
+          case 0x005c: // \ -- character escape
+            // If not EOF or newline, add the character after the escape
+            switch (next()) {
+              case -1:
+                return;
+              case 0x000a: // line feed
+              case 0x000c: // form feed
+              case 0x000d: // carriage return
+                // Skip when inside a string
+                break;
+              default:
+                // TODO: Handle hex escape codes
+                url.value += String.fromCodePoint(current);
+                break;
+            }
+            break;
+          case ending:
+            // Full string position should include the quotes for replacement
+            url.end = pos + 1;
+            complete = true;
+            yield url;
+            break;
+          default:
+            url.value += String.fromCodePoint(current);
+            break;
+        }
+      }
+
+      next(); // step past the character that ended the string before searching again
+      continue;
+    }
+
+    // Based on https://www.w3.org/TR/css-syntax-3/#consume-url-token
+    while (!complete) {
+      switch (current) { // the first pass examines the character left by the whitespace skip above
+        case -1: // EOF
+          return;
+        case 0x0022: // "
+        case 0x0027: // '
+        case 0x0028: // (
+          // Invalid
+          complete = true;
+          break;
+        case 0x0029: // )
+          // URL is valid and complete
+          url.end = pos; // exclusive end: the index of the ) itself
+          complete = true;
+          break;
+        case 0x005c: // \ -- character escape
+          // If not EOF or newline, add the character after the escape
+          switch (next()) {
+            case -1: // EOF
+              return;
+            case 0x000a: // line feed
+            case 0x000c: // form feed
+            case 0x000d: // carriage return
+              // Invalid
+              complete = true;
+              break;
+            default:
+              // TODO: Handle hex escape codes
+              url.value += String.fromCodePoint(current);
+              break;
+          }
+          break;
+        default:
+          if (isWhitespace(current)) {
+            while (isWhitespace(next())) {
+              /* empty */
+            }
+            // Unescaped whitespace is only valid before the closing )
+            if (current === 0x0029) {
+              // URL is valid
+              url.end = pos; // trailing whitespace before the ) is included in the range
+            }
+            complete = true; // without a ) here, end remains -1 and nothing is yielded
+          } else {
+            // Add the character to the url value
+            url.value += String.fromCodePoint(current);
+          }
+          break;
+      }
+      next(); // move on to the following character (also runs once after completion)
+    }
+
+    // An end position indicates a URL was found
+    if (url.end !== -1) {
+      yield url;
+    }
+  }
+}
+
 /**
  * Provides the Sass importer logic to resolve relative stylesheet imports via both import and use rules
  * and also rebase any `url()` function usage within those stylesheets. The rebasing will ensure that