Get SHA'ed path via regexp, not file contents

The JavaScript files produced by rollup have SHAs in their names that are not generated solely from their final contents*. Because of this, it is tricky to determine a file's name by hashing its contents. This commit takes a different approach in the file_fingerprint filter: it now finds the SHA'ed filename by matching a regexp against the files in the dist folder, rather than re-computing the hash the same way the gulp task does. (*This issue contains some useful information on how rollup generates its hashes: rollup/rollup#2839)
alphagov · Jun 30, 2021 · 3e76796 · 3e76796
1 parent 54a10f3
commit 3e76796
Showing 1 changed file with 12 additions and 5 deletions.
diff --git a/lib/utils.py b/lib/utils.py
@@ -1,5 +1,5 @@
-import hashlib
 import os
+import re
 from pathlib import Path
 
 from jinja2 import Markup, escape
@@ -20,7 +20,14 @@ def paragraphize(value, classes="govuk-body-l govuk-!-margin-bottom-4"):
 
 
 def file_fingerprint(path, root=DIST):
-    contents = open(str(root) + path, 'rb').read()
-    hash = hashlib.sha256(contents).hexdigest()[:8]
-    filename, extension = os.path.splitext(path)
-    return f'{filename}-{hash}{extension}'
+    path = Path(path).relative_to('/')  # path comes in as absolute, rooted to the dist folder
+    path_regex = re.compile(f'^{path.stem}-[0-9a-z]{{8}}.js$')  # regexp based on the filename + a 8 char hash
+    matches = [
+                filename for filename
+                in os.listdir(str(root.joinpath(path.parent)))
+                if path_regex.search(filename)]
+
+    if len(matches) > 0:
+        return f'/{path.parent}/{matches[0]}'
+    else:
+        return str(path)  # default to the path sent