From 3e76796744bb5d0cf08e30a7cea9577f195602e9 Mon Sep 17 00:00:00 2001
From: Tom Byers <tombaromba@gmail.com>
Date: Wed, 30 Jun 2021 14:34:55 +0100
Subject: [PATCH] Get SHA'ed path via regexp, not file contents

The JavaScript files produced by rollup have SHAs
in their names that are not generated solely from
their final contents*. Because of this, it is
tricky to work out their filenames by re-hashing
the files.

This proposes a different approach to the
file_fingerprint filter, changing it so it gets
the SHA'ed filename by regexp rather than using
the same hashing as the gulp task.

*This issue contains some useful information on
how rollup generates its hashes:

https://github.com/rollup/rollup/issues/2839
---
 lib/utils.py | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/lib/utils.py b/lib/utils.py
index bd707404..937def4a 100644
--- a/lib/utils.py
+++ b/lib/utils.py
@@ -1,5 +1,5 @@
-import hashlib
 import os
+import re
 from pathlib import Path
 
 from jinja2 import Markup, escape
@@ -20,7 +20,14 @@ def paragraphize(value, classes="govuk-body-l govuk-!-margin-bottom-4"):
 
 
 def file_fingerprint(path, root=DIST):
-    contents = open(str(root) + path, 'rb').read()
-    hash = hashlib.sha256(contents).hexdigest()[:8]
-    filename, extension = os.path.splitext(path)
-    return f'{filename}-{hash}{extension}'
+    """Return the URL path of the hashed build of `path` found under `root`, else `path` itself."""
+    path = Path(path).relative_to('/')  # path comes in as absolute, rooted to the dist folder
+    # filename + an 8 char hash; the stem and the dot are escaped so they only match literally
+    path_regex = re.compile(rf'^{re.escape(path.stem)}-[0-9a-z]{{8}}\.js$')
+    # sorted so the result is stable if several hashed builds are present
+    matches = sorted(name for name in os.listdir(str(root.joinpath(path.parent))) if path_regex.search(name))
+
+    if matches:
+        return Path('/', path.parent, matches[0]).as_posix()
+    # default to the path sent, keeping its leading slash so both branches return absolute paths
+    return Path('/', path).as_posix()