From a17927fad54ae24b2a5ecc6abca593720efcadfb Mon Sep 17 00:00:00 2001
From: Ben Dunkin <chewygumball@gmail.com>
Date: Sun, 11 Jul 2021 18:08:14 -0700
Subject: [PATCH] make the integration test comprehensive

---
 tests/TestScripts/testSharding.py | 123 +++++++++++++++++++++++-------
 1 file changed, 95 insertions(+), 28 deletions(-)

diff --git a/tests/TestScripts/testSharding.py b/tests/TestScripts/testSharding.py
index 9237d80538..3bb32c44e9 100644
--- a/tests/TestScripts/testSharding.py
+++ b/tests/TestScripts/testSharding.py
@@ -8,15 +8,33 @@
 the same relative order.
 """
 
+import itertools
+import multiprocessing
+import random
 import subprocess
 import sys
-import random
 import xml.etree.ElementTree as ET
 
-def list_tests(self_test_exe, tags, rng_seed):
-    cmd = [self_test_exe, '--reporter', 'xml', '--list-tests', '--order', 'rand',
-            '--rng-seed', str(rng_seed)]
-    tags_arg = ','.join('[{}]~[.]'.format(t) for t in tags)
+from collections import namedtuple
+
+TestCase = namedtuple("TestCase", ("shard_count", "order", "tags", "rng_seed"))  # field order must match the itertools.product() call in main()
+
+def list_tests(self_test_exe, test_case, shard_index=None):
+    cmd = [
+        self_test_exe,
+        '--reporter', 'xml',
+        '--list-tests',
+        '--order', test_case.order,
+        '--rng-seed', str(test_case.rng_seed)
+    ]
+
+    if shard_index is not None:
+        cmd.extend([
+            "--shard-count", str(test_case.shard_count),
+            "--shard-index", str(shard_index)
+        ])
+
+    tags_arg = ','.join('[{}]~[.]'.format(t) for t in test_case.tags)
     if tags_arg:
         cmd.append(tags_arg)
     process = subprocess.Popen(
@@ -33,36 +51,85 @@ def list_tests(self_test_exe, tags, rng_seed):
             len(result)))
     return result
 
-def check_is_sublist_of(shorter, longer):
-    assert len(shorter) < len(longer)
-    assert len(set(longer)) == len(longer)
+def execute_tests(self_test_exe, test_case, shard_index):
+    """Run shard `shard_index` of `test_case` and return the names of the reported test cases."""
+    cmd = [
+        self_test_exe,
+        '--reporter', 'xml',
+        '--order', test_case.order,
+        '--rng-seed', str(test_case.rng_seed),
+        "--shard-count", str(test_case.shard_count),
+        "--shard-index", str(shard_index)
+    ]
+
+    tags_arg = ','.join('[{}]~[.]'.format(t) for t in test_case.tags)
+    if tags_arg:
+        cmd.append(tags_arg)
+    process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+    stdout, stderr = process.communicate()
+    if stderr:
+        # communicate() hands back bytes; process.stderr is the pipe object and cannot be concatenated
+        raise RuntimeError("Unexpected error output:\n" + stderr.decode(errors="replace"))
+
+    root = ET.fromstring(stdout)
+    result = [elem.attrib["name"] for elem in root.findall('./Group/TestCase')]
+
+    if len(result) < 2:
+        raise RuntimeError("Unexpectedly few tests listed (got {})".format(len(result)))
+    return result
+
+def check_listed_tests_match(all_tests, sharded_tests):
+    """Assert that the shards, taken in shard order, reproduce `all_tests` exactly."""
+    # First compare sizes: the shards together must hold as many tests as the full list.
+    total_test_count = len(all_tests)
+    sharded_test_count = sum(len(shard) for shard in sharded_tests)
+
+    assert total_test_count == sharded_test_count, f"Sharded test count ({sharded_test_count}) does not match the total test count ({total_test_count})"
+
+    # Then compare names position by position across the shards laid end to end. With equal
+    # sizes, a full match means every test of the full list sits in exactly one shard, in order.
+    test_index = 0
+    for shard in sharded_tests:
+        for test_name in shard:
+            assert test_name == all_tests[test_index], f"Sharding does not split the test list while maintaining order {test_index}:\n'{test_name}' vs '{all_tests[test_index]}'"
+            test_index += 1
+
+def check_listed_and_executed_tests_match(listed_tests, executed_tests):
+    """Assert that each shard executed exactly the set of test names it listed (order is ignored)."""
+    assert len(listed_tests) == len(executed_tests), f"Listed shard count ({len(listed_tests)}) does not match the executed shard count ({len(executed_tests)})"
+    for listed_shard, executed_shard in zip(listed_tests, executed_tests):
+        listed_shard_names = set(listed_shard)
+        executed_shard_names = set(executed_shard)
+        listed_string = "\n".join(sorted(listed_shard_names))  # sorted: set order would vary between runs
+        executed_string = "\n".join(sorted(executed_shard_names))
+        assert listed_shard_names == executed_shard_names, f"Executed tests do not match the listed tests:\nExecuted:\n{executed_string}\n\nListed:\n{listed_string}"
+
+def test_shards_cover_all_test(self_test_exe, test_case):
+    """List and run every shard of `test_case`, asserting the shards agree with the unsharded listing."""
+    shard_indices = range(test_case.shard_count)
+    unsharded = list_tests(self_test_exe, test_case)
+    listed_per_shard = [list_tests(self_test_exe, test_case, shard) for shard in shard_indices]
+    check_listed_tests_match(unsharded, listed_per_shard)
+    # Running a shard has to report the same tests that listing that shard reported.
+    executed_per_shard = [execute_tests(self_test_exe, test_case, shard) for shard in shard_indices]
+    check_listed_and_executed_tests_match(listed_per_shard, executed_per_shard)
 
-    indexes_in_longer = {s: i for i, s in enumerate(longer)}
-    for s1, s2 in zip(shorter, shorter[1:]):
-        assert indexes_in_longer[s1] < indexes_in_longer[s2], (
-                '{} comes before {} in longer list.\n'
-                'Longer: {}\nShorter: {}'.format(s2, s1, longer, shorter))
 
 def main():
+    """Check sharding for every combination of shard count, test order, tag filter and RNG seed."""
     self_test_exe, = sys.argv[1:]
 
-    test_cases = [
-        (1, 0), # default values, 1 shard, execute index 0
-        (1, 1), # 1 shard, invalid index (should still execute using the last valid index)
-        (4, 2), # 4 shards, second index
-    ]
-
-    # We want a random seed for the test, but want to avoid 0,
-    # because it has special meaning
-    seed = random.randint(1, 2 ** 32 - 1)
+    shard_counts = [1, 5]
+    # We want a random seed for the test, but want to avoid 0, because it has special meaning
+    seeds = [random.randint(1, 2 ** 32 - 1), random.randint(1, 2 ** 32 - 1)]
+    tags = [["generators"], ["generators", "matchers"], []]
+    orders = ["rand", "decl", "lex"]
 
-    list_one_tag = list_tests(self_test_exe, ['generators'], seed)
-    list_two_tags = list_tests(self_test_exe, ['generators', 'matchers'], seed)
-    list_all = list_tests(self_test_exe, [], seed)
+    test_cases = [TestCase(*t) for t in itertools.product(shard_counts, orders, tags, seeds)]
 
-    # First, verify that restricting to a subset yields the same order
-    check_is_sublist_of(list_two_tags, list_all)
-    check_is_sublist_of(list_one_tag, list_two_tags)
+    # We use a process pool because there are quite a few test cases, and running them serially is slow
+    with multiprocessing.Pool() as pool:
+        pool.starmap(test_shards_cover_all_test, itertools.product([self_test_exe], test_cases))
 
 if __name__ == '__main__':
     sys.exit(main())