From a17927fad54ae24b2a5ecc6abca593720efcadfb Mon Sep 17 00:00:00 2001
From: Ben Dunkin
Date: Sun, 11 Jul 2021 18:08:14 -0700
Subject: [PATCH] make the integration test comprehensive

---
 tests/TestScripts/testSharding.py | 140 ++++++++++++++++++++++++------
 1 file changed, 112 insertions(+), 28 deletions(-)

diff --git a/tests/TestScripts/testSharding.py b/tests/TestScripts/testSharding.py
index 9237d80538..3bb32c44e9 100644
--- a/tests/TestScripts/testSharding.py
+++ b/tests/TestScripts/testSharding.py
@@ -8,15 +8,38 @@
 the same relative order.
 """
 
+import itertools
+import multiprocessing
+import random
 import subprocess
 import sys
-import random
 import xml.etree.ElementTree as ET
 
-def list_tests(self_test_exe, tags, rng_seed):
-    cmd = [self_test_exe, '--reporter', 'xml', '--list-tests', '--order', 'rand',
-           '--rng-seed', str(rng_seed)]
-    tags_arg = ','.join('[{}]~[.]'.format(t) for t in tags)
+from collections import namedtuple
+
+TestCase = namedtuple("TestCase", ("shard_count", "order", "tags", "rng_seed"))
+
+def list_tests(self_test_exe, test_case, shard_index=None):
+    """List the tests selected by test_case, optionally for a single shard.
+
+    The shard flags are only passed when shard_index is given, so omitting
+    it lists the full, unsharded selection.
+    """
+    cmd = [
+        self_test_exe,
+        '--reporter', 'xml',
+        '--list-tests',
+        '--order', test_case.order,
+        '--rng-seed', str(test_case.rng_seed)
+    ]
+
+    if shard_index is not None:
+        cmd.extend([
+            "--shard-count", str(test_case.shard_count),
+            "--shard-index", str(shard_index)
+        ])
+
+    tags_arg = ','.join('[{}]~[.]'.format(t) for t in test_case.tags)
     if tags_arg:
         cmd.append(tags_arg)
     process = subprocess.Popen(
@@ -33,36 +56,97 @@ def list_tests(self_test_exe, tags, rng_seed):
             len(result)))
     return result
 
-def check_is_sublist_of(shorter, longer):
-    assert len(shorter) < len(longer)
-    assert len(set(longer)) == len(longer)
+def execute_tests(self_test_exe, test_case, shard_index):
+    """Run one shard of test_case and return the executed test case names.
+
+    Names are read from the XML reporter output on stdout.
+
+    Raises RuntimeError if the binary writes to stderr or fewer than two
+    test cases were run.
+    """
+    cmd = [
+        self_test_exe,
+        '--reporter', 'xml',
+        '--order', test_case.order,
+        '--rng-seed', str(test_case.rng_seed),
+        "--shard-count", str(test_case.shard_count),
+        "--shard-index", str(shard_index)
+    ]
+
+    tags_arg = ','.join('[{}]~[.]'.format(t) for t in test_case.tags)
+    if tags_arg:
+        cmd.append(tags_arg)
+    process = subprocess.Popen(
+            cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+    stdout, stderr = process.communicate()
+    if stderr:
+        # communicate() returns the captured bytes; process.stderr is the
+        # pipe object and cannot be concatenated to a str.
+        raise RuntimeError("Unexpected error output:\n" + stderr.decode(errors="replace"))
+
+    root = ET.fromstring(stdout)
+    result = [elem.attrib["name"] for elem in root.findall('./Group/TestCase')]
+
+    if len(result) < 2:
+        raise RuntimeError("Unexpectedly few tests listed (got {})".format(
+            len(result)))
+    return result
+
+def check_listed_tests_match(all_tests, sharded_tests):
+    """Assert that the shards, concatenated in order, equal the full list."""
+    # Make sure the number of tests in the full list and in the sharded lists is the same.
+    total_test_count = len(all_tests)
+    sharded_test_count = sum(len(shard) for shard in sharded_tests)
+
+    assert total_test_count == sharded_test_count, f"Sharded test count ({sharded_test_count}) does not match the total test count ({total_test_count})"
+
+    # Make sure all the tests in the shards are from the full list, in the same order. Together with the previous check, this
+    # ensures that all tests in the full list are in the shards without duplication.
+    test_index = 0
+    for shard in sharded_tests:
+        for test_name in shard:
+            assert test_name == all_tests[test_index], f"Sharding does not split the test list while maintaining order {test_index}:\n'{test_name}' vs '{all_tests[test_index]}'"
+
+            test_index += 1
+
+def check_listed_and_executed_tests_match(listed_tests, executed_tests):
+    """Assert that each shard executed the same set of tests as it listed."""
+    for shard_index, listed_shard in enumerate(listed_tests):
+        listed_shard_names = set(listed_shard)
+        executed_shard_names = set(executed_tests[shard_index])
+
+        listed_string = "\n".join(listed_shard_names)
+        executed_string = "\n".join(executed_shard_names)
+
+        assert listed_shard_names == executed_shard_names, f"Executed tests do not match the listed tests:\nExecuted:\n{executed_string}\n\nListed:\n{listed_string}"
+
+def test_shards_cover_all_test(self_test_exe, test_case):
+    """Verify that the shards partition the full test list and run what they list."""
+    all_tests = list_tests(self_test_exe, test_case)
+    sharded_tests = [list_tests(self_test_exe, test_case, index) for index in range(test_case.shard_count)]
+
+    check_listed_tests_match(all_tests, sharded_tests)
+
+    executed_tests = [execute_tests(self_test_exe, test_case, index) for index in range(test_case.shard_count)]
+
+    check_listed_and_executed_tests_match(sharded_tests, executed_tests)
 
-    indexes_in_longer = {s: i for i, s in enumerate(longer)}
-    for s1, s2 in zip(shorter, shorter[1:]):
-        assert indexes_in_longer[s1] < indexes_in_longer[s2], (
-                '{} comes before {} in longer list.\n'
-                'Longer: {}\nShorter: {}'.format(s2, s1, longer, shorter))
 
 def main():
     self_test_exe, = sys.argv[1:]
 
-    test_cases = [
-        (1, 0), # default values, 1 shard, execute index 0
-        (1, 1), # 1 shard, invalid index (should still execute using the last valid index)
-        (4, 2), # 4 shards, second index
-    ]
-
-    # We want a random seed for the test, but want to avoid 0,
-    # because it has special meaning
-    seed = random.randint(1, 2 ** 32 - 1)
+    shard_counts = [1, 5]
+    # We want a random seed for the test, but want to avoid 0, because it has special meaning
+    seeds = [random.randint(1, 2 ** 32 - 1), random.randint(1, 2 ** 32 - 1)]
+    tags = [["generators"], ["generators", "matchers"], []]
+    orders = ["rand", "decl", "lex"]
 
-    list_one_tag = list_tests(self_test_exe, ['generators'], seed)
-    list_two_tags = list_tests(self_test_exe, ['generators', 'matchers'], seed)
-    list_all = list_tests(self_test_exe, [], seed)
+    test_cases = [TestCase(*t) for t in itertools.product(shard_counts, orders, tags, seeds)]
 
-    # First, verify that restricting to a subset yields the same order
-    check_is_sublist_of(list_two_tags, list_all)
-    check_is_sublist_of(list_one_tag, list_two_tags)
+    # We use multiprocessing here because there are quite a few test cases, and running them
+    # serially is slow
+    with multiprocessing.Pool() as pool:
+        pool.starmap(test_shards_cover_all_test, itertools.product([self_test_exe], test_cases))
 
 if __name__ == '__main__':
     sys.exit(main())