SERVER-30828 Untag unreliable tests that are not running

2024-12-01 09:32:32 +01:00 · 2017-10-06 10:37:38 -04:00 · 2017-10-06 10:37:38 -04:00 · 0875727af8
commit 0875727af8
parent aff9c7e6af
2 changed files with 211 additions and 23 deletions
--- a/buildscripts/tests/test_update_test_lifecycle.py
+++ b/buildscripts/tests/test_update_test_lifecycle.py
@ -373,6 +373,7 @@ class TestUpdateTags(unittest.TestCase):
        summary_lifecycle = update_test_lifecycle.TagsConfigWithChangelog(lifecycle)
        self.assertEqual(collections.OrderedDict(), self.assert_has_only_js_tests(lifecycle))

+        tests = ["jstests/core/all.js"]
        report = test_failures.Report([
            self.ENTRY._replace(num_pass=0, num_fail=1),
            self.ENTRY._replace(num_pass=0, num_fail=1, task="jsCore"),
@ -382,7 +383,7 @@ class TestUpdateTags(unittest.TestCase):
        ])

        update_test_lifecycle.validate_config(config)
-        update_test_lifecycle.update_tags(summary_lifecycle, config, report)
+        update_test_lifecycle.update_tags(summary_lifecycle, config, report, tests)
        updated_tags = self.assert_has_only_js_tests(lifecycle)
        self.assertEqual(updated_tags, expected_tags)

@ -468,6 +469,7 @@ class TestUpdateTags(unittest.TestCase):
        summary_lifecycle = update_test_lifecycle.TagsConfigWithChangelog(lifecycle)
        self.assertEqual(initial_tags, self.assert_has_only_js_tests(lifecycle))

+        tests = ["jstests/core/all.js"]
        report = test_failures.Report([
            self.ENTRY._replace(num_pass=1, num_fail=0),
            self.ENTRY._replace(num_pass=1, num_fail=0, task="jsCore"),
@ -477,10 +479,92 @@ class TestUpdateTags(unittest.TestCase):
        ])

        update_test_lifecycle.validate_config(config)
-        update_test_lifecycle.update_tags(summary_lifecycle, config, report)
+        update_test_lifecycle.update_tags(summary_lifecycle, config, report, tests)
        updated_tags = self.assert_has_only_js_tests(lifecycle)
        self.assertEqual(updated_tags, collections.OrderedDict())

+    def test_non_running_in_reliable_period_is_reliable(self):
+        """
+        Tests that tests that have a failure rate above the unacceptable rate during the unreliable
+        period but haven't run during the reliable period are marked as reliable.
+        """
+        # Unreliable period is 2 days: 2017-06-03 to 2017-06-04.
+        # Reliable period is 1 day: 2017-06-04.
+        reliable_period_date = datetime.date(2017, 6, 4)
+        config = self.CONFIG._replace(
+            test_fail_rates=self.CONFIG.test_fail_rates._replace(unacceptable=0.1),
+            task_fail_rates=self.CONFIG.task_fail_rates._replace(unacceptable=0.1),
+            variant_fail_rates=self.CONFIG.variant_fail_rates._replace(unacceptable=0.1),
+            distro_fail_rates=self.CONFIG.distro_fail_rates._replace(unacceptable=0.1),
+            unreliable_time_period=datetime.timedelta(days=2))
+
+        tests = ["jstests/core/all.js"]
+        initial_tags = collections.OrderedDict([
+            ("jstests/core/all.js", [
+                "unreliable",
+                "unreliable|jsCore_WT",
+                "unreliable|jsCore_WT|linux-64",
+                "unreliable|jsCore_WT|linux-64|rhel62",
+            ]),
+        ])
+
+        lifecycle = ci_tags.TagsConfig.from_dict(
+            dict(selector=dict(js_test=copy.deepcopy(initial_tags))))
+        summary_lifecycle = update_test_lifecycle.TagsConfigWithChangelog(lifecycle)
+        self.assertEqual(initial_tags, self.assert_has_only_js_tests(lifecycle))
+
+        # The test did not run during the reliable period on linux-64.
+        report = test_failures.Report([
+            # Failing.
+            self.ENTRY._replace(num_pass=0,
+                                num_fail=2),
+            # Passing on a different variant.
+            self.ENTRY._replace(start_date=reliable_period_date,
+                                end_date=reliable_period_date,
+                                num_pass=3,
+                                num_fail=0,
+                                variant="linux-alt",
+                                distro="debian7"),
+        ])
+
+        update_test_lifecycle.validate_config(config)
+        update_test_lifecycle.update_tags(summary_lifecycle, config, report, tests)
+        updated_tags = self.assert_has_only_js_tests(lifecycle)
+        # The tags for variant and distro have been removed.
+        self.assertEqual(updated_tags, collections.OrderedDict([
+            ("jstests/core/all.js", ["unreliable", "unreliable|jsCore_WT"])]))
+
+    def test_non_running_at_all_is_reliable(self):
+        """
+        Tests that tests that are tagged as unreliable but no longer running (either during the
+        reliable or the unreliable period) have their tags removed.
+        """
+        config = self.CONFIG
+
+        tests = ["jstests/core/all.js", "jstests/core/all2.js"]
+        initial_tags = collections.OrderedDict([
+            ("jstests/core/all2.js", [
+                "unreliable",
+                "unreliable|jsCore_WT",
+                "unreliable|jsCore_WT|linux-64",
+                "unreliable|jsCore_WT|linux-64|rhel62",
+            ]),
+        ])
+
+        lifecycle = ci_tags.TagsConfig.from_dict(
+            dict(selector=dict(js_test=copy.deepcopy(initial_tags))))
+        summary_lifecycle = update_test_lifecycle.TagsConfigWithChangelog(lifecycle)
+        self.assertEqual(initial_tags, self.assert_has_only_js_tests(lifecycle))
+
+        # all2.js did not run at all
+        report = test_failures.Report([self.ENTRY])
+
+        update_test_lifecycle.validate_config(config)
+        update_test_lifecycle.update_tags(summary_lifecycle, config, report, tests)
+        updated_tags = self.assert_has_only_js_tests(lifecycle)
+        # All of the tags have been removed because all2.js did not run at all.
+        self.assertEqual(updated_tags, collections.OrderedDict([]))
+
    def test_transition_test_from_unreliable_to_reliable(self):
        """
        Tests that update_tags() untags a formerly unreliable (test,) combination after it has
@ -571,6 +655,7 @@ class TestUpdateTags(unittest.TestCase):
        summary_lifecycle = update_test_lifecycle.TagsConfigWithChangelog(lifecycle)
        self.assertEqual(initial_tags, self.assert_has_only_js_tests(lifecycle))

+        tests = ["jstests/core/all.js"]
        report = test_failures.Report([
            self.ENTRY._replace(num_pass=1, num_fail=0),
            self.ENTRY._replace(num_pass=1, num_fail=0, task="jsCore"),
@ -580,7 +665,7 @@ class TestUpdateTags(unittest.TestCase):
        ])

        update_test_lifecycle.validate_config(config)
-        update_test_lifecycle.update_tags(summary_lifecycle, config, report)
+        update_test_lifecycle.update_tags(summary_lifecycle, config, report, tests)
        updated_tags = self.assert_has_only_js_tests(lifecycle)
        self.assertEqual(updated_tags, initial_tags)

@ -609,6 +694,7 @@ class TestUpdateTags(unittest.TestCase):
        summary_lifecycle = update_test_lifecycle.TagsConfigWithChangelog(lifecycle)
        self.assertEqual(initial_tags, self.assert_has_only_js_tests(lifecycle))

+        tests = ["jstests/core/all.js"]
        report = test_failures.Report([
            self.ENTRY._replace(num_pass=0, num_fail=1),
            self.ENTRY._replace(num_pass=0, num_fail=1, task="jsCore"),
@ -618,7 +704,7 @@ class TestUpdateTags(unittest.TestCase):
        ])

        update_test_lifecycle.validate_config(config)
-        update_test_lifecycle.update_tags(summary_lifecycle, config, report)
+        update_test_lifecycle.update_tags(summary_lifecycle, config, report, tests)
        updated_tags = self.assert_has_only_js_tests(lifecycle)
        self.assertEqual(updated_tags, initial_tags)

@ -660,6 +746,7 @@ class TestUpdateTags(unittest.TestCase):
        summary_lifecycle = update_test_lifecycle.TagsConfigWithChangelog(lifecycle)
        self.assertEqual(initial_tags, self.assert_has_only_js_tests(lifecycle))

+        tests = ["jstests/core/all.js"]
        report = test_failures.Report([
            self.ENTRY._replace(start_date=(self.ENTRY.start_date - datetime.timedelta(days=1)),
                                end_date=(self.ENTRY.end_date - datetime.timedelta(days=1)),
@ -677,7 +764,7 @@ class TestUpdateTags(unittest.TestCase):
        ])

        update_test_lifecycle.validate_config(config)
-        update_test_lifecycle.update_tags(summary_lifecycle, config, report)
+        update_test_lifecycle.update_tags(summary_lifecycle, config, report, tests)
        updated_tags = self.assert_has_only_js_tests(lifecycle)
        self.assertEqual(updated_tags, collections.OrderedDict([
            ("jstests/core/all.js", [
@ -707,6 +794,7 @@ class TestUpdateTags(unittest.TestCase):
        summary_lifecycle = update_test_lifecycle.TagsConfigWithChangelog(lifecycle)
        self.assertEqual(initial_tags, self.assert_has_only_js_tests(lifecycle))

+        tests = ["jstests/core/all.js"]
        report = test_failures.Report([
            self.ENTRY._replace(num_pass=0, num_fail=1),
            self.ENTRY._replace(num_pass=0, num_fail=1, task="jsCore"),
@ -716,7 +804,7 @@ class TestUpdateTags(unittest.TestCase):
        ])

        update_test_lifecycle.validate_config(config)
-        update_test_lifecycle.update_tags(summary_lifecycle, config, report)
+        update_test_lifecycle.update_tags(summary_lifecycle, config, report, tests)
        updated_tags = self.assert_has_only_js_tests(lifecycle)
        self.assertEqual(updated_tags, initial_tags)

@ -745,6 +833,7 @@ class TestUpdateTags(unittest.TestCase):
        summary_lifecycle = update_test_lifecycle.TagsConfigWithChangelog(lifecycle)
        self.assertEqual(initial_tags, self.assert_has_only_js_tests(lifecycle))

+        tests = ["jstests/core/all.js"]
        report = test_failures.Report([
            self.ENTRY._replace(start_date=(self.ENTRY.start_date - datetime.timedelta(days=1)),
                                end_date=(self.ENTRY.end_date - datetime.timedelta(days=1)),
@ -762,11 +851,53 @@ class TestUpdateTags(unittest.TestCase):
        ])

        update_test_lifecycle.validate_config(config)
-        update_test_lifecycle.update_tags(summary_lifecycle, config, report)
+        update_test_lifecycle.update_tags(summary_lifecycle, config, report, tests)
        updated_tags = self.assert_has_only_js_tests(lifecycle)
        self.assertEqual(updated_tags, collections.OrderedDict())


+class TestCombinationHelpers(unittest.TestCase):
+    def test_from_entry(self):
+        entry = test_failures._ReportEntry(
+            "testA", "taskA", "variantA", "distroA",
+            datetime.date.today(),
+            datetime.date.today(), 0, 0)
+        combination = update_test_lifecycle._test_combination_from_entry(
+            entry, test_failures.Report.TEST)
+        self.assertEqual(combination, ("testA",))
+
+        combination = update_test_lifecycle._test_combination_from_entry(
+            entry, test_failures.Report.TEST_TASK)
+        self.assertEqual(combination, ("testA", "taskA"))
+
+        combination = update_test_lifecycle._test_combination_from_entry(
+            entry, test_failures.Report.TEST_TASK_VARIANT)
+        self.assertEqual(combination, ("testA", "taskA", "variantA"))
+
+        combination = update_test_lifecycle._test_combination_from_entry(
+            entry, test_failures.Report.TEST_TASK_VARIANT_DISTRO)
+        self.assertEqual(combination, ("testA", "taskA", "variantA", "distroA"))
+
+    def test_make_from_tag(self):
+        test = "testA"
+
+        combination = update_test_lifecycle._test_combination_from_tag(
+            test, "unreliable")
+        self.assertEqual(combination, ("testA",))
+
+        combination = update_test_lifecycle._test_combination_from_tag(
+            test, "unreliable|taskA")
+        self.assertEqual(combination, ("testA", "taskA"))
+
+        combination = update_test_lifecycle._test_combination_from_tag(
+            test, "unreliable|taskA|variantA")
+        self.assertEqual(combination, ("testA", "taskA", "variantA"))
+
+        combination = update_test_lifecycle._test_combination_from_tag(
+            test, "unreliable|taskA|variantA|distroA")
+        self.assertEqual(combination, ("testA", "taskA", "variantA", "distroA"))
+
+
 class TestCleanUpTags(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
@ -847,11 +978,11 @@ class TestJiraIssueCreator(unittest.TestCase):
        desc = update_test_lifecycle.JiraIssueCreator._make_updated_tags_description(data)
        expected = ("- *js_test*\n"
                    "-- {{testfile1}}\n"
-                    "--- {{tag1}} (0.10 %)\n"
-                    "--- {{tag2}} (0.20 %)\n"
+                    "--- {{tag1}} (0.10)\n"
+                    "--- {{tag2}} (0.20)\n"
                    "-- {{testfile2}}\n"
-                    "--- {{tag1}} (0.10 %)\n"
-                    "--- {{tag3}} (0.30 %)")
+                    "--- {{tag1}} (0.10)\n"
+                    "--- {{tag3}} (0.30)")
        self.assertEqual(expected, desc)

    def test_description_empty(self):
--- a/buildscripts/update_test_lifecycle.py
+++ b/buildscripts/update_test_lifecycle.py
@ -12,6 +12,7 @@ import collections
 import datetime
 import logging
 import multiprocessing.dummy
+import operator
 import optparse
 import os.path
 import posixpath
@ -328,10 +329,35 @@ def validate_config(config):
                    name, time_period))


-def update_tags(lifecycle_tags, config, report):
+def _test_combination_from_entry(entry, components):
+    """Creates a test combination tuple from a tf._ReportEntry and target components.
+
+    Returns:
+        A tuple containing the entry fields specified in components.
    """
-    Updates the tags in 'lifecycle_tags' based on the historical test failures mentioned in
-    'report' according to the model described by 'config'.
+    combination = []
+    for component in components:
+        combination.append(operator.attrgetter(component)(entry))
+    return tuple(combination)
+
+
+def _test_combination_from_tag(test, tag):
+    """Creates a test combination tuple from a test name and a tag.
+
+    Returns:
+        A tuple containing the test name and the components found in the tag.
+    """
+    combination = [test]
+    for element in _split_tag(tag):
+        if element:
+            combination.append(element)
+    return tuple(combination)
+
+
+def update_tags(lifecycle_tags, config, report, tests):
+    """
+    Updates the tags in 'lifecycle_tags' based on the historical test failures of tests 'tests'
+    mentioned in 'report' according to the model described by 'config'.
    """

    # We initialize 'grouped_entries' to make PyLint not complain about 'grouped_entries' being used
@ -350,11 +376,30 @@ def update_tags(lifecycle_tags, config, report):
        # those components, etc.
        grouped_entries = report.summarize_by(components, time_period=tf.Report.DAILY)

+        # Create the reliable report.
+        # Filter out any test executions from prior to 'config.reliable_time_period'.
+        reliable_start_date = (report.end_date - config.reliable_time_period
+                               + datetime.timedelta(days=1))
+        reliable_entries = [entry for entry in grouped_entries
+                            if entry.start_date >= reliable_start_date]
+        reliable_report = tf.Report(reliable_entries)
+        reliable_combinations = {_test_combination_from_entry(entry, components)
+                                 for entry in reliable_entries}
+
+        # Create the unreliable report.
        # Filter out any test executions from prior to 'config.unreliable_time_period'.
+        # Also filter out any combination that did not run during the reliable period in order
+        # to avoid tagging as unreliable tests that are no longer running.
        unreliable_start_date = (report.end_date - config.unreliable_time_period
                                 + datetime.timedelta(days=1))
-        unreliable_report = tf.Report(entry for entry in grouped_entries
-                                      if entry.start_date >= unreliable_start_date)
+        unreliable_entries = [
+            entry for entry in grouped_entries
+            if (entry.start_date >= unreliable_start_date and
+                _test_combination_from_entry(entry, components) in reliable_combinations)
+        ]
+        unreliable_report = tf.Report(unreliable_entries)
+
+        # Update the tags using the unreliable report.
        update_lifecycle(lifecycle_tags,
                         unreliable_report.summarize_by(components),
                         unreliable_test,
@ -362,11 +407,7 @@ def update_tags(lifecycle_tags, config, report):
                         rates.unacceptable,
                         config.unreliable_min_runs)

-        # Filter out any test executions from prior to 'config.reliable_time_period'.
-        reliable_start_date = (report.end_date - config.reliable_time_period
-                               + datetime.timedelta(days=1))
-        reliable_report = tf.Report(entry for entry in grouped_entries
-                                    if entry.start_date >= reliable_start_date)
+        # Update the tags using the reliable report.
        update_lifecycle(lifecycle_tags,
                         reliable_report.summarize_by(components),
                         reliable_test,
@ -374,6 +415,22 @@ def update_tags(lifecycle_tags, config, report):
                         rates.acceptable,
                         config.reliable_min_runs)

+        def should_be_removed(test, tag):
+            combination = _test_combination_from_tag(test, tag)
+            if len(combination) != len(components):
+                # The tag is not for these components.
+                return False
+            return combination not in reliable_combinations
+
+        # Remove the tags that correspond to tests that have not run during the reliable period.
+        for test in tests:
+            tags = lifecycle_tags.lifecycle.get_tags("js_test", test)
+            for tag in tags[:]:
+                if should_be_removed(test, tag):
+                    LOGGER.info("Removing tag '%s' of test '%s' because the combination did not run"
+                                " during the reliable period", tag, test)
+                    lifecycle_tags.remove_tag("js_test", test, tag, failure_rate=0)
+

 def _split_tag(tag):
    """Split a tag into its components.
@ -566,7 +623,7 @@ class JiraIssueCreator(object):
                tags_lines.append("-- {0}".format(mono(test)))
                for tag in sorted(tags.keys()):
                    coefficient = tags[tag]
-                    tags_lines.append("--- {0} ({1:.2})".format(mono(tag), coefficient))
+                    tags_lines.append("--- {0} ({1:.2f})".format(mono(tag), coefficient))
        if tags_lines:
            return "\n".join(tags_lines)
        else:
@ -1111,7 +1168,7 @@ def main():
            continue
        history_data = test_history_source.get_history_data(tests, tasks)
        report = tf.Report(history_data)
-        update_tags(lifecycle_tags_file.changelog_lifecycle, config, report)
+        update_tags(lifecycle_tags_file.changelog_lifecycle, config, report, tests)

    # Remove tags that are no longer relevant
    clean_up_tags(lifecycle_tags_file.changelog_lifecycle, evg_conf)