From d3bfe8c0ecf6374b26f3d08405503c07974e9e7f Mon Sep 17 00:00:00 2001 From: Ryan Berryhill Date: Mon, 19 Aug 2024 15:06:48 -0400 Subject: [PATCH] SERVER-86578 Enable TSAN all feature flags variant (#25909) GitOrigin-RevId: 040a4b10d87a85c2cd17119373aa9636eca36d57 --- buildscripts/evergreen_resmoke_job_count.py | 12 ++ .../clusters_and_integrations/tasks.yml | 2 +- .../variants/sanitizer/test_dev.yml | 141 ++++++++++-------- .../sanitizer/test_dev_master_branch_only.yml | 110 ++++++++++++++ etc/tsan.suppressions | 7 + .../aggregation/expressions/regex_limits.js | 4 + ..._moveChunk_timeseries_arbitrary_updates.js | 2 + ...andom_moveChunk_timeseries_meta_updates.js | 2 + 8 files changed, 217 insertions(+), 63 deletions(-) diff --git a/buildscripts/evergreen_resmoke_job_count.py b/buildscripts/evergreen_resmoke_job_count.py index b9e73b9898b..b412c1f5faf 100644 --- a/buildscripts/evergreen_resmoke_job_count.py +++ b/buildscripts/evergreen_resmoke_job_count.py @@ -42,6 +42,18 @@ VARIANT_TASK_FACTOR_OVERRIDES = { {"task": r"fcv_upgrade_downgrade_sharded_collections_jscore_passthrough.*", "factor": 0.27}, {"task": r"shard.*uninitialized_fcv_jscore_passthrough.*", "factor": 0.125}, ], + "enterprise-rhel8-debug-tsan-all-feature-flags": [ + # Lower the default resmoke_jobs_factor for TSAN to reduce memory pressure for this suite, + # as otherwise TSAN variants occasionally run out of memory. + # The all feature flags variant sometimes needs more aggressive reductions than the no + # feature flags variant. + { + "task": r"fcv_upgrade_downgrade_sharded_collections_jscore_passthrough.*", + "factor": 0.125, + }, + {"task": r"fcv_upgrade_downgrade_replica_sets_jscore_passthrough.*", "factor": 0.27}, + {"task": r"shard.*uninitialized_fcv_jscore_passthrough.*", "factor": 0.125}, + ], "rhel8-debug-aubsan-classic-engine": [ {"task": r"shard.*uninitialized_fcv_jscore_passthrough.*", "factor": 0.25} ], diff --git a/etc/evergreen_yml_components/tasks/resmoke/server_divisions/clusters_and_integrations/tasks.yml b/etc/evergreen_yml_components/tasks/resmoke/server_divisions/clusters_and_integrations/tasks.yml index 53613cbaa59..486d377be45 100644 --- a/etc/evergreen_yml_components/tasks/resmoke/server_divisions/clusters_and_integrations/tasks.yml +++ b/etc/evergreen_yml_components/tasks/resmoke/server_divisions/clusters_and_integrations/tasks.yml @@ -416,7 +416,7 @@ tasks: commands: - func: "generate resmoke tasks" vars: - use_large_distro: "true" + use_xlarge_distro: "true" resmoke_jobs_max: 1 - <<: *gen_task_template diff --git a/etc/evergreen_yml_components/variants/sanitizer/test_dev.yml b/etc/evergreen_yml_components/variants/sanitizer/test_dev.yml index cff897ae59e..7cf5cb7e018 100644 --- a/etc/evergreen_yml_components/variants/sanitizer/test_dev.yml +++ b/etc/evergreen_yml_components/variants/sanitizer/test_dev.yml @@ -170,6 +170,72 @@ variables: multiversion_edition: enterprise gcov_tool: /opt/mongodbtoolchain/v4/bin/gcov + # THIS HAS COPIES IN + # - etc/evergreen_yml_components/variants/sanitizer/test_dev.yml + # - etc/evergreen_yml_components/variants/sanitizer/test_dev_master_branch_only.yml + # ANY MODIFICATIONS HERE SHOULD ALSO BE MADE IN THOSE FILES + - &enterprise-rhel8-debug-tsan-compile-dependency + depends_on: + - name: archive_dist_test_debug + variant: &enterprise-rhel8-debug-tsan-compile enterprise-rhel8-debug-tsan-compile + - name: archive_jstestshell + variant: *enterprise-rhel8-debug-tsan-compile + - name: version_gen + variant: generate-tasks-for-version + # This is added because of EVG-18211. + # Without this we are adding extra dependencies on evergreen and it is causing strain + omit_generated_tasks: true + + # THIS HAS COPIES IN + # - etc/evergreen_yml_components/variants/sanitizer/test_dev.yml + # - etc/evergreen_yml_components/variants/sanitizer/test_dev_master_branch_only.yml + # ANY MODIFICATIONS HERE SHOULD ALSO BE MADE IN THOSE FILES + - &enterprise-rhel8-debug-tsan-expansions-template + additional_package_targets: >- + archive-mongocryptd + archive-mongocryptd-debug + lang_environment: LANG=C + toolchain_version: stable + compile_variant: *enterprise-rhel8-debug-tsan-compile + # If you add anything to san_options, make sure the appropriate + # changes are also made to SConstruct. + # + # TODO SERVER-49121: die_after_fork=0 is a temporary setting to + # allow tests to continue while we figure out why we're running + # afoul of it. + # + # TODO SERVER-65936: report_thread_leaks=0 suppresses reporting + # thread leaks, which we have because we don't do a clean shutdown + # of the ServiceContext. + # + # Run TSAN with diagnostic latches off, as our diagnostics reduce + # TSAN's visibility - see SERVER-88159. + # + san_options: TSAN_OPTIONS="abort_on_error=1:disable_coredump=0:handle_abort=1:halt_on_error=1:report_thread_leaks=0:die_after_fork=0:history_size=4:suppressions=etc/tsan.suppressions:external_symbolizer_path=/opt/mongodbtoolchain/v4/bin/llvm-symbolizer" + build_mongot: true + download_mongot_release: true + compile_flags: >- + --variables-files=etc/scons/mongodbtoolchain_${toolchain_version}_clang.vars + --dbg=on + --opt=on + --allocator=system + --sanitize=thread + --ssl + --use-libunwind=off + --link-model=dynamic + -j$(grep -c ^processor /proc/cpuinfo) + --use-diagnostic-latches=off + # Avoid starting too many mongod's under TSAN build. + resmoke_jobs_factor: 0.3 + scons_cache_scope: shared + separate_debug: off + large_distro_name: &enterprise-rhel8-debug-tsan-large-distro-name rhel8.8-xlarge + xlarge_distro_name: rhel8.8-xxlarge + core_analyzer_distro_name: rhel8.8-xxlarge + multiversion_platform: rhel80 + multiversion_platform_50_or_later: rhel8 + multiversion_edition: enterprise + buildvariants: - <<: *generic_linux_compile_params name: &linux-x86-dynamic-debug-compile-required linux-x86-dynamic-debug-compile-required # TODO: replace with Sanitizer. @@ -415,62 +481,15 @@ buildvariants: tasks: - name: compile_archive_and_run_libfuzzertests_TG - - &enterprise-rhel8-debug-tsan-template - name: &enterprise-rhel8-debug-tsan-compile enterprise-rhel8-debug-tsan-compile + - name: &enterprise-rhel8-debug-tsan-compile enterprise-rhel8-debug-tsan-compile display_name: "* TSAN Enterprise RHEL 8 DEBUG Compile" tags: ["suggested"] cron: "0 4 * * *" # From the ${project_nightly_cron} parameter. run_on: - rhel8.8-large stepback: false - expansions: &enterprise-rhel8-debug-tsan-expansions-template - additional_package_targets: >- - archive-mongocryptd - archive-mongocryptd-debug - lang_environment: LANG=C - toolchain_version: stable - compile_variant: *enterprise-rhel8-debug-tsan-compile - # If you add anything to san_options, make sure the appropriate - # changes are also made to SConstruct. - # - # TODO SERVER-49121: die_after_fork=0 is a temporary setting to - # allow tests to continue while we figure out why we're running - # afoul of it. - # - # TODO SERVER-65936: report_thread_leaks=0 suppresses reporting - # thread leaks, which we have because we don't do a clean shutdown - # of the ServiceContext. - # - # Run TSAN with diagnostic latches off, as our diagnostics reduce - # TSAN's visibility - see SERVER-88159. - # - san_options: TSAN_OPTIONS="abort_on_error=1:disable_coredump=0:handle_abort=1:halt_on_error=1:report_thread_leaks=0:die_after_fork=0:history_size=4:suppressions=etc/tsan.suppressions:external_symbolizer_path=/opt/mongodbtoolchain/v4/bin/llvm-symbolizer" - build_mongot: true - download_mongot_release: true - compile_flags: >- - --variables-files=etc/scons/mongodbtoolchain_${toolchain_version}_clang.vars - --dbg=on - --opt=on - --allocator=system - --sanitize=thread - --ssl - --use-libunwind=off - --link-model=dynamic - -j$(grep -c ^processor /proc/cpuinfo) - --use-diagnostic-latches=off - # Avoid starting too many mongod's under TSAN build. - resmoke_jobs_factor: 0.3 - scons_cache_scope: shared - separate_debug: off - large_distro_name: &enterprise-rhel8-debug-tsan-large-distro-name rhel8.8-xlarge - xlarge_distro_name: rhel8.8-xxlarge - core_analyzer_distro_name: rhel8.8-xxlarge - multiversion_platform: rhel80 - multiversion_platform_50_or_later: rhel8 - multiversion_edition: enterprise - test_flags: >- - --mongodSetParameters="{internalQueryEnableAggressiveSpillsInGroup: true, receiveChunkWaitForRangeDeleterTimeoutMS: 180000}" - --excludeWithAnyTags=tsan_incompatible,requires_latch_analyzer + expansions: + <<: *enterprise-rhel8-debug-tsan-expansions-template tasks: - name: search_end_to_end_sharded_cluster - name: search_end_to_end_single_node @@ -488,21 +507,19 @@ buildvariants: distros: - rhel8.8-xlarge - - <<: *enterprise-rhel8-debug-tsan-template + - <<: *enterprise-rhel8-debug-tsan-compile-dependency name: &enterprise-rhel8-debug-tsan enterprise-rhel8-debug-tsan display_name: "* TSAN Enterprise RHEL 8 DEBUG" tags: ["suggested"] - cron: "0 4 * * *" # From the ${project_nightly_cron} parameter. - depends_on: - - name: archive_dist_test_debug - variant: *enterprise-rhel8-debug-tsan-compile - - name: archive_jstestshell - variant: *enterprise-rhel8-debug-tsan-compile - - name: version_gen - variant: generate-tasks-for-version - # This is added because of EVG-18211. - # Without this we are adding extra dependencies on evergreen and it is causing strain - omit_generated_tasks: true + cron: "0 4 1-31/2 * *" # For cost reasons, we run this variant every other day. + run_on: + - rhel8.8-large + stepback: false + expansions: + <<: *enterprise-rhel8-debug-tsan-expansions-template + test_flags: >- + --mongodSetParameters="{internalQueryEnableAggressiveSpillsInGroup: true, receiveChunkWaitForRangeDeleterTimeoutMS: 180000}" + --excludeWithAnyTags=tsan_incompatible,requires_latch_analyzer tasks: - name: .development_critical !.requires_large_host !.requires_compile_variant !.requires_large_host_tsan !.incompatible_development_variant !.incompatible_tsan !.incompatible_system_allocator !.requires_all_feature_flags - name: .development_critical .requires_large_host !.requires_compile_variant !.incompatible_development_variant !.incompatible_tsan !.incompatible_system_allocator !.requires_all_feature_flags diff --git a/etc/evergreen_yml_components/variants/sanitizer/test_dev_master_branch_only.yml b/etc/evergreen_yml_components/variants/sanitizer/test_dev_master_branch_only.yml index a7d7f7a00f9..10509e15bc9 100644 --- a/etc/evergreen_yml_components/variants/sanitizer/test_dev_master_branch_only.yml +++ b/etc/evergreen_yml_components/variants/sanitizer/test_dev_master_branch_only.yml @@ -127,6 +127,72 @@ variables: multiversion_edition: enterprise gcov_tool: /opt/mongodbtoolchain/v4/bin/gcov + # THIS HAS COPIES IN + # - etc/evergreen_yml_components/variants/sanitizer/test_dev.yml + # - etc/evergreen_yml_components/variants/sanitizer/test_dev_master_branch_only.yml + # ANY MODIFICATIONS HERE SHOULD ALSO BE MADE IN THOSE FILES + - &enterprise-rhel8-debug-tsan-compile-dependency + depends_on: + - name: archive_dist_test_debug + variant: &enterprise-rhel8-debug-tsan-compile enterprise-rhel8-debug-tsan-compile + - name: archive_jstestshell + variant: *enterprise-rhel8-debug-tsan-compile + - name: version_gen + variant: generate-tasks-for-version + # This is added because of EVG-18211. + # Without this we are adding extra dependencies on evergreen and it is causing strain + omit_generated_tasks: true + + # THIS HAS COPIES IN + # - etc/evergreen_yml_components/variants/sanitizer/test_dev.yml + # - etc/evergreen_yml_components/variants/sanitizer/test_dev_master_branch_only.yml + # ANY MODIFICATIONS HERE SHOULD ALSO BE MADE IN THOSE FILES + - &enterprise-rhel8-debug-tsan-expansions-template + additional_package_targets: >- + archive-mongocryptd + archive-mongocryptd-debug + lang_environment: LANG=C + toolchain_version: stable + compile_variant: *enterprise-rhel8-debug-tsan-compile + # If you add anything to san_options, make sure the appropriate + # changes are also made to SConstruct. + # + # TODO SERVER-49121: die_after_fork=0 is a temporary setting to + # allow tests to continue while we figure out why we're running + # afoul of it. + # + # TODO SERVER-65936: report_thread_leaks=0 suppresses reporting + # thread leaks, which we have because we don't do a clean shutdown + # of the ServiceContext. + # + # Run TSAN with diagnostic latches off, as our diagnostics reduce + # TSAN's visibility - see SERVER-88159. + # + san_options: TSAN_OPTIONS="abort_on_error=1:disable_coredump=0:handle_abort=1:halt_on_error=1:report_thread_leaks=0:die_after_fork=0:history_size=4:suppressions=etc/tsan.suppressions:external_symbolizer_path=/opt/mongodbtoolchain/v4/bin/llvm-symbolizer" + build_mongot: true + download_mongot_release: true + compile_flags: >- + --variables-files=etc/scons/mongodbtoolchain_${toolchain_version}_clang.vars + --dbg=on + --opt=on + --allocator=system + --sanitize=thread + --ssl + --use-libunwind=off + --link-model=dynamic + -j$(grep -c ^processor /proc/cpuinfo) + --use-diagnostic-latches=off + # Avoid starting too many mongod's under TSAN build. + resmoke_jobs_factor: 0.3 + scons_cache_scope: shared + separate_debug: off + large_distro_name: &enterprise-rhel8-debug-tsan-large-distro-name rhel8.8-xlarge + xlarge_distro_name: rhel8.8-xxlarge + core_analyzer_distro_name: rhel8.8-xxlarge + multiversion_platform: rhel80 + multiversion_platform_50_or_later: rhel8 + multiversion_edition: enterprise + buildvariants: ########################################### # Redhat buildvariants # @@ -506,6 +572,50 @@ buildvariants: - rhel8.8-xlarge - name: .benchmarks + - <<: *enterprise-rhel8-debug-tsan-compile-dependency + name: &enterprise-rhel8-debug-tsan-all-feature-flags enterprise-rhel8-debug-tsan-all-feature-flags + display_name: "* TSAN Enterprise RHEL 8 DEBUG (all feature flags)" + tags: ["suggested"] + cron: "0 4 1-31/2 * *" # For cost reasons, we run this variant every other day. + run_on: + - rhel8.8-large + stepback: false + expansions: + <<: *enterprise-rhel8-debug-tsan-expansions-template + test_flags: >- + --mongodSetParameters="{internalQueryEnableAggressiveSpillsInGroup: true, receiveChunkWaitForRangeDeleterTimeoutMS: 180000}" + --excludeWithAnyTags=tsan_incompatible,requires_latch_analyzer + --runAllFeatureFlagTests + tasks: + - name: .development_critical !.requires_large_host !.requires_compile_variant !.requires_large_host_tsan !.incompatible_development_variant !.incompatible_tsan !.incompatible_system_allocator !.incompatible_all_feature_flags + - name: .development_critical .requires_large_host !.requires_compile_variant !.incompatible_development_variant !.incompatible_tsan !.incompatible_system_allocator !.incompatible_all_feature_flags + distros: + - *enterprise-rhel8-debug-tsan-large-distro-name + - name: .development_critical .requires_large_host_tsan !.requires_compile_variant !.incompatible_development_variant !.incompatible_tsan !.incompatible_system_allocator !.incompatible_all_feature_flags + distros: + - *enterprise-rhel8-debug-tsan-large-distro-name + - name: .release_critical !.requires_large_host !.requires_large_host_tsan !.requires_compile_variant !.incompatible_development_variant !.incompatible_tsan !.incompatible_system_allocator !.incompatible_all_feature_flags + - name: .release_critical .requires_large_host !.requires_compile_variant !.incompatible_development_variant !.incompatible_tsan !.incompatible_system_allocator !.incompatible_all_feature_flags + distros: + - *enterprise-rhel8-debug-tsan-large-distro-name + - name: .release_critical .requires_large_host_tsan !.requires_compile_variant !.incompatible_development_variant !.incompatible_tsan !.incompatible_system_allocator !.incompatible_all_feature_flags + distros: + - *enterprise-rhel8-debug-tsan-large-distro-name + - name: .default !.requires_large_host !.requires_large_host_tsan !.requires_compile_variant !.incompatible_development_variant !.incompatible_tsan !.incompatible_system_allocator !.incompatible_all_feature_flags + - name: .default .requires_large_host !.requires_compile_variant !.incompatible_development_variant !.incompatible_tsan !.incompatible_system_allocator !.incompatible_all_feature_flags + distros: + - *enterprise-rhel8-debug-tsan-large-distro-name + - name: .default .requires_large_host_tsan !.requires_compile_variant !.incompatible_development_variant !.incompatible_tsan !.incompatible_system_allocator !.incompatible_all_feature_flags + distros: + - *enterprise-rhel8-debug-tsan-large-distro-name + - name: .non_deterministic !.requires_large_host !.requires_large_host_tsan !.requires_compile_variant !.incompatible_development_variant !.incompatible_tsan !.incompatible_system_allocator !.incompatible_all_feature_flags + - name: .non_deterministic .requires_large_host !.requires_compile_variant !.incompatible_development_variant !.incompatible_tsan !.incompatible_system_allocator !.incompatible_all_feature_flags + distros: + - *enterprise-rhel8-debug-tsan-large-distro-name + - name: .non_deterministic .requires_large_host_tsan !.requires_compile_variant !.incompatible_development_variant !.incompatible_tsan !.incompatible_system_allocator !.incompatible_all_feature_flags + distros: + - *enterprise-rhel8-debug-tsan-large-distro-name + - name: &enterprise-rhel8-unoptimized-64-bit enterprise-rhel8-unoptimized-64-bit display_name: "~ Enterprise RHEL 8 DEBUG (Unoptimized)" tags: ["bazel_check"] diff --git a/etc/tsan.suppressions b/etc/tsan.suppressions index 5bd019f31a5..bc20b60feb3 100644 --- a/etc/tsan.suppressions +++ b/etc/tsan.suppressions @@ -79,3 +79,10 @@ race:mongo::AuthorizationManager::get # TODO SERVER-91877 inversion b/w SSL manager and some crypto mutex deadlock:mongo::(anonymous namespace)::SSLManagerOpenSSL::SSLManagerOpenSSL + +# TODO SERVER-93363 +race:S2Loop::FindVertex + +# TODO SERVER-93364 +race:mongo::Client::setService +race:mongo::Client::getService diff --git a/jstests/aggregation/expressions/regex_limits.js b/jstests/aggregation/expressions/regex_limits.js index 665ccea9c66..c38b3d5813e 100644 --- a/jstests/aggregation/expressions/regex_limits.js +++ b/jstests/aggregation/expressions/regex_limits.js @@ -1,5 +1,9 @@ /** * Tests to validate limits for $regexFind, $regexFindAll and $regexMatch aggregation expressions. + * @tags: [ + * # TODO SERVER-93378 investigate why this test is significantly slower with TSAN. + * tsan_incompatible, + * ] */ import "jstests/libs/sbe_assert_error_override.js"; diff --git a/jstests/concurrency/fsm_workloads/random_moveChunk_timeseries_arbitrary_updates.js b/jstests/concurrency/fsm_workloads/random_moveChunk_timeseries_arbitrary_updates.js index 310315a04ad..33f984d8b84 100644 --- a/jstests/concurrency/fsm_workloads/random_moveChunk_timeseries_arbitrary_updates.js +++ b/jstests/concurrency/fsm_workloads/random_moveChunk_timeseries_arbitrary_updates.js @@ -7,6 +7,8 @@ * requires_non_retryable_writes, * does_not_support_transactions, * featureFlagTimeseriesUpdatesSupport, + * # TODO SERVER-93382 investigate excessive resource usage under TSAN. + * tsan_incompatible, * ] */ import {extendWorkload} from "jstests/concurrency/fsm_libs/extend_workload.js"; diff --git a/jstests/concurrency/fsm_workloads/random_moveChunk_timeseries_meta_updates.js b/jstests/concurrency/fsm_workloads/random_moveChunk_timeseries_meta_updates.js index 099dd5efcf6..2e9075db97a 100644 --- a/jstests/concurrency/fsm_workloads/random_moveChunk_timeseries_meta_updates.js +++ b/jstests/concurrency/fsm_workloads/random_moveChunk_timeseries_meta_updates.js @@ -7,6 +7,8 @@ * requires_non_retryable_writes, * does_not_support_transactions, * requires_fcv_51, + * # TODO SERVER-93382 investigate excessive resource usage under TSAN. + * tsan_incompatible, * ] */ import {extendWorkload} from "jstests/concurrency/fsm_libs/extend_workload.js";