From 5f9ff88131f03e1ce4acc74474c9b7da32aad5b7 Mon Sep 17 00:00:00 2001 From: Militsa Sotirova Date: Tue, 15 Aug 2023 15:55:13 +0000 Subject: [PATCH] SERVER-62509 Tests ABT for queries with thousands of args Co-authored-by: matt.boros@mongodb.com --- .eslintrc.yml | 4 + jstests/aggregation/query_limits_test.js | 241 +++++++++++++++++++++++ src/mongo/shell/utils.js | 5 + 3 files changed, 250 insertions(+) create mode 100644 jstests/aggregation/query_limits_test.js diff --git a/.eslintrc.yml b/.eslintrc.yml index d5312a02603..b282aeb6fdc 100644 --- a/.eslintrc.yml +++ b/.eslintrc.yml @@ -102,6 +102,10 @@ globals: isString: true _createSecurityToken: true _isAddressSanitizerActive: true + _isLeakSanitizerActive: true + _isThreadSanitizerActive: true + _isUndefinedBehaviorSanitizerActive: true + _optimizationsEnabled: true allocatePort: true allocatePorts: true resetAllocatedPorts: true diff --git a/jstests/aggregation/query_limits_test.js b/jstests/aggregation/query_limits_test.js new file mode 100644 index 00000000000..abbbca72402 --- /dev/null +++ b/jstests/aggregation/query_limits_test.js @@ -0,0 +1,241 @@ +/** + * Test that larger queries do not fail. This includes larger aggregation pipelines, as well as + * large $match/$project stages, deeply nested paths, and many predicates in an $and/$or. + * The size of these queries was found by trial and error until we reach the BSON size limit. + * + * @tags: [ + * # Can't wrap queries in facets without going past max BSON depth. + * do_not_wrap_aggregations_in_facets, + * ] + */ + +import {checkCascadesOptimizerEnabled} from "jstests/libs/optimizer_utils.js"; +import {checkSBEEnabled} from "jstests/libs/sbe_util.js"; + +(function() { +"use strict"; + +// Only run this test for debug=off opt=on without sanitizers active. With any of these activated, +// the stack frames are larger and can more easily stack overflow. 
+const debugBuild = db.adminCommand("buildInfo").debug; +if (debugBuild || !_optimizationsEnabled() || _isAddressSanitizerActive() || + _isLeakSanitizerActive() || _isThreadSanitizerActive() || + _isUndefinedBehaviorSanitizerActive()) { + jsTestLog("Returning early because debug is on, opt is off, or a sanitizer is enabled."); + return; +} + +const isBonsaiEnabled = checkCascadesOptimizerEnabled(db); +const isSBEEnabled = checkSBEEnabled(db); + +const coll = db.query_limits_test; +coll.drop(); + +// Multikey so we can't apply any non-multikey optimizations to stress as much as possible. +assert.commandWorked(coll.insert({_id: 0, a: [0, 1], b: [2, 3], c: 4, d: 5, object: {}})); + +function range(high) { + return [...Array(high).keys()]; +} + +function runAgg(pipeline) { + // Run pipeline to make sure it doesn't fail. + const result = coll.aggregate(pipeline).toArray(); +} + +// Construct a {$match: {a: {$in: [0, 1, 2, ...]}}}. +function testLargeIn() { + jsTestLog("Testing large $in"); + // Int limit is different than double limit. + const filterValsInts = range(1200000).map(i => NumberInt(i)); + runAgg([{$match: {a: {$in: filterValsInts}}}]); + + const filterValsDoubles = range(1000000).map(i => i * 1.0); + runAgg([{$match: {a: {$in: filterValsDoubles}}}]); +} + +// Construct a {$project: {a0: 1, a1: 1, ...}}. +function testLargeProject() { + jsTestLog("Testing large $project"); + const projectFields = {}; + range(1000000).forEach(function(i) { + projectFields["a" + i] = NumberInt(1); + }); + runAgg([{$project: projectFields}]); + + const pathSize = 1000000; + let nestedProjectField = "a0"; + for (let i = 1; i < pathSize; i++) { + nestedProjectField += ".a" + i; + } + runAgg([{$project: {nestedProjectField: 1}}]); +} + +// Run $and and $or with many different types of predicates. +function testLargeAndOrPredicates() { + // TODO: SERVER-78635 remove this early return once this ticket is done. This is a + // Bonsai-specific issue with PSR. 
+ if (isBonsaiEnabled) { + return; + } + // TODO: SERVER-78587 remove the SBE check. This is an issue with compiling expressions to the + // SBE VM, so it affects stage builders and Bonsai. + if (isSBEEnabled) { + return; + } + jsTestLog("Testing large $and/$or predicates"); + + // TODO: SERVER-79092 uncomment this test. This is an issue with match expression + // auto-parameterization. This affects both classic and SBE. + // Large $match of the form {$match: {a0: 1, a1: 1, ...}} + // const largeMatch = {}; + // range(1200000).forEach(function(i) { + // largeMatch["a" + i] = NumberInt(1); + // }); + // runAgg([{$match: largeMatch}]); + + // function intStream(n) { + // return range(n).map(i => NumberInt(i)); + // } + + // const andOrFilters = [ + // // Plain a=i filter. + // intStream(800000).map(function(i) { + // return {a: i}; + // }), + // // a_i = i filter. Different field for each value. + // intStream(600000).map(function(i) { + // const field = "a" + i; + // return {[field]: i}; + // }), + // // Mix of lt and gt with the same field. + // intStream(500000).map(function(i) { + // const predicate = i % 2 ? {$lt: i} : {$gt: i}; + // return {a: predicate}; + // }), + // // Mix of lt and gt with different fields. + // intStream(400000).map(function(i) { + // const field = "a" + i; + // const predicate = i % 2 ? {$lt: i} : {$gt: i}; + // return {[field]: predicate}; + // }), + // // Mix of lt and gt wrapped in not with different fields. + // intStream(300000).map(function(i) { + // const field = "a" + i; + // const predicate = i % 2 ? {$lt: i} : {$gt: i}; + // return {[field]: {$not: predicate}}; + // }), + // // $exists on different fields. 
+ // intStream(400000).map(function(i) { + // const field = "a" + i; + // return {[field]: {$exists: true}}; + // }), + // intStream(400000).map(function(i) { + // const field = "a" + i; + // return {[field]: {$exists: false}}; + // }) + // ]; + // for (const m of andOrFilters) { + // runAgg([{$match: {$and: m}}]); + // runAgg([{$match: {$or: m}}]); + // } +} + +// Test deeply nested queries. +function testDeeplyNestedPath() { + jsTestLog("Testing deeply nested $match"); + let deepQuery = {a: {$eq: 1}}; + const depth = 72; + for (let i = 0; i < depth; i++) { + deepQuery = {a: {$elemMatch: deepQuery}}; + } + runAgg([{$match: deepQuery}]); +} + +// Test pipeline length. +function testPipelineLimits() { + jsTestLog("Testing large agg pipelines"); + const pipelineLimit = 1000; + let stages = [ + {$limit: 1}, + {$skip: 1}, + {$sort: {a: 1}}, + {$unwind: "$a"}, + {$match: {a: {$mod: [4, 2]}}}, + {$group: {_id: "$a"}}, + {$addFields: {c: {$add: ["$c", "$d"]}}}, + {$addFields: {a: 5}}, + // TODO SERVER-78354: Uncomment this test and ensure it passes in the + // aggregation_disabled_optimization suite. + // {$match: {a: 1}} + ]; + + if (!isBonsaiEnabled) { + // TODO: SERVER-78354 should move $project, $addFields, and $unwind to "stages" so $project + // runs with Bonsai. This is an issue with the reference tracker. + stages.push({$project: {a: 1}}); + } + for (const stage of stages) { + const pipeline = range(pipelineLimit).map(_ => stage); + jsTestLog(stage); + runAgg(pipeline); + } +} + +/* + * Generates a $match query with specified branchingFactor and maxDepth of the form + * {$and: [{$or: [... $and ...]}, ... (length branchingFactor) ...]} + * Uses unique field names across the generated query. 
+ */ +let fieldIndex = 0; +function generateNestedAndOrHelper(type, branchingFactor, maxDepth) { + if (maxDepth === 0) { + const field = 'a' + fieldIndex; + const query = {[field]: NumberInt(fieldIndex)}; + fieldIndex++; + return query; + } + + const oppositeType = type === '$and' ? '$or' : '$and'; + const children = []; + for (let i = 0; i < branchingFactor; i++) { + const childQuery = generateNestedAndOrHelper(oppositeType, branchingFactor, maxDepth - 1); + children.push(childQuery); + } + + return {[type]: children}; +} + +function generateNestedAndOr(type, branchingFactor, maxDepth) { + fieldIndex = 0; + return generateNestedAndOrHelper(type, branchingFactor, maxDepth); +} + +function testNestedAndOr() { + // TODO: SERVER-79092 uncomment this test (match expression auto-parameterization issue). + // jsTestLog("Testing nested $and/$or"); + // for (const topLevelType of ['$and', '$or']) { + // // Test different types of nested queries + // let [branchingFactor, maxDepth] = [3, 10]; + // const deepNarrowQuery = generateNestedAndOr(topLevelType, branchingFactor, maxDepth); + // runAgg([{$match: deepNarrowQuery}]); + + // [branchingFactor, maxDepth] = [10, 5]; + // const shallowWideQuery = generateNestedAndOr(topLevelType, branchingFactor, maxDepth); + // runAgg([{$match: shallowWideQuery}]); + // } +} + +const tests = [ + testLargeIn, + testLargeProject, + testLargeAndOrPredicates, + testDeeplyNestedPath, + testNestedAndOr, + testPipelineLimits +]; + +for (const test of tests) { + test(); +} +})(); \ No newline at end of file diff --git a/src/mongo/shell/utils.js b/src/mongo/shell/utils.js index e8e840c6639..89fc7b1ef54 100644 --- a/src/mongo/shell/utils.js +++ b/src/mongo/shell/utils.js @@ -319,6 +319,11 @@ if (typeof TestData == "undefined") { TestData = undefined; } +function _optimizationsEnabled(flags) { + const sanitizeMatch = /(\s|^)-O2(\s|$)/.exec(getBuildInfo()["buildEnvironment"]["ccflags"]); + return Boolean(sanitizeMatch); +} + +function 
__sanitizeMatch(flag) { var sanitizeMatch = /-fsanitize=([^\s]+) /.exec(getBuildInfo()["buildEnvironment"]["ccflags"]); if (flag && sanitizeMatch && RegExp(flag).exec(sanitizeMatch[1])) {