0
0
mirror of https://github.com/mongodb/mongo.git synced 2024-11-24 16:46:00 +01:00

SERVER-89308 Create query properties tester (#21800)

GitOrigin-RevId: c6d23f0facff71b23f0c764eee8d41ec8621a1d6
This commit is contained in:
Matthew Boros 2024-06-14 14:54:34 -04:00 committed by MongoDB Bot
parent a380ae67ff
commit 913e657391
7 changed files with 580 additions and 11 deletions

View File

@ -205,6 +205,7 @@ globals:
bsonUnorderedFieldsCompare: true
bsonBinaryEqual: true
friendlyEqual: true
unorderedFriendlyEqual: true
timestampCmp: true
decompressBSONColumn: true

View File

@ -317,16 +317,20 @@ export function getPlanStage(root, stage) {
* This helper function can be used for any optimizer.
*/
export function getRejectedPlans(root) {
if (root.queryPlanner.winningPlan.hasOwnProperty("shards")) {
const rejectedPlans = [];
for (let shard of root.queryPlanner.winningPlan.shards) {
for (let rejectedPlan of shard.rejectedPlans) {
rejectedPlans.push(Object.assign({shardName: shard.shardName}, rejectedPlan));
if (root.hasOwnProperty('queryPlanner')) {
if (root.queryPlanner.winningPlan.hasOwnProperty("shards")) {
const rejectedPlans = [];
for (let shard of root.queryPlanner.winningPlan.shards) {
for (let rejectedPlan of shard.rejectedPlans) {
rejectedPlans.push(Object.assign({shardName: shard.shardName}, rejectedPlan));
}
}
return rejectedPlans;
}
return rejectedPlans;
return root.queryPlanner.rejectedPlans;
} else {
return root.stages[0]['$cursor'].queryPlanner.rejectedPlans;
}
return root.queryPlanner.rejectedPlans;
}
/**

View File

@ -0,0 +1,182 @@
/*
* Fast-check models for aggregation pipelines and index definitions. Works for time-series
* collections but also is general enough for regular collections.
*/
import {fc} from "jstests/third_party/fast_check/fc-3.1.0.js";
// ------------------------------------- Aggregation Arbitraries -----------------------------------
// .oneof() arguments are ordered from least complex to most, since fast-check uses this ordering to
// shrink.
// Scalar constants that can appear as comparison values and $addFields arguments.
const scalarArb = fc.oneof(fc.constant(null),
                           fc.boolean(),
                           fc.integer({min: -99, max: 99}),
                           // Strings starting with `$` can be confused with fields.
                           fc.string().filter(s => !s.startsWith('$')),
                           fc.date());
// Field paths the generated queries reference; these match the documents the runner inserts.
const fieldArb = fc.constantFrom('t', 'm', 'm.m1', 'm.m2', 'a', 'b', 'array');
// Fields the model is willing to use as a $group output field (no dotted paths, no 'array').
const assignableFieldArb = fc.constantFrom('m', 't', 'a', 'b');
// The same fields in `$`-prefixed expression form, e.g. '$m.m1'.
const dollarFieldArb = fieldArb.map(f => "$" + f);
// Comparison operators used in $match leaf predicates.
const comparisonArb = fc.constantFrom('$eq', '$lt', '$lte', '$gt', '$gte');
// Accumulators the generated $group stages may use.
const accumulatorArb = fc.constantFrom('$count', '$min', '$max', '$minN', '$maxN', '$sum');
// Inclusion/exclusion projections, e.g. {$project: {_id: 1, a: 0}}
const projectArb = fc.tuple(fieldArb, fc.boolean()).map(([field, includeField]) => {
    return {$project: {_id: 1, [field]: includeField}};
});
// Projection renaming one field to another, e.g. {$project: {a: '$b'}}
const computedProjectArb = fc.tuple(fieldArb, dollarFieldArb).map(([destField, srcField]) => {
    return {$project: {[destField]: srcField}};
});
// Add field with a constant argument, e.g. {$addFields: {a: 5}}
const addFieldsConstArb = fc.tuple(fieldArb, scalarArb).map(([destField, scalar]) => {
    return {$addFields: {[destField]: scalar}};
});
// Add field copied from a source field, e.g. {$addFields: {a: '$b'}}
const addFieldsVarArb = fc.tuple(fieldArb, dollarFieldArb).map(([destField, sourceField]) => {
    return {$addFields: {[destField]: sourceField}};
});
// Single comparison leaf predicate of a $match, e.g. {a: {$eq: 5}}
const simpleMatchLeafPredicate =
    fc.tuple(fieldArb, comparisonArb, scalarArb).map(([field, cmp, cmpVal]) => {
        return {[field]: {[cmp]: cmpVal}};
    });
// $in leaf predicate, e.g. {a: {$in: [1,2,3]}}
const inMatchPredicate =
    fc.tuple(fieldArb, fc.array(scalarArb, {minLength: 0, maxLength: 5})).map(([field, inVals]) => {
        return {[field]: {$in: inVals}};
    });
// Any leaf predicate that can appear inside a $match.
const matchLeafPredicate = fc.oneof(simpleMatchLeafPredicate, inMatchPredicate);
// Arbitrary $match expression that may contain nested logical operations, or just leaves.
// {$match: {a: {$eq: 5}}}, {$match: {$and: [{a: {$eq: 5}}, {b: {$eq: 6}}]}}
// $or and $nor are only allowed if `allowOrs` is true.
function getMatchArb(allowOrs) {
    const logicalOpArb = allowOrs ? fc.constantFrom('$and', '$or', '$nor') : fc.constant('$and');
    // Recursive grammar: a predicate is either a leaf, or a logical operator over 1-3
    // sub-predicates, capped at depth 5.
    const predicateArb =
        fc.letrec(
              tie => ({
                  compoundPred: fc.tuple(logicalOpArb,
                                         fc.array(tie('predicate'), {minLength: 1, maxLength: 3}))
                                    .map(([logicalOp, children]) => {
                                        return {[logicalOp]: children};
                                    }),
                  predicate: fc.oneof({maxDepth: 5}, matchLeafPredicate, tie('compoundPred'))
              }))
            .predicate;
    return fc.array(predicateArb, {minLength: 1, maxLength: 5}).map((predicates) => {
        // Merge all the predicates into one object. Merge into a fresh target:
        // `Object.assign(...predicates)` would mutate `predicates[0]`, which is a
        // fast-check-generated value; fast-check may reuse generated values while
        // shrinking, so mutating them can corrupt counterexamples.
        const mergedPredicates = Object.assign({}, ...predicates);
        return {$match: mergedPredicates};
    });
}
// Single-field $sort, e.g. {$sort: {a: 1}}.
// TODO SERVER-91164 sort on multiple fields
const sortArb = fc.tuple(fieldArb, fc.constantFrom(1, -1)).map(([field, sortOrder]) => {
    return {$sort: {[field]: sortOrder}};
});
// TODO SERVER-91164 include $top/$bottom and other accumulators, allow null as the groupby argument
// {$group: {_id: '$a', b: {$min: '$c'}}}
const groupArb =
    fc.tuple(
          dollarFieldArb, assignableFieldArb, accumulatorArb, dollarFieldArb, fc.integer({min: 1}))
        .map(([gbField, outputField, acc, dataField, minMaxNumResults]) => {
            // Each accumulator takes a differently-shaped argument.
            let accSpec;
            switch (acc) {
                case '$count':
                    accSpec = {[acc]: {}};
                    break;
                case '$minN':
                case '$maxN':
                    accSpec = {[acc]: {input: dataField, n: minMaxNumResults}};
                    break;
                default:
                    accSpec = {[acc]: dataField};
            }
            return {$group: {_id: gbField, [outputField]: accSpec}};
        });
// Arbitrary for a single aggregation stage. `allowOrs` controls whether generated $match stages
// may contain $or/$nor (see getMatchArb).
function getAggStageArb(allowOrs) {
    // TODO SERVER-91164 include $limit
    // Per the note at the top of this file, .oneof() arguments are ordered least- to
    // most-complex so fast-check shrinks toward simpler stages.
    return fc.oneof(
        // TODO SERVER-91405 re-enable projections in model.
        // projectArb,
        getMatchArb(allowOrs),
        addFieldsConstArb,
        computedProjectArb,
        addFieldsVarArb,
        sortArb,
        groupArb);
}
// Our full model for aggregation pipelines. Length 6 seems long enough to cover interactions
// between stages.
export const aggPipelineModel =
    fc.array(getAggStageArb(true /* allowOrs */), {minLength: 0, maxLength: 6});
// Variant with $or/$nor disabled, for properties that $or-induced subplanning would invalidate.
export const aggPipelineNoOrsModel =
    fc.array(getAggStageArb(false /* allowOrs */), {minLength: 0, maxLength: 6});
// ------------------------------------- Index Def Arbitraries -----------------------------------
const indexFieldArb = fc.constantFrom('_id', 't', 'm', 'm.m1', 'm.m2', 'a', 'b', 'array');
// Regular indexes
// Tuple of indexed field, and its sort direction.
const singleIndexDefArb = fc.tuple(indexFieldArb, fc.constantFrom(1, -1));
// Unique array of [[a, true], [b, false], ...] to be mapped to an index definition. Unique on the
// indexed field. Filter out any indexes that only use the _id field.
const arrayOfSingleIndexDefsArb =
    fc.uniqueArray(singleIndexDefArb, {
          minLength: 1,
          maxLength: 5,
          selector: fieldAndSort => fieldAndSort[0],
      })
        .filter(arrayOfIndexDefs => {
            // We can run into errors if we try to make an {_id: -1} index.
            return !(arrayOfIndexDefs.length === 1 && arrayOfIndexDefs[0][0] === '_id');
        });
const simpleIndexDefArb = arrayOfSingleIndexDefsArb.map(arrayOfIndexDefs => {
    // Convert to a valid index definition structure.
    const fullDef = {};
    for (const [field, sortDirection] of arrayOfIndexDefs) {
        fullDef[field] = sortDirection;
    }
    return fullDef;
});
const simpleIndexOptionsArb = fc.constantFrom({}, {sparse: true});
const simpleIndexDefAndOptionsArb = fc.tuple(simpleIndexDefArb, simpleIndexOptionsArb);
// Hashed indexes
const hashedIndexDefArb =
    fc.tuple(arrayOfSingleIndexDefsArb, fc.integer({min: 0, max: 4 /* Inclusive */}))
        .map(([arrayOfIndexDefs, positionOfHashed]) => {
            // Inputs are an index definition, and the position of the hashed field in the index
            // def.
            const hashedIx = positionOfHashed % arrayOfIndexDefs.length;
            const fullDef = {};
            arrayOfIndexDefs.forEach(([field, sortDir], ix) => {
                fullDef[field] = ix === hashedIx ? 'hashed' : sortDir;
            });
            return fullDef;
        })
        .filter(fullDef => {
            // Can't create hashed index on array field.
            return !Object.keys(fullDef).includes('array');
        });
// No index options for hashed or wildcard indexes.
const hashedIndexDefAndOptionsArb = fc.tuple(hashedIndexDefArb, fc.constant({}));
// Wildcard indexes. TODO SERVER-91164 expand coverage.
const wildcardIndexDefAndOptionsArb = fc.tuple(fc.constant({"$**": 1}), fc.constant({}));
// Map to an object with the definition and options, so it's more clear what each object is.
const indexInfoArb =
    fc.oneof(
          simpleIndexDefAndOptionsArb, wildcardIndexDefAndOptionsArb, hashedIndexDefAndOptionsArb)
        .map(([def, options]) => {
            return {def, options};
        });
export const listOfIndexesModel = fc.array(indexInfoArb, {minLength: 0, maxLength: 7});

View File

@ -0,0 +1,183 @@
import {getRejectedPlans} from "jstests/libs/analyze_plan.js";
import {
aggPipelineModel,
aggPipelineNoOrsModel,
} from "jstests/libs/property_test_helpers/query_models.js";
import {checkSbeFullyEnabled} from "jstests/libs/sbe_util.js";
/*
 * Properties take the collection we're testing, a list of queries to use during the property test,
 * and some helpers which include a comparator, a control collection, etc.
 */
// Motivation: Query correctness.
function queryHasSameResultsAsControlCollScan(experimentColl, queries, testHelpers) {
    // Every generated query must return the same documents on the experiment collection as on
    // the control collection.
    for (const query of queries) {
        const controlResults = testHelpers.controlColl.aggregate(query).toArray();
        const experimentalResults = experimentColl.aggregate(query).toArray();
        if (!testHelpers.comp(controlResults, experimentalResults)) {
            return {
                passed: false,
                message:
                    'Query results from experiment collection did not match plain collection using collscan.',
                query,
                explain: experimentColl.explain().aggregate(query),
                controlResults,
                experimentalResults
            };
        }
    }
    return {passed: true};
}
// Motivation: Auto-parameterization and plan cache correctness.
function repeatQueriesReturnSameResults(experimentColl, queries, testHelpers) {
    // Run the first query several times; every rerun must match the first result set.
    const query = queries[0];
    const firstResult = experimentColl.aggregate(query).toArray();
    let repeatRun = 0;
    while (repeatRun < 5) {
        const repeatResult = experimentColl.aggregate(query).toArray();
        if (!testHelpers.comp(firstResult, repeatResult)) {
            return {
                passed: false,
                message: 'Running the same query repeatedly did not yield the same results.',
                firstResult,
                repeatResult
            };
        }
        repeatRun++;
    }
    return {passed: true};
}
// Motivation: Check that the plan cache key we use to lookup in the cache and to store in the cache
// are consistent.
function repeatQueriesUseCache(experimentColl, queries, testHelpers) {
    const query = queries[0];
    const explain = experimentColl.explain().aggregate(query);
    // With no rejected plans there was only one candidate plan, so there is no multiplanning
    // cache entry to exercise; the property vacuously holds.
    if (getRejectedPlans(explain).length === 0) {
        return {passed: true};
    }
    // First run creates the plan cache entry (result intentionally unused).
    const firstResult = experimentColl.aggregate(query).toArray();
    // Currently, both classic and SBE queries use the classic plan cache.
    const serverStatusBefore = testHelpers.serverStatus();
    const classicHitsBefore = serverStatusBefore.metrics.query.planCache.classic.hits;
    const sbeHitsBefore = serverStatusBefore.metrics.query.planCache.sbe.hits;
    for (let i = 0; i < 5; i++) {
        experimentColl.aggregate(query).toArray();
    }
    const serverStatusAfter = testHelpers.serverStatus();
    const classicHitsAfter = serverStatusAfter.metrics.query.planCache.classic.hits;
    const sbeHitsAfter = serverStatusAfter.metrics.query.planCache.sbe.hits;
    // If neither the SBE plan cache hits nor the classic plan cache hits have incremented, then our
    // query must not have hit the cache.
    // NOTE(review): the expected delta is 4 for 5 reruns -- presumably the first rerun
    // activates the initially-inactive cache entry rather than reading from it; confirm
    // against plan cache activation semantics.
    if (checkSbeFullyEnabled(testHelpers.experimentDb) && sbeHitsAfter - sbeHitsBefore === 4) {
        return {passed: true};
    } else if (classicHitsAfter - classicHitsBefore === 4) {
        return {passed: true};
    }
    return {
        passed: false,
        message: 'Plan cache hits failed to increment after running query several times.',
        query,
        explain,
        classicHitsBefore,
        classicHitsAfter,
        planCacheState: testHelpers.getPlanCache(experimentColl).list()
    };
}
// Motivation: Auto-parameterization and fetching from the plan cache correctness.
function cachedQueriesHaveSameResultsAsControlCollScan(experimentColl, queries, testHelpers) {
    // Get the query shape cached.
    const initialQuery = queries[0];
    for (let i = 0; i < 3; i++) {
        experimentColl.aggregate(initialQuery).toArray();
    }
    // Check that following queries, with different parameters, have correct results. These queries
    // won't always use the cached plan because we don't model our autoparameterization rules in
    // this test, but that's okay.
    for (const query of queries.slice(1)) {
        const controlResults = testHelpers.controlColl.aggregate(query).toArray();
        const experimentResults = experimentColl.aggregate(query).toArray();
        if (!testHelpers.comp(controlResults, experimentResults)) {
            return {
                passed: false,
                message: 'A query potentially using the plan cache has incorrect results. ' +
                    'The query that created the cache entry likely has different parameters.',
                initialQuery,
                query,
                explain: experimentColl.explain().aggregate(query),
                controlResults,
                experimentResults
            };
        }
    }
    return {passed: true};
}
// Motivation: Check that our plan cache keys are deterministic.
function identicalQueryCreatesAtMostOneCacheEntry(experimentColl, queries, testHelpers) {
    const query = queries[0];
    const cacheBefore = testHelpers.getPlanCache(experimentColl).list();
    for (let i = 0; i < 5; i++) {
        experimentColl.aggregate(query).toArray();
    }
    const cacheAfter = testHelpers.getPlanCache(experimentColl).list();
    // Re-running an identical query may create at most one new cache entry; more than one
    // means the plan cache key is not deterministic.
    if (cacheAfter.length - cacheBefore.length <= 1) {
        return {passed: true};
    }
    return {
        passed: false,
        // Every other property's failure object carries a `message`; include one here too so
        // the reporter output is uniformly self-describing.
        message: 'Running an identical query several times created more than one plan cache entry.',
        query,
        explain: experimentColl.explain().aggregate(query),
        cacheBefore,
        cacheAfter,
        numberOfCacheEntriesCreated: cacheAfter.length - cacheBefore.length
    };
}
// A list of property tests, and the corresponding model for creating agg pipelines they use. Also
// include how many queries each property needs in order to run.
// Fields consumed by the property-test runner:
//   propertyFn       - the property to check (defined above in this file).
//   aggModel         - fast-check arbitrary used to generate each agg pipeline.
//   numQueriesNeeded - how many pipelines `propertyFn` expects in its `queries` argument.
//   numRuns          - how many random scenarios fast-check should try for this property.
export const propertyTests = [
    {
        propertyFn: queryHasSameResultsAsControlCollScan,
        aggModel: aggPipelineModel,
        numQueriesNeeded: 10,
        numRuns: 100
    },
    {
        propertyFn: repeatQueriesReturnSameResults,
        aggModel: aggPipelineModel,
        numQueriesNeeded: 1,
        numRuns: 50
    },
    {
        propertyFn: repeatQueriesUseCache,
        aggModel: aggPipelineModel,
        numQueriesNeeded: 1,
        numRuns: 300
    },
    {
        propertyFn: cachedQueriesHaveSameResultsAsControlCollScan,
        aggModel: aggPipelineModel,
        numQueriesNeeded: 10,
        numRuns: 50
    },
    {
        propertyFn: identicalQueryCreatesAtMostOneCacheEntry,
        // No $or allowed for this property, since a query with an $or may lead to subplanning
        // with multiple cache entries.
        aggModel: aggPipelineNoOrsModel,
        numQueriesNeeded: 1,
        numRuns: 300
    }
];

View File

@ -0,0 +1,166 @@
/*
 * Generates random query shapes, and checks that these queries satisfy a set of properties. These
 * properties include caching rules, correctness against classic engine collscans, and others.
 */
import {listOfIndexesModel} from "jstests/libs/property_test_helpers/query_models.js";
import {propertyTests} from "jstests/libs/property_test_helpers/query_properties.js";
import {fc} from "jstests/third_party/fast_check/fc-3.1.0.js";
// Force classic control collection with no indexes.
// The control node is pinned to the classic engine; its results are the source of truth the
// experiment collections are compared against.
const controlConn =
    MongoRunner.runMongod({setParameter: {internalQueryFrameworkControl: "forceClassicEngine"}});
assert.neq(controlConn, null, "mongod failed to start up");
const controlDb = controlConn.getDB(jsTestName());
const controlColl = controlDb.control_collection;
controlColl.drop();
// The experiment node runs with default settings; it hosts both a plain and a time-series
// collection containing the same documents.
const experimentConn = MongoRunner.runMongod();
assert.neq(experimentConn, null, "mongod failed to start up");
const experimentDb = experimentConn.getDB(jsTestName());
const experimentPlainColl = experimentDb.experiment_collection;
const experimentTsColl = experimentDb.experiment_ts_collection;
// Seed data: 10 distinct meta values, each with 10 documents (100 documents total). Field names
// match what the query models generate ('t', 'm', 'm.m1', 'm.m2', 'a', 'b', 'array').
// Base value for the time field; presumably epoch milliseconds -- TODO confirm units.
const datePrefix = 1680912440;
const alphabet = 'abcdefghijklmnopqrstuvwxyz';
const docs = [];
let id = 0;
for (let m = 0; m < 10; m++) {
    let currentDate = 0;
    for (let i = 0; i < 10; i++) {
        docs.push({
            _id: id,
            // Timestamps within one meta value are spaced 25 apart (starting at -100).
            t: new Date(datePrefix + currentDate - 100),
            m: {m1: m, m2: 2 * m},
            array: [i, i + 1, 2 * i],
            a: NumberInt(10 - i),
            b: alphabet.charAt(i)
        });
        currentDate += 25;
        id += 1;
    }
}
// Setup our control collection, as well as our experiment control and experiment TS collections.
// Pipeline optimization is disabled on the control node so its answers come from naive,
// unoptimized execution.
assert.commandWorked(
    controlDb.adminCommand({configureFailPoint: 'disablePipelineOptimization', mode: 'alwaysOn'}));
// Only the time-series collection needs explicit creation; the plain collections are created
// implicitly by the inserts below.
assert.commandWorked(experimentDb.createCollection(experimentTsColl.getName(), {
    timeseries: {timeField: 't', metaField: 'm'},
}));
assert.commandWorked(controlColl.insert(docs));
assert.commandWorked(experimentPlainColl.insert(docs));
assert.commandWorked(experimentTsColl.insert(docs));
// Returns the plan cache for the given experiment collection. For the time-series collection,
// look at the underlying buckets collection.
function getPlanCache(coll) {
    assert(coll === experimentPlainColl || coll === experimentTsColl);
    return coll === experimentTsColl
        ? experimentDb.system.buckets.experiment_ts_collection.getPlanCache()
        : coll.getPlanCache();
}
// Clear any state in the collection (other than data, which doesn't change). Create indexes the
// test uses, then run the property test.
function runProperty(propertyFn, isTs, indexes, pipelines) {
    const experimentColl = isTs ? experimentTsColl : experimentPlainColl;
    // Clear all state and create indexes. Index creation is deliberately not asserted on.
    getPlanCache(experimentColl).clear();
    assert.commandWorked(experimentColl.dropIndexes());
    indexes.forEach(index => experimentColl.createIndex(index.def, index.options));
    const testHelpers = {
        comp: unorderedFriendlyEqual,
        experimentDb,
        controlColl,
        getPlanCache,
        serverStatus: () => experimentDb.serverStatus()
    };
    return propertyFn(experimentColl, pipelines, testHelpers);
}
// We need a custom reporter function to get more details on the failure. The default won't show
// what property failed very clearly, or provide more details beyond the counterexample.
function reporter(propertyFn) {
    return function(runDetails) {
        if (!runDetails.failed) {
            return;
        }
        // Print the fast-check failure summary, the counterexample, and additional details
        // about the property failure.
        jsTestLog(runDetails);
        const [isTs, indexes, pipelines] = runDetails.counterexample[0];
        jsTestLog({isTs, indexes, pipelines});
        jsTestLog(runProperty(propertyFn, isTs, indexes, pipelines));
        jsTestLog('Failed property: ' + propertyFn.name);
        assert(false);
    };
}
// Whether the index definition contains a wildcard ('$**') component.
function isWildcardIndex(index) {
    return Object.keys(index.def).some(field => field.includes('$**'));
}
// Whether any component of the index definition is hashed.
function isHashedIndex(index) {
    return Object.values(index.def).includes('hashed');
}
// Whether the index definition touches the 'array' field, the only array-valued field in our
// generated documents.
function isMultikey(index) {
    return Object.keys(index.def).includes('array');
}
// Test a property, given the property function (from query_properties.js). We construct a pipeline
// model from some metadata about the property, and call `runProperty` to clear state and call the
// property function correctly. On failure, `runProperty` is called again in the reporter, and
// prints out more details about the failed property.
function testProperty(propertyFn, aggModel, numQueriesNeeded, numRuns) {
    // Generate exactly as many pipelines as the property consumes.
    const nPipelinesArb =
        fc.array(aggModel, {minLength: numQueriesNeeded, maxLength: numQueriesNeeded});
    // A scenario is [isTs, indexes, pipelines]: whether to target the time-series collection,
    // which indexes to build, and the pipelines to run.
    const scenarioArb = fc.tuple(fc.boolean(), listOfIndexesModel, nPipelinesArb)
                            .filter(([isTs, indexes, pipelines]) => {
                                // --- Permanent conditions ---
                                // TS collections don't support Wildcard, Hashed, or Sparse indexes.
                                // Also multikey indexes on measurement fields are not allowed.
                                if (isTs) {
                                    for (const index of indexes) {
                                        if (index.options.sparse || isWildcardIndex(index) ||
                                            isHashedIndex(index) || isMultikey(index)) {
                                            return false;
                                        }
                                    }
                                }
                                return true;
                            });
    fc.assert(fc.property(scenarioArb,
                          ([isTs, indexes, pipelines]) => {
                              // Only return if the property passed or not. On failure,
                              // `runProperty` is called again and more details are exposed.
                              return runProperty(propertyFn, isTs, indexes, pipelines).passed;
                          }),
              // TODO SERVER-91404 randomize in waterfall.
              // The fixed seed keeps runs reproducible for now.
              {seed: 4, numRuns, reporter: reporter(propertyFn)});
}
// Run every registered property, then shut both nodes down.
for (const {propertyFn, aggModel, numQueriesNeeded, numRuns} of propertyTests) {
    testProperty(propertyFn, aggModel, numQueriesNeeded, numRuns);
}
MongoRunner.stopMongod(controlConn);
MongoRunner.stopMongod(experimentConn);

View File

@ -6,7 +6,7 @@ import {fc} from "jstests/third_party/fast_check/fc-3.1.0.js";
const scalars = [fc.string(), fc.double(), fc.boolean(), fc.date(), fc.constant(null)];
const pathComponents = fc.constant("a", "b");
const pathComponents = fc.constantFrom("a", "b");
// Define our grammar for documents.
let documentModel = fc.letrec(
tie => ({
@ -44,13 +44,13 @@ let testMixedTypeQuerying = () => {
([docs, val, pathArray, compare]) => {
db.test.drop();
db.control.drop();
db.createCollection("test", {timeseries: {timeField: "t"}});
assert.commandWorked(db.createCollection("test", {timeseries: {timeField: "t"}}));
// Insert documents
docs.forEach(doc => {
let date = new ISODate();
db.test.insert(Object.assign({t: date}, doc));
db.control.insert(Object.assign({t: date}, doc));
assert.commandWorked(db.test.insert(Object.assign({t: date}, doc)));
assert.commandWorked(db.control.insert(Object.assign({t: date}, doc)));
});
// Construct the path to query on.

View File

@ -256,6 +256,39 @@ friendlyEqual = function(a, b) {
return false;
};
// Takes two arrays of documents, and returns whether they contain the same set of documents under
// very lenient conditions. The documents do not need to be in the same order for this to return
// true (the top-level arrays are compared order-insensitively; nested array order is preserved).
// Do not use this comparator unless all of these behaviors are necessary.
unorderedFriendlyEqual = function(result1, result2) {
    // Recursively normalize a value so that logically-equal documents serialize identically:
    // plain-object fields are emitted in sorted key order, arrays are normalized element-wise,
    // and everything else is left untouched. Recursion is restricted to arrays and plain
    // objects on purpose: recursing into e.g. a Date (whose Object.keys() is []) would
    // normalize every Date to {}, making distinct dates compare equal.
    const orderFn = function(value) {
        if (Array.isArray(value)) {
            return value.map(orderFn);
        }
        if (!(value instanceof Object) || value.constructor !== Object) {
            // Scalars, null/undefined, Dates, and other non-plain objects.
            return value;
        }
        const keys = Object.keys(value);
        keys.sort();
        let newDoc = {};
        for (const key of keys) {
            newDoc[key] = orderFn(value[key]);
        }
        return newDoc;
    };
    result1 = result1.map(orderFn);
    result2 = result2.map(orderFn);
    // Sort both result sets by their serialized form so top-level document order is ignored.
    const cmpFn = function(doc1, doc2) {
        const doc1Json = tojson(doc1);
        const doc2Json = tojson(doc2);
        return doc1Json < doc2Json ? -1 : (doc1Json > doc2Json ? 1 : 0);
    };
    result1.sort(cmpFn);
    result2.sort(cmpFn);
    return friendlyEqual(result1, result2);
};
printStackTrace = function() {
try {
throw new Error("Printing Stack Trace");