/**
 * Tests that while there is an unfinished migration pending recovery, if a new migration (of a
 * different collection) attempts to start, it will first need to recover the unfinished migration.
 *
 * @tags: [
 *   # In the event of a config server step down, the new primary balancer may attempt to recover
 *   # that migration by sending a new `moveChunk` command to the donor shard, causing the test to
 *   # hang.
 *   does_not_support_stepdowns,
 *   # Flaky with a config shard because the failovers this test triggers cause a retry from
 *   # mongos, which can prevent the fail point from being unset and cause the test to time out.
 *   config_shard_incompatible,
 * ]
 */

import {moveChunkParallel} from "jstests/libs/chunk_manipulation_util.js";
import {configureFailPoint} from "jstests/libs/fail_point_util.js";
import {ShardingTest} from "jstests/libs/shardingtest.js";

// Disable checking for index consistency to ensure that the config server doesn't trigger a
// StaleShardVersion exception on the shards and cause them to refresh their sharding metadata.
// That would interfere with the precise migration recovery interleaving this test requires.
const nodeOptions = {
    setParameter: {enableShardedIndexConsistencyCheck: false}
};

// Disable the balancer to prevent balancing rounds from triggering shard version refreshes on
// the shards, which would interfere with the migration recovery interleaving this test requires.
var st = new ShardingTest({
    shards: {rs0: {nodes: 2}, rs1: {nodes: 1}},
    config: 3,
    other: {configOptions: nodeOptions, enableBalancer: false}
});
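
// Standalone mongod used by moveChunkParallel() to track the state of its parallel shells; it is
// not part of the sharded cluster.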
let staticMongod = MongoRunner.runMongod({});

const dbName = "test";
const collNameA = "foo";
const collNameB = "bar";
const nsA = dbName + "." + collNameA;
const nsB = dbName + "." + collNameB;

assert.commandWorked(
    st.s.adminCommand({enableSharding: dbName, primaryShard: st.shard0.shardName}));
assert.commandWorked(st.s.adminCommand({shardCollection: nsA, key: {_id: 1}}));
assert.commandWorked(st.s.adminCommand({shardCollection: nsB, key: {_id: 1}}));

// Hang the first migration right before it commits.
let moveChunkHangAtStep5Failpoint = configureFailPoint(st.rs0.getPrimary(), "moveChunkHangAtStep5");
var joinMoveChunk1 = moveChunkParallel(
    staticMongod, st.s0.host, {_id: 0}, null, nsA, st.shard1.shardName, true /* expectSuccess */);

moveChunkHangAtStep5Failpoint.wait();
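
// Simulate a network error on the migration commit and skip the donor's filtering metadata
// refresh, so the first migration is left with its coordinator document pending recovery.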
let migrationCommitNetworkErrorFailpoint =
    configureFailPoint(st.rs0.getPrimary(), "migrationCommitNetworkError");
let skipShardFilteringMetadataRefreshFailpoint =
    configureFailPoint(st.rs0.getPrimary(), "skipShardFilteringMetadataRefresh");

moveChunkHangAtStep5Failpoint.off();
migrationCommitNetworkErrorFailpoint.wait();

// Don't let the migration recovery finish on the secondary that will next be stepped up.
const rs0Secondary = st.rs0.getSecondary();
let hangInEnsureChunkVersionIsGreaterThanInterruptibleFailpoint =
    configureFailPoint(rs0Secondary, "hangInEnsureChunkVersionIsGreaterThanInterruptible");
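
// Step up the secondary. The new primary will attempt to recover the unfinished migration but
// will hang on the failpoint set above.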
st.rs0.stepUp(rs0Secondary);

joinMoveChunk1();
migrationCommitNetworkErrorFailpoint.off();
skipShardFilteringMetadataRefreshFailpoint.off();

// The migration is left pending recovery.
{
    let migrationCoordinatorDocuments =
        st.rs0.getPrimary().getDB('config')['migrationCoordinators'].find().toArray();
    assert.eq(1, migrationCoordinatorDocuments.length);
    assert.eq(nsA, migrationCoordinatorDocuments[0].nss);
}

// Start a second migration on a different collection and wait until it persists its recovery
// document.
let moveChunkHangAtStep3Failpoint = configureFailPoint(st.rs0.getPrimary(), "moveChunkHangAtStep3");

var joinMoveChunk2 = moveChunkParallel(
    staticMongod, st.s0.host, {_id: 0}, null, nsB, st.shard1.shardName, true /* expectSuccess */);

// Check that the second migration can't persist its coordinator document until the shard has
// recovered the first migration.
sleep(5 * 1000);
{
    // There's still only one migration recovery document, corresponding to the first migration.
    let migrationCoordinatorDocuments =
        st.rs0.getPrimary().getDB('config')['migrationCoordinators'].find().toArray();
    assert.eq(1, migrationCoordinatorDocuments.length);
    assert.eq(nsA, migrationCoordinatorDocuments[0].nss);
}

// Let the first migration's recovery complete.
hangInEnsureChunkVersionIsGreaterThanInterruptibleFailpoint.off();
moveChunkHangAtStep3Failpoint.wait();

// Check that the first migration has been recovered. There must be only one
// config.migrationCoordinators document, which corresponds to the second migration.
{
    let migrationCoordinatorDocuments =
        st.rs0.getPrimary().getDB('config')['migrationCoordinators'].find().toArray();
    assert.eq(1, migrationCoordinatorDocuments.length);
    assert.eq(nsB, migrationCoordinatorDocuments[0].nss);
}

moveChunkHangAtStep3Failpoint.off();
joinMoveChunk2();

MongoRunner.stopMongod(staticMongod);
st.stop();