
SERVER-53872 Support time-series collections in tenant migration failover test.

Wenbin Zhu 2021-04-02 20:32:34 +00:00 committed by Evergreen Agent
parent 5ec706707e
commit 424758195f


@@ -1,104 +1,143 @@
/**
 * Tests that in tenant migration, the recipient set can resume collection cloning from the last
 * document cloned after a failover.
 * @tags: [
 *   requires_majority_read_concern,
 *   requires_fcv_49,
 *   incompatible_with_windows_tls,
 *   incompatible_with_eft
 * ]
 */
(function() {
"use strict";
const tenantMigrationFailoverTest = function(isTimeSeries, createCollFn, docs) {
"use strict";
load("jstests/libs/fail_point_util.js");
load("jstests/libs/uuid_util.js"); // for 'extractUUIDFromObject'
load("jstests/libs/parallelTester.js"); // for 'Thread'
load("jstests/replsets/libs/tenant_migration_test.js");
load("jstests/replsets/libs/tenant_migration_util.js");
load("jstests/core/timeseries/libs/timeseries.js");
load("jstests/libs/fail_point_util.js");
load("jstests/libs/uuid_util.js"); // for 'extractUUIDFromObject'
load("jstests/replsets/libs/tenant_migration_test.js");
load("jstests/replsets/libs/tenant_migration_util.js");

    const batchSize = 2;
    const recipientRst = new ReplSetTest({
        nodes: 2,
        name: jsTestName() + "_recipient",
        nodeOptions: Object.assign(TenantMigrationUtil.makeX509OptionsForTest().recipient, {
            setParameter: {
                // Use a batch size of 2 so that collection cloner requires more than a single
                // batch to complete.
                collectionClonerBatchSize: batchSize,
                // Allow reads on recipient before migration completes for testing.
                'failpoint.tenantMigrationRecipientNotRejectReads': tojson({mode: 'alwaysOn'}),
            }
        })
    });
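
    // Note: the 'tenantMigrationRecipientNotRejectReads' fail point set above lets this test
    // read directly from the recipient while the migration is still running; the cloning
    // progress checks below depend on it.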

    recipientRst.startSet();
    recipientRst.initiate();

    const tenantMigrationTest =
        new TenantMigrationTest({name: jsTestName(), recipientRst: recipientRst});
    const donorPrimary = tenantMigrationTest.getDonorPrimary();
    if (!TenantMigrationUtil.isFeatureFlagEnabled(donorPrimary)) {
        jsTestLog("Skipping test because the tenant migrations feature flag is disabled");
        tenantMigrationTest.stop();
        recipientRst.stopSet();
        return;
    }

    if (isTimeSeries && !TimeseriesTest.timeseriesCollectionsEnabled(donorPrimary)) {
        jsTestLog("Skipping test because the time-series collection feature flag is disabled");
        tenantMigrationTest.stop();
        recipientRst.stopSet();
        return;
    }
const tenantId = "testTenantId";
const dbName = tenantMigrationTest.tenantDB(tenantId, "testDB");
const donorDB = donarPrimary.getDB(dbName);
const collName = "testColl";
const recipientPrimary = tenantMigrationTest.getRecipientPrimary();
// Create collection and insert documents.
assert.commandWorked(createCollFn(donorDB, collName));
tenantMigrationTest.insertDonorDB(dbName, collName, docs);
const migrationId = UUID();
const migrationIdString = extractUUIDFromObject(migrationId);
const migrationOpts = {
migrationIdString: migrationIdString,
recipientConnString: tenantMigrationTest.getRecipientConnString(),
tenantId: tenantId,
};
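
    // For a time-series collection, the cloner copies the underlying 'system.buckets'
    // collection rather than the view, so the collection handle and the fail point
    // namespace below must target 'system.buckets.<collName>'.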
    // Configure a fail point to have the recipient primary hang after cloning 2 documents.
    const recipientDb = recipientPrimary.getDB(dbName);
    let recipientColl = isTimeSeries ? recipientDb.getCollection("system.buckets." + collName)
                                     : recipientDb.getCollection(collName);
    const hangDuringCollectionClone =
        configureFailPoint(recipientDb,
                           "tenantMigrationHangCollectionClonerAfterHandlingBatchResponse",
                           {nss: recipientColl.getFullName()});

    // Start a migration and wait for recipient to hang after cloning 2 documents.
    assert.commandWorked(tenantMigrationTest.startMigration(migrationOpts));
    hangDuringCollectionClone.wait();
    assert.soon(() => recipientColl.find().itcount() === 2);

    // Insert some documents that will be fetched by the recipient. This is to test that on
    // failover, the fetcher will resume fetching from where it left off. The system is expected
    // to crash if the recipient fetches a duplicate oplog entry upon resuming the migration.
    tenantMigrationTest.insertDonorDB(dbName, "aNewColl", [{_id: "docToBeFetched"}]);
    assert.soon(() => {
        const configDb = recipientPrimary.getDB("config");
        const oplogBuffer = configDb.getCollection("repl.migration.oplog_" + migrationIdString);
        return oplogBuffer.find({"entry.o._id": "docToBeFetched"}).count() === 1;
    });
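
    // The recipient buffers fetched donor oplog entries in its
    // 'config.repl.migration.oplog_<migrationIdString>' collection; the check above confirms
    // that 'docToBeFetched' reached the buffer before the failover is triggered.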

    // Step up a new node in the recipient set and trigger a failover. The new primary should
    // resume cloning starting from the third document.
    const newRecipientPrimary = recipientRst.getSecondaries()[0];
    recipientRst.awaitLastOpCommitted();
    assert.commandWorked(newRecipientPrimary.adminCommand({replSetStepUp: 1}));
    hangDuringCollectionClone.off();
    recipientRst.getPrimary();
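    // getPrimary() blocks until the election completes, so the test only proceeds once the
    // recipient set has a new primary.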

    // The migration should go through after recipient failover.
    const stateRes =
        assert.commandWorked(tenantMigrationTest.waitForMigrationToComplete(migrationOpts));
    assert.eq(stateRes.state, TenantMigrationTest.DonorState.kCommitted);

    // Check that the recipient has cloned all documents in the collection.
    recipientColl = newRecipientPrimary.getDB(dbName).getCollection(collName);
    assert.eq(docs.length, recipientColl.find().itcount());
    assert.docEq(recipientColl.find().sort({_id: 1}).toArray(), docs);
    TenantMigrationUtil.checkTenantDBHashes(
        tenantMigrationTest.getDonorRst(), tenantMigrationTest.getRecipientRst(), tenantId);

    tenantMigrationTest.stop();
    recipientRst.stopSet();
};
jsTestLog("Running tenant migration test for time-series collection");
tenantMigrationFailoverTest(true,
(db, collName) => db.createCollection(
collName, {timeseries: {timeField: "time", metaField: "bucket"}}),
[
// Group each document in its own bucket in order to work with the
// collectionClonerBatchSize we set at the recipient replSet.
{_id: 1, time: ISODate(), bucket: "a"},
{_id: 2, time: ISODate(), bucket: "b"},
{_id: 3, time: ISODate(), bucket: "c"},
{_id: 4, time: ISODate(), bucket: "d"}
]);
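// With collectionClonerBatchSize set to 2, the four single-document buckets above are cloned
// in two batches, which lets the fail point pause the cloner after the first batch, midway
// through the collection.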
jsTestLog("Running tenant migration test for regular collection");
tenantMigrationFailoverTest(false,
(db, collName) => db.createCollection(collName),
[{_id: 0}, {_id: "string"}, {_id: UUID()}, {_id: new Date()}]);
})();