SERVER-53693 Improve local powercycle testing experience
parent 69f865ae18
commit aafa095c4d
@@ -1 +0,0 @@
"""Empty."""
@@ -1,14 +0,0 @@
"""Command-line entry-point for powercycle_operations."""

from buildscripts.powercycle_setup import parser


def main(argv):
    """
    Execute Main function for powercycle_operations.

    :param argv: sys.argv
    :return: None
    """
    subcommand = parser.parse_command_line(argv[1:])
    subcommand.execute()
@@ -1,44 +0,0 @@
"""Parser for command line arguments."""

import argparse

import buildscripts.powercycle_setup.plugins as plugins

_PLUGINS = [plugins.PowercyclePlugin()]


def _add_subcommands():
    """Create and return the command line arguments parser."""
    parser = argparse.ArgumentParser()
    subparsers = parser.add_subparsers(dest="command")

    # Add sub-commands.
    for plugin in _PLUGINS:
        plugin.add_subcommand(subparsers)

    return parser


def parse(sys_args):
    """Parse the CLI args."""

    # Split out this function for easier testing.
    parser = _add_subcommands()
    parsed_args = parser.parse_args(sys_args)

    return parser, parsed_args


def parse_command_line(sys_args, **kwargs):
    """Parse the command line arguments passed to powercycle_operations.py and return the subcommand object to execute."""
    parser, parsed_args = parse(sys_args)

    subcommand = parsed_args.command

    for plugin in _PLUGINS:
        subcommand_obj = plugin.parse(subcommand, parser, parsed_args, **kwargs)
        if subcommand_obj is not None:
            return subcommand_obj

    raise RuntimeError(
        f"Powercycle configuration has invalid subcommand: {subcommand}. Try '--help'")
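For context, both the deleted parser above and its replacement under resmoke follow the same plugin-dispatch pattern: every plugin registers a subparser, then each plugin is asked in turn to claim the parsed subcommand by returning a Subcommand-like object (or None). The following is a minimal, self-contained sketch of that flow, not part of this commit; EchoPlugin and EchoSubcommand are hypothetical stand-ins rather than the real classes.

import argparse


class EchoSubcommand:
    """Hypothetical stand-in for a resmoke Subcommand: execute() does the work."""

    def __init__(self, word):
        self.word = word

    def execute(self):
        print(self.word)


class EchoPlugin:
    """Hypothetical stand-in for a plugin: it claims its subcommand or returns None."""

    def add_subcommand(self, subparsers):
        subparsers.add_parser("echo").add_argument("word")

    def parse(self, subcommand, parser, parsed_args, **kwargs):
        return EchoSubcommand(parsed_args.word) if subcommand == "echo" else None


_SKETCH_PLUGINS = [EchoPlugin()]

parser = argparse.ArgumentParser()
subparsers = parser.add_subparsers(dest="command")
for plugin in _SKETCH_PLUGINS:
    plugin.add_subcommand(subparsers)

parsed_args = parser.parse_args(["echo", "hello"])
for plugin in _SKETCH_PLUGINS:
    subcommand = plugin.parse(parsed_args.command, parser, parsed_args)
    if subcommand is not None:
        subcommand.execute()  # prints "hello"
        break
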
@@ -1,422 +0,0 @@
"""Set up powercycle remote operations."""
import getpass
import os
import re
import shlex
import subprocess
import sys

import yaml

from buildscripts.powercycle_setup.remote_operations import RemoteOperations, SSHOperation
from buildscripts.resmokelib.plugin import PluginInterface, Subcommand
from buildscripts.resmokelib.powercycle import powercycle_constants


class PowercycleCommand(Subcommand):  # pylint: disable=abstract-method, too-many-instance-attributes
    """Base class for remote operations to set up powercycle."""

    def __init__(self):
        """Initialize PowercycleCommand."""

        self.expansions = yaml.safe_load(open(powercycle_constants.EXPANSIONS_FILE))

        self.retries = 0 if "ssh_retries" not in self.expansions else int(
            self.expansions["ssh_retries"])
        self.ssh_identity = self._get_ssh_identity()
        self.ssh_connection_options = self.ssh_identity + " " + self.expansions[
            "ssh_connection_options"]
        self.sudo = "" if self.is_windows() else "sudo"
        # The username on the Windows image that powercycle uses is currently the default user.
        self.user = "Administrator" if self.is_windows() else getpass.getuser()
        self.user_host = self.user + "@" + self.expansions["private_ip_address"]

        self.remote_op = RemoteOperations(
            user_host=self.user_host,
            ssh_connection_options=self.ssh_connection_options,
        )

    @staticmethod
    def is_windows() -> bool:
        """:return: True if running on Windows."""
        return sys.platform == "win32" or sys.platform == "cygwin"

    @staticmethod
    def _call(cmd):
        cmd = shlex.split(cmd)
        # Use a common pipe for stdout & stderr for logging.
        process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        buff_stdout, _ = process.communicate()
        buff = buff_stdout.decode("utf-8", "replace")
        return process.poll(), buff

    def _get_posix_workdir(self) -> str:
        workdir = self.expansions['workdir']
        if self.is_windows():
            workdir = workdir.replace("\\", "/")
        return workdir

    def _get_ssh_identity(self) -> str:
        workdir = self._get_posix_workdir()
        pem_file = '/'.join([workdir, 'powercycle.pem'])

        return f"-i {pem_file}"


class SetUpEC2Instance(PowercycleCommand):
    """Set up EC2 instance."""

    COMMAND = "setUpEC2Instance"

    def execute(self) -> None:  # pylint: disable=too-many-instance-attributes, too-many-locals, too-many-statements
        """:return: None."""

        # First operation -
        # Create remote_dir.
        group_cmd = f"id -Gn {self.user}"
        _, group = self._call(group_cmd)
        group = group.split(" ")[0]
        user_group = f"{self.user}:{group}"

        remote_dir = powercycle_constants.REMOTE_DIR
        db_path = powercycle_constants.DB_PATH

        set_permission_stmt = f"chmod -R 777"
        if self.is_windows():
            set_permission_stmt = f"setfacl -s user::rwx,group::rwx,other::rwx"
        cmds = f"{self.sudo} mkdir -p {remote_dir}; {self.sudo} chown -R {user_group} {remote_dir}; {set_permission_stmt} {remote_dir}; ls -ld {remote_dir}"
        cmds = f"{cmds}; {self.sudo} mkdir -p {db_path}; {self.sudo} chown -R {user_group} {db_path}; {set_permission_stmt} {db_path}; ls -ld {db_path}"

        self.remote_op.operation(SSHOperation.SHELL, cmds, None)

        # Second operation -
        # Copy buildscripts and mongoDB executables to the remote host.
        files = ["etc", "buildscripts", "dist-test/bin"]

        shared_libs = "dist-test/lib"
        if os.path.isdir(shared_libs):
            files.append(shared_libs)

        self.remote_op.operation(SSHOperation.COPY_TO, files, remote_dir)

        # Third operation -
        # Set up virtualenv on remote.
        venv = powercycle_constants.VIRTUALENV_DIR
        python = "/opt/mongodbtoolchain/v3/bin/python3" if "python" not in self.expansions else self.expansions[
            "python"]

        cmds = f"python_loc=$(which {python})"
        cmds = f"{cmds}; remote_dir={remote_dir}"
        cmds = f"{cmds}; if [ \"Windows_NT\" = \"$OS\" ]; then python_loc=$(cygpath -w $python_loc); remote_dir=$(cygpath -w $remote_dir); fi"
        cmds = f"{cmds}; virtualenv --python $python_loc --system-site-packages {venv}"
        cmds = f"{cmds}; activate=$(find {venv} -name 'activate')"
        cmds = f"{cmds}; . $activate"
        cmds = f"{cmds}; pip3 install -r $remote_dir/etc/pip/powercycle-requirements.txt"

        self.remote_op.operation(SSHOperation.SHELL, cmds, None)

        # Fourth operation -
        # Enable core dumps on non-Windows remote hosts.
        # The core pattern must specify a directory, since mongod --fork will chdir("/")
        # and cannot generate a core dump there (see SERVER-21635).
        # We need to reboot the host for the core limits to take effect.
        if not self.is_windows():
            core_pattern = f"{remote_dir}/dump_%e.%p.core"
            sysctl_conf = "/etc/sysctl.conf"
            cmds = "ulimit -a"
            cmds = f"{cmds}; echo \"{self.user} - core unlimited\" | {self.sudo} tee -a /etc/security/limits.conf"
            cmds = f"{cmds}; if [ -f {sysctl_conf} ]"
            cmds = f"{cmds}; then grep ^kernel.core_pattern {sysctl_conf}"
            cmds = f"{cmds}; if [ $? -eq 0 ]"
            cmds = f"{cmds}; then {self.sudo} sed -i \"s,kernel.core_pattern=.*,kernel.core_pattern=$core_pattern,\" {sysctl_conf}"
            cmds = f"{cmds}; else echo \"kernel.core_pattern={core_pattern}\" | {self.sudo} tee -a {sysctl_conf}"
            cmds = f"{cmds}; fi"
            cmds = f"{cmds}; else echo Cannot change the core pattern and no core dumps will be generated."
            cmds = f"{cmds}; fi"
            # The following line for restarting the machine is based on
            # https://unix.stackexchange.com/a/349558 in order to ensure the ssh client gets a
            # response from the remote machine before it restarts.
            cmds = f"{cmds}; nohup {self.sudo} reboot &>/dev/null & exit"
            self.remote_op.operation(SSHOperation.SHELL, cmds, None)

        # Fifth operation -
        # Print the ulimit & kernel.core_pattern
        if not self.is_windows():
            # Always exit successfully, as this is just informational.
            cmds = "uptime"
            cmds = f"{cmds}; ulimit -a"
            cmds = f"{cmds}; if [ -f /sbin/sysctl ]"
            cmds = f"{cmds}; then /sbin/sysctl kernel.core_pattern"
            cmds = f"{cmds}; fi"

            self.remote_op.operation(SSHOperation.SHELL, cmds, None, True)

        # Sixth operation -
        # Set up curator to collect system & process stats on remote.
        variant = "windows-64" if self.is_windows() else "ubuntu1604"
        curator_hash = "b0c3c0fc68bce26d9572796d6bed3af4a298e30e"
        curator_url = f"https://s3.amazonaws.com/boxes.10gen.com/build/curator/curator-dist-{variant}-{curator_hash}.tar.gz"
        cmds = f"curl -s {curator_url} | tar -xzv"
        monitor_system_file = powercycle_constants.MONITOR_SYSTEM_FILE
        monitor_proc_file = powercycle_constants.MONITOR_PROC_FILE
        if self.is_windows():
            # Since curator runs as SYSTEM user, ensure the output files can be accessed.
            cmds = f"{cmds}; touch {monitor_system_file}; chmod 777 {monitor_system_file}"
            cmds = f"{cmds}; cygrunsrv --install curator_sys --path curator --chdir $HOME --args 'stat system --file {monitor_system_file}'"
            cmds = f"{cmds}; touch {monitor_proc_file}; chmod 777 {monitor_proc_file}"
            cmds = f"{cmds}; cygrunsrv --install curator_proc --path curator --chdir $HOME --args 'stat process-all --file {monitor_proc_file}'"
            cmds = f"{cmds}; cygrunsrv --start curator_sys"
            cmds = f"{cmds}; cygrunsrv --start curator_proc"
        else:
            cmds = f"{cmds}; touch {monitor_system_file} {monitor_proc_file}"
            cmds = f"{cmds}; cmd=\"@reboot cd $HOME && {self.sudo} ./curator stat system >> {monitor_system_file}\""
            cmds = f"{cmds}; (crontab -l ; echo \"$cmd\") | crontab -"
            cmds = f"{cmds}; cmd=\"@reboot cd $HOME && $sudo ./curator stat process-all >> {monitor_proc_file}\""
            cmds = f"{cmds}; (crontab -l ; echo \"$cmd\") | crontab -"
            cmds = f"{cmds}; crontab -l"
            cmds = f"{cmds}; {{ {self.sudo} $HOME/curator stat system --file {monitor_system_file} > /dev/null 2>&1 & {self.sudo} $HOME/curator stat process-all --file {monitor_proc_file} > /dev/null 2>&1 & }} & disown"

        self.remote_op.operation(SSHOperation.SHELL, cmds, retry=True)

        # Seventh operation -
        # Install NotMyFault, used to crash Windows.
        if self.is_windows():
            windows_crash_zip = powercycle_constants.WINDOWS_CRASH_ZIP
            windows_crash_dl = powercycle_constants.WINDOWS_CRASH_DL
            windows_crash_dir = powercycle_constants.WINDOWS_CRASH_DIR

            cmds = f"curl -s -o {windows_crash_zip} {windows_crash_dl}"
            cmds = f"{cmds}; unzip -q {windows_crash_zip} -d {windows_crash_dir}"
            cmds = f"{cmds}; chmod +x {windows_crash_dir}/*.exe"
            self.remote_op.operation(SSHOperation.SHELL, cmds, None)


class TarEC2Artifacts(PowercycleCommand):
    """Tar EC2 artifacts."""

    COMMAND = "tarEC2Artifacts"

    def execute(self) -> None:
        """:return: None."""
        if "ec2_ssh_failure" in self.expansions:
            return
        tar_cmd = "tar" if "tar" not in self.expansions else self.expansions["tar"]
        ec2_artifacts = powercycle_constants.LOG_PATH
        # On test success, we only archive mongod.log.
        if self.expansions.get("exit_code", "1") != "0":
            ec2_artifacts = f"{ec2_artifacts} {powercycle_constants.DB_PATH}"
            ec2_artifacts = f"{ec2_artifacts} {powercycle_constants.BACKUP_ARTIFACTS}"
            if self.is_windows():
                ec2_artifacts = f"{ec2_artifacts} {powercycle_constants.EVENT_LOGPATH}"

        cmd = f"{tar_cmd} czf ec2_artifacts.tgz {ec2_artifacts}"

        self.remote_op.operation(SSHOperation.SHELL, cmd, None)


class CopyEC2Artifacts(PowercycleCommand):
    """Copy EC2 artifacts."""

    COMMAND = "copyEC2Artifacts"

    def execute(self) -> None:
        """:return: None."""
        if "ec2_ssh_failure" in self.expansions:
            return

        self.remote_op.operation(SSHOperation.COPY_FROM, "ec2_artifacts.tgz", None)


class GatherRemoteEventLogs(PowercycleCommand):
    """
    Gather remote event logs.

    The event logs on Windows are a useful diagnostic to have when determining if something bad
    happened to the remote machine after it was repeatedly crashed during powercycle testing. For
    example, the Application and System event logs have previously revealed that the mongod.exe
    process abruptly exited due to not being able to open a file despite the process successfully
    being restarted and responding to network requests.
    """

    COMMAND = "gatherRemoteEventLogs"

    def execute(self) -> None:
        """:return: None."""
        if not self.is_windows() or self.expansions.get("ec2_ssh_failure", ""):
            return

        event_logpath = powercycle_constants.EVENT_LOGPATH
        cmds = f"mkdir -p {event_logpath}"
        cmds = f"{cmds}; wevtutil qe Application /c:10000 /rd:true /f:Text > {event_logpath}/application.log"
        cmds = f"{cmds}; wevtutil qe Security /c:10000 /rd:true /f:Text > {event_logpath}/security.log"
        cmds = f"{cmds}; wevtutil qe System /c:10000 /rd:true /f:Text > {event_logpath}/system.log"

        self.remote_op.operation(SSHOperation.SHELL, cmds, None)


class GatherRemoteMongoCoredumps(PowercycleCommand):
    """Gather Remote Mongo Coredumps."""

    COMMAND = "gatherRemoteMongoCoredumps"

    def execute(self) -> None:
        """:return: None."""
        if "ec2_ssh_failure" in self.expansions:
            return

        remote_dir = powercycle_constants.REMOTE_DIR
        # Find all core files and move to $remote_dir
        cmds = "core_files=$(/usr/bin/find -H . \\( -name '*.core' -o -name '*.mdmp' \\) 2> /dev/null)"
        cmds = f"{cmds}; if [ -z \"$core_files\" ]; then exit 0; fi"
        cmds = f"{cmds}; echo Found remote core files $core_files, moving to $(pwd)"
        cmds = f"{cmds}; for core_file in $core_files"
        cmds = f"{cmds}; do base_name=$(echo $core_file | sed 's/.*\///')"
        cmds = f"{cmds}; if [ ! -f $base_name ]; then mv $core_file .; fi"
        cmds = f"{cmds}; done"

        self.remote_op.operation(SSHOperation.SHELL, cmds, remote_dir)


class CopyRemoteMongoCoredumps(PowercycleCommand):
    """Copy Remote Mongo Coredumps."""

    COMMAND = "copyRemoteMongoCoredumps"

    def execute(self) -> None:
        """:return: None."""
        if self.expansions.get("ec2_ssh_failure", ""):
            return

        if self.is_windows():
            core_suffix = "mdmp"
        else:
            core_suffix = "core"

        remote_dir = powercycle_constants.REMOTE_DIR
        # Core file may not exist so we ignore the return code.
        self.remote_op.operation(SSHOperation.SHELL, f"{remote_dir}/*.{core_suffix}", None, True)


class CopyEC2MonitorFiles(PowercycleCommand):
    """Copy EC2 monitor files."""

    COMMAND = "copyEC2MonitorFiles"

    def execute(self) -> None:
        """:return: None."""
        tar_cmd = "tar" if "tar" not in self.expansions else self.expansions["tar"]
        cmd = f"{tar_cmd} czf ec2_monitor_files.tgz {powercycle_constants.EC2_MONITOR_FILES}"

        self.remote_op.operation(SSHOperation.SHELL, cmd, None)
        self.remote_op.operation(SSHOperation.COPY_FROM, 'ec2_monitor_files.tgz', None)


class RunHangAnalyzerOnRemoteInstance(PowercycleCommand):
    """Run the hang-analyzer on a remote instance."""

    COMMAND = "runHangAnalyzerOnRemoteInstance"

    def execute(self) -> None:  # pylint: disable=too-many-locals
        """:return: None."""
        if "private_ip_address" not in self.expansions:
            return
        hang_analyzer_processes = "dbtest,java,mongo,mongod,mongos,python,_test" if "hang_analyzer_processes" not in self.expansions else self.expansions[
            "hang_analyzer_processes"]
        hang_analyzer_option = f"-o file -o stdout -p {hang_analyzer_processes}"
        hang_analyzer_dump_core = True if "hang_analyzer_dump_core" not in self.expansions else self.expansions[
            "hang_analyzer_dump_core"]
        if hang_analyzer_dump_core:
            hang_analyzer_option = f"-c {hang_analyzer_option}"

        core_ext = "core"
        if self.is_windows():
            core_ext = "mdmp"
        remote_dir = powercycle_constants.REMOTE_DIR
        files = self._call("ls")[1].split("\n")
        dbg_regex = re.compile(r"(\.debug$)|(\.dSYM$)|(\.pdb$)")
        debug_files = [f for f in files if dbg_regex.match(f)]
        file_param = []
        for debug_file in debug_files:
            file_param.append(debug_file)
        if file_param:
            self.remote_op.operation(SSHOperation.COPY_TO, file_param, remote_dir)

        # Activate virtualenv on remote host. The virtualenv bin_dir is different for Linux and
        # Windows.
        venv = powercycle_constants.VIRTUALENV_DIR
        cmds = f"activate=$(find {venv} -name 'activate')"
        cmds = f"{cmds}; . $activate"
        # In the 'cmds' variable we pass to remote host, use 'python' instead of '$python' since
        # we don't want to evaluate the local python variable, but instead pass the python string
        # so the remote host will use the right python when the virtualenv is sourced.
        cmds = f"{cmds}; cd {remote_dir}"
        cmds = f"{cmds}; PATH=\"/opt/mongodbtoolchain/gdb/bin:$PATH\" python buildscripts/resmoke.py hang-analyzer {hang_analyzer_option}"
        self.remote_op.operation(SSHOperation.SHELL, cmds, None)

        file_param = []
        file_param.append(f"{remote_dir}/debugger*.*")
        file_param.append(f"{remote_dir}/*.{core_ext}")
        self.remote_op.operation(SSHOperation.COPY_FROM, file_param, None)


class NoOp(Subcommand):
    """No op."""

    def execute(self) -> None:
        """:return: None."""
        pass


class PowercyclePlugin(PluginInterface):
    """Interact with powercycle_operations."""

    def add_subcommand(self, subparsers):
        """
        Add 'powercycle_operations' subcommand.

        :param subparsers: argparse parser to add to
        :return: None
        """
        subparsers.add_parser(SetUpEC2Instance.COMMAND)
        subparsers.add_parser(TarEC2Artifacts.COMMAND)
        subparsers.add_parser(CopyEC2Artifacts.COMMAND)
        subparsers.add_parser(GatherRemoteEventLogs.COMMAND)
        subparsers.add_parser(GatherRemoteMongoCoredumps.COMMAND)
        subparsers.add_parser(CopyRemoteMongoCoredumps.COMMAND)
        subparsers.add_parser(CopyEC2MonitorFiles.COMMAND)
        subparsers.add_parser(RunHangAnalyzerOnRemoteInstance.COMMAND)
        # Accept arbitrary args like 'powercycle.py undodb foobar', but ignore them.

    def parse(self, subcommand, parser, parsed_args, **kwargs):  # pylint: disable=too-many-return-statements
        """
        Return powercycle_operation if command is one we recognize.

        :param subcommand: equivalent to parsed_args.command
        :param parser: parser used
        :param parsed_args: output of parsing
        :param kwargs: additional args
        :return: None or a Subcommand
        """
        # Only return subcommand if expansion file has been written.
        if not os.path.exists(powercycle_constants.EXPANSIONS_FILE):
            print(f"Did not find {powercycle_constants.EXPANSIONS_FILE}, skipping {subcommand}.")
            return NoOp()

        if subcommand == SetUpEC2Instance.COMMAND:
            return SetUpEC2Instance()
        elif subcommand == TarEC2Artifacts.COMMAND:
            return TarEC2Artifacts()
        elif subcommand == CopyEC2Artifacts.COMMAND:
            return CopyEC2Artifacts()
        elif subcommand == GatherRemoteEventLogs.COMMAND:
            return GatherRemoteEventLogs()
        elif subcommand == GatherRemoteMongoCoredumps.COMMAND:
            return GatherRemoteMongoCoredumps()
        elif subcommand == CopyRemoteMongoCoredumps.COMMAND:
            return CopyRemoteMongoCoredumps()
        elif subcommand == CopyEC2MonitorFiles.COMMAND:
            return CopyEC2MonitorFiles()
        elif subcommand == RunHangAnalyzerOnRemoteInstance.COMMAND:
            return RunHangAnalyzerOnRemoteInstance()
        else:
            return None
@@ -11,6 +11,10 @@ import argparse
from buildscripts.resmokelib.plugin import PluginInterface, Subcommand
from buildscripts.resmokelib.powercycle import powercycle, powercycle_config, powercycle_constants
from buildscripts.resmokelib.powercycle.remote_hang_analyzer import RunHangAnalyzerOnRemoteInstance
from buildscripts.resmokelib.powercycle.save_diagnostics import GatherRemoteEventLogs, TarEC2Artifacts, \
    CopyEC2Artifacts, CopyEC2MonitorFiles, GatherRemoteMongoCoredumps, CopyRemoteMongoCoredumps
from buildscripts.resmokelib.powercycle.setup import SetUpEC2Instance

SUBCOMMAND = "powercycle"

@@ -18,6 +22,12 @@ SUBCOMMAND = "powercycle"
class Powercycle(Subcommand):
    """Main class to run powercycle subcommand."""

    # Parser command Enum
    RUN = 1
    HOST_SETUP = 2
    SAVE_DIAG = 3
    REMOTE_HANG_ANALYZER = 4

    def __init__(self, parser_actions, options):
        """Initialize."""
        self.parser_actions = parser_actions
@@ -25,8 +35,38 @@ class Powercycle(Subcommand):

    def execute(self):
        """Execute powercycle test."""
        return {
            self.RUN: self._exec_powercycle_main, self.HOST_SETUP: self._exec_powercycle_host_setup,
            self.SAVE_DIAG: self._exec_powercycle_save_diagnostics,
            self.REMOTE_HANG_ANALYZER: self._exec_powercycle_hang_analyzer
        }[self.options.run_option]()

    def _exec_powercycle_main(self):
        powercycle.main(self.parser_actions, self.options)

    @staticmethod
    def _exec_powercycle_host_setup():
        SetUpEC2Instance().execute()

    @staticmethod
    def _exec_powercycle_save_diagnostics():

        # The event logs on Windows are a useful diagnostic to have when determining if something bad
        # happened to the remote machine after it was repeatedly crashed during powercycle testing. For
        # example, the Application and System event logs have previously revealed that the mongod.exe
        # process abruptly exited due to not being able to open a file despite the process successfully
        # being restarted and responding to network requests.
        GatherRemoteEventLogs().execute()
        TarEC2Artifacts().execute()
        CopyEC2Artifacts().execute()
        CopyEC2MonitorFiles().execute()
        GatherRemoteMongoCoredumps().execute()
        CopyRemoteMongoCoredumps().execute()

    @staticmethod
    def _exec_powercycle_hang_analyzer():
        RunHangAnalyzerOnRemoteInstance().execute()


class PowercyclePlugin(PluginInterface):
    """Interface to parsing."""
@@ -35,9 +75,33 @@ class PowercyclePlugin(PluginInterface):
        """Initialize."""
        self.parser_actions = None

    @staticmethod
    def _add_powercycle_commands(parent_parser):
        """Add sub-subcommands for powercycle."""
        sub_parsers = parent_parser.add_subparsers(help="powercycle commands")

        setup_parser = sub_parsers.add_parser("setup-host")
        setup_parser.set_defaults(run_option=Powercycle.HOST_SETUP)

        save_parser = sub_parsers.add_parser("save-diagnostics")
        save_parser.set_defaults(run_option=Powercycle.SAVE_DIAG)

        save_parser = sub_parsers.add_parser("remote-hang-analyzer")
        save_parser.set_defaults(run_option=Powercycle.REMOTE_HANG_ANALYZER)

        run_parser = sub_parsers.add_parser("run")
        run_parser.set_defaults(run_option=Powercycle.RUN)

        # Only need to return run_parser for further processing; others don't need additional args.
        return run_parser

    def add_subcommand(self, subparsers):  # pylint: disable=too-many-statements
        """Create and add the parser for the subcommand."""
        parser = subparsers.add_parser(SUBCOMMAND, help=__doc__, usage="usage")
        intermediate_parser = subparsers.add_parser(
            SUBCOMMAND, help=__doc__,
            usage="MongoDB Powercycle tests; type one of the subcommands for more information")

        parser = self._add_powercycle_commands(intermediate_parser)

        test_options = parser.add_argument_group("Test Options")
        mongodb_options = parser.add_argument_group("MongoDB Options")
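Illustrative sketch, not part of this commit: each powercycle sub-subcommand registered above only sets run_option, which Powercycle.execute() then dispatches on. The constants and the standalone parser below are simplified stand-ins for the real classes.

import argparse

# Hypothetical constants mirroring Powercycle.RUN / HOST_SETUP / SAVE_DIAG / REMOTE_HANG_ANALYZER.
RUN, HOST_SETUP, SAVE_DIAG, REMOTE_HANG_ANALYZER = 1, 2, 3, 4

parser = argparse.ArgumentParser(prog="resmoke.py")
subparsers = parser.add_subparsers()
powercycle_parser = subparsers.add_parser("powercycle")
commands = powercycle_parser.add_subparsers(help="powercycle commands")
for name, option in [("run", RUN), ("setup-host", HOST_SETUP),
                     ("save-diagnostics", SAVE_DIAG),
                     ("remote-hang-analyzer", REMOTE_HANG_ANALYZER)]:
    # Each sub-subcommand only records which run option was chosen.
    commands.add_parser(name).set_defaults(run_option=option)

args = parser.parse_args(["powercycle", "setup-host"])
print(args.run_option)  # 2, i.e. HOST_SETUP
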
buildscripts/resmokelib/powercycle/lib/__init__.py (new file, 62 lines)
@@ -0,0 +1,62 @@
"""Library functions for powercycle."""

import getpass
import shlex
import subprocess
import sys

import yaml

from buildscripts.resmokelib.plugin import Subcommand
from buildscripts.resmokelib.powercycle import powercycle_constants
from buildscripts.resmokelib.powercycle.lib.remote_operations import RemoteOperations


class PowercycleCommand(Subcommand):  # pylint: disable=abstract-method, too-many-instance-attributes
    """Base class for remote operations to set up powercycle."""

    def __init__(self):
        """Initialize PowercycleCommand."""

        self.expansions = yaml.safe_load(open(powercycle_constants.EXPANSIONS_FILE))

        self.retries = 0 if "ssh_retries" not in self.expansions else int(
            self.expansions["ssh_retries"])
        self.ssh_identity = self._get_ssh_identity()
        self.ssh_connection_options = self.ssh_identity + " " + self.expansions[
            "ssh_connection_options"]
        self.sudo = "" if self.is_windows() else "sudo"
        # The username on the Windows image that powercycle uses is currently the default user.
        self.user = "Administrator" if self.is_windows() else getpass.getuser()
        self.user_host = self.user + "@" + self.expansions["private_ip_address"]

        self.remote_op = RemoteOperations(
            user_host=self.user_host,
            ssh_connection_options=self.ssh_connection_options,
        )

    @staticmethod
    def is_windows() -> bool:
        """:return: True if running on Windows."""
        return sys.platform == "win32" or sys.platform == "cygwin"

    @staticmethod
    def _call(cmd):
        cmd = shlex.split(cmd)
        # Use a common pipe for stdout & stderr for logging.
        process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        buff_stdout, _ = process.communicate()
        buff = buff_stdout.decode("utf-8", "replace")
        return process.poll(), buff

    def _get_posix_workdir(self) -> str:
        workdir = self.expansions['workdir']
        if self.is_windows():
            workdir = workdir.replace("\\", "/")
        return workdir

    def _get_ssh_identity(self) -> str:
        workdir = self._get_posix_workdir()
        pem_file = '/'.join([workdir, 'powercycle.pem'])

        return f"-i {pem_file}"
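Illustrative sketch, not part of this commit: PowercycleCommand is driven entirely by the Evergreen expansions YAML it loads in __init__. A minimal file covering just the keys read above could be produced as follows; the values and the output path are hypothetical placeholders.

import yaml

# Keys below are the ones PowercycleCommand.__init__ reads; values are placeholders.
expansions = {
    "ssh_retries": "3",                                      # optional; 0 retries when absent
    "ssh_connection_options": "-o StrictHostKeyChecking=no",
    "private_ip_address": "10.0.0.1",                        # remote powercycle host
    "workdir": "/data/mci",                                  # used to locate <workdir>/powercycle.pem
}

# powercycle_constants.EXPANSIONS_FILE names the real location; "expansions.yml" is illustrative.
with open("expansions.yml", "w") as fh:
    yaml.safe_dump(expansions, fh)
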
@@ -31,7 +31,7 @@ import pymongo
import requests
import yaml

from buildscripts.powercycle_setup import remote_operations
from buildscripts.resmokelib.powercycle.lib import remote_operations
from buildscripts.resmokelib.powercycle import powercycle_config, powercycle_constants

# See https://docs.python.org/2/library/sys.html#sys.platform
@@ -1807,7 +1807,7 @@ def main(parser_actions, options):  # pylint: disable=too-many-branches,too-many
    verify_remote_access(local_ops)

    # Pass client_args to the remote script invocation.
    client_args = "powercycle"
    client_args = "powercycle run"
    options_dict = vars(options)
    for action in parser_actions:
        option_value = options_dict.get(action.dest, None)
@@ -0,0 +1,57 @@
"""Run the hang analyzer on the remote powercycle instance."""

import os
import re

from buildscripts.resmokelib.powercycle import powercycle_constants
from buildscripts.resmokelib.powercycle.lib import PowercycleCommand
from buildscripts.resmokelib.powercycle.lib.remote_operations import SSHOperation


class RunHangAnalyzerOnRemoteInstance(PowercycleCommand):
    """Run the hang-analyzer on a remote instance."""

    COMMAND = "runHangAnalyzerOnRemoteInstance"

    def execute(self) -> None:  # pylint: disable=too-many-locals
        """:return: None."""
        if "private_ip_address" not in self.expansions:
            return
        hang_analyzer_processes = "dbtest,java,mongo,mongod,mongos,python,_test" if "hang_analyzer_processes" not in self.expansions else self.expansions[
            "hang_analyzer_processes"]
        hang_analyzer_option = f"-o file -o stdout -p {hang_analyzer_processes}"
        hang_analyzer_dump_core = True if "hang_analyzer_dump_core" not in self.expansions else self.expansions[
            "hang_analyzer_dump_core"]
        if hang_analyzer_dump_core:
            hang_analyzer_option = f"-c {hang_analyzer_option}"

        core_ext = "core"
        if self.is_windows():
            core_ext = "mdmp"
        remote_dir = powercycle_constants.REMOTE_DIR
        files = self._call("ls")[1].split("\n")
        dbg_regex = re.compile(r"(\.debug$)|(\.dSYM$)|(\.pdb$)")
        debug_files = [f for f in files if dbg_regex.match(f)]
        file_param = []
        for debug_file in debug_files:
            file_param.append(debug_file)
        if file_param:
            self.remote_op.operation(SSHOperation.COPY_TO, file_param, remote_dir)

        # Activate virtualenv on remote host. The virtualenv bin_dir is different for Linux and
        # Windows.
        venv = powercycle_constants.VIRTUALENV_DIR
        cmds = f"activate=$(find {venv} -name 'activate')"
        cmds = f"{cmds}; . $activate"

        # In the 'cmds' variable we pass to remote host, use 'python' instead of '$python' since
        # we don't want to evaluate the local python variable, but instead pass the python string
        # so the remote host will use the right python when the virtualenv is sourced.
        cmds = f"{cmds}; cd {remote_dir}"
        cmds = f"{cmds}; PATH=\"/opt/mongodbtoolchain/gdb/bin:$PATH\" python buildscripts/resmoke.py hang-analyzer {hang_analyzer_option}"
        self.remote_op.operation(SSHOperation.SHELL, cmds, None)

        file_param = []
        file_param.append(f"{remote_dir}/debugger*.*")
        file_param.append(f"{remote_dir}/*.{core_ext}")
        self.remote_op.operation(SSHOperation.COPY_FROM, file_param, None)
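Worked example, not part of this commit: with no overrides in the expansions file, the flag string handed to buildscripts/resmoke.py hang-analyzer above is assembled as follows.

# Default flag assembly from execute() above, with no expansion overrides.
hang_analyzer_processes = "dbtest,java,mongo,mongod,mongos,python,_test"
hang_analyzer_option = f"-o file -o stdout -p {hang_analyzer_processes}"
hang_analyzer_dump_core = True
if hang_analyzer_dump_core:
    hang_analyzer_option = f"-c {hang_analyzer_option}"
print(hang_analyzer_option)
# -c -o file -o stdout -p dbtest,java,mongo,mongod,mongos,python,_test
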
buildscripts/resmokelib/powercycle/save_diagnostics/__init__.py (new file, 125 lines)
@@ -0,0 +1,125 @@
"""Save various diagnostics info from the remote powercycle instance."""

from buildscripts.resmokelib.powercycle import powercycle_constants
from buildscripts.resmokelib.powercycle.lib import PowercycleCommand
from buildscripts.resmokelib.powercycle.lib.remote_operations import SSHOperation


class TarEC2Artifacts(PowercycleCommand):
    """Tar EC2 artifacts."""

    COMMAND = "tarEC2Artifacts"

    def execute(self) -> None:
        """:return: None."""
        if "ec2_ssh_failure" in self.expansions:
            return
        tar_cmd = "tar" if "tar" not in self.expansions else self.expansions["tar"]
        ec2_artifacts = powercycle_constants.LOG_PATH
        # On test success, we only archive mongod.log.
        if self.expansions.get("exit_code", "1") != "0":
            ec2_artifacts = f"{ec2_artifacts} {powercycle_constants.DB_PATH}"
            ec2_artifacts = f"{ec2_artifacts} {powercycle_constants.BACKUP_ARTIFACTS}"
            if self.is_windows():
                ec2_artifacts = f"{ec2_artifacts} {powercycle_constants.EVENT_LOGPATH}"

        cmd = f"{tar_cmd} czf ec2_artifacts.tgz {ec2_artifacts}"

        self.remote_op.operation(SSHOperation.SHELL, cmd, None)


class CopyEC2Artifacts(PowercycleCommand):
    """Copy EC2 artifacts."""

    COMMAND = "copyEC2Artifacts"

    def execute(self) -> None:
        """:return: None."""
        if "ec2_ssh_failure" in self.expansions:
            return

        self.remote_op.operation(SSHOperation.COPY_FROM, "ec2_artifacts.tgz", None)


class GatherRemoteEventLogs(PowercycleCommand):
    """
    Gather remote event logs.

    The event logs on Windows are a useful diagnostic to have when determining if something bad
    happened to the remote machine after it was repeatedly crashed during powercycle testing. For
    example, the Application and System event logs have previously revealed that the mongod.exe
    process abruptly exited due to not being able to open a file despite the process successfully
    being restarted and responding to network requests.
    """

    COMMAND = "gatherRemoteEventLogs"

    def execute(self) -> None:
        """:return: None."""
        if not self.is_windows() or self.expansions.get("ec2_ssh_failure", ""):
            return

        event_logpath = powercycle_constants.EVENT_LOGPATH
        cmds = f"mkdir -p {event_logpath}"
        cmds = f"{cmds}; wevtutil qe Application /c:10000 /rd:true /f:Text > {event_logpath}/application.log"
        cmds = f"{cmds}; wevtutil qe Security /c:10000 /rd:true /f:Text > {event_logpath}/security.log"
        cmds = f"{cmds}; wevtutil qe System /c:10000 /rd:true /f:Text > {event_logpath}/system.log"

        self.remote_op.operation(SSHOperation.SHELL, cmds, None)


class GatherRemoteMongoCoredumps(PowercycleCommand):
    """Gather Remote Mongo Coredumps."""

    COMMAND = "gatherRemoteMongoCoredumps"

    def execute(self) -> None:
        """:return: None."""
        if "ec2_ssh_failure" in self.expansions:
            return

        remote_dir = powercycle_constants.REMOTE_DIR
        # Find all core files and move to $remote_dir
        cmds = "core_files=$(/usr/bin/find -H . \\( -name '*.core' -o -name '*.mdmp' \\) 2> /dev/null)"
        cmds = f"{cmds}; if [ -z \"$core_files\" ]; then exit 0; fi"
        cmds = f"{cmds}; echo Found remote core files $core_files, moving to $(pwd)"
        cmds = f"{cmds}; for core_file in $core_files"
        cmds = f"{cmds}; do base_name=$(echo $core_file | sed 's/.*\///')"
        cmds = f"{cmds}; if [ ! -f $base_name ]; then mv $core_file .; fi"
        cmds = f"{cmds}; done"

        self.remote_op.operation(SSHOperation.SHELL, cmds, remote_dir)


class CopyRemoteMongoCoredumps(PowercycleCommand):
    """Copy Remote Mongo Coredumps."""

    COMMAND = "copyRemoteMongoCoredumps"

    def execute(self) -> None:
        """:return: None."""
        if self.expansions.get("ec2_ssh_failure", ""):
            return

        if self.is_windows():
            core_suffix = "mdmp"
        else:
            core_suffix = "core"

        remote_dir = powercycle_constants.REMOTE_DIR
        # Core file may not exist so we ignore the return code.
        self.remote_op.operation(SSHOperation.SHELL, f"{remote_dir}/*.{core_suffix}", None, True)


class CopyEC2MonitorFiles(PowercycleCommand):
    """Copy EC2 monitor files."""

    COMMAND = "copyEC2MonitorFiles"

    def execute(self) -> None:
        """:return: None."""
        tar_cmd = "tar" if "tar" not in self.expansions else self.expansions["tar"]
        cmd = f"{tar_cmd} czf ec2_monitor_files.tgz {powercycle_constants.EC2_MONITOR_FILES}"

        self.remote_op.operation(SSHOperation.SHELL, cmd, None)
        self.remote_op.operation(SSHOperation.COPY_FROM, 'ec2_monitor_files.tgz', None)
buildscripts/resmokelib/powercycle/setup/__init__.py (new file, 135 lines)
@@ -0,0 +1,135 @@
"""Set up the remote host for powercycle."""

import os

from buildscripts.resmokelib.powercycle.lib import PowercycleCommand
from buildscripts.resmokelib.powercycle import powercycle_constants
from buildscripts.resmokelib.powercycle.lib.remote_operations import SSHOperation


class SetUpEC2Instance(PowercycleCommand):
    """Set up EC2 instance."""

    COMMAND = "setUpEC2Instance"

    def execute(self) -> None:  # pylint: disable=too-many-instance-attributes, too-many-locals, too-many-statements
        """:return: None."""

        # First operation -
        # Create remote_dir.
        group_cmd = f"id -Gn {self.user}"
        _, group = self._call(group_cmd)
        group = group.split(" ")[0]
        user_group = f"{self.user}:{group}"

        remote_dir = powercycle_constants.REMOTE_DIR
        db_path = powercycle_constants.DB_PATH

        set_permission_stmt = f"chmod -R 777"
        if self.is_windows():
            set_permission_stmt = f"setfacl -s user::rwx,group::rwx,other::rwx"
        cmds = f"{self.sudo} mkdir -p {remote_dir}; {self.sudo} chown -R {user_group} {remote_dir}; {set_permission_stmt} {remote_dir}; ls -ld {remote_dir}"
        cmds = f"{cmds}; {self.sudo} mkdir -p {db_path}; {self.sudo} chown -R {user_group} {db_path}; {set_permission_stmt} {db_path}; ls -ld {db_path}"

        self.remote_op.operation(SSHOperation.SHELL, cmds, None)

        # Second operation -
        # Copy buildscripts and mongoDB executables to the remote host.
        files = ["etc", "buildscripts", "dist-test/bin"]

        shared_libs = "dist-test/lib"
        if os.path.isdir(shared_libs):
            files.append(shared_libs)

        self.remote_op.operation(SSHOperation.COPY_TO, files, remote_dir)

        # Third operation -
        # Set up virtualenv on remote.
        venv = powercycle_constants.VIRTUALENV_DIR
        python = "/opt/mongodbtoolchain/v3/bin/python3" if "python" not in self.expansions else self.expansions[
            "python"]

        cmds = f"python_loc=$(which {python})"
        cmds = f"{cmds}; remote_dir={remote_dir}"
        cmds = f"{cmds}; if [ \"Windows_NT\" = \"$OS\" ]; then python_loc=$(cygpath -w $python_loc); remote_dir=$(cygpath -w $remote_dir); fi"
        cmds = f"{cmds}; virtualenv --python $python_loc --system-site-packages {venv}"
        cmds = f"{cmds}; activate=$(find {venv} -name 'activate')"
        cmds = f"{cmds}; . $activate"
        cmds = f"{cmds}; pip3 install -r $remote_dir/etc/pip/powercycle-requirements.txt"

        self.remote_op.operation(SSHOperation.SHELL, cmds, None)

        # Fourth operation -
        # Enable core dumps on non-Windows remote hosts.
        # The core pattern must specify a directory, since mongod --fork will chdir("/")
        # and cannot generate a core dump there (see SERVER-21635).
        # We need to reboot the host for the core limits to take effect.
        if not self.is_windows():
            core_pattern = f"{remote_dir}/dump_%e.%p.core"
            sysctl_conf = "/etc/sysctl.conf"
            cmds = "ulimit -a"
            cmds = f"{cmds}; echo \"{self.user} - core unlimited\" | {self.sudo} tee -a /etc/security/limits.conf"
            cmds = f"{cmds}; if [ -f {sysctl_conf} ]"
            cmds = f"{cmds}; then grep ^kernel.core_pattern {sysctl_conf}"
            cmds = f"{cmds}; if [ $? -eq 0 ]"
            cmds = f"{cmds}; then {self.sudo} sed -i \"s,kernel.core_pattern=.*,kernel.core_pattern=$core_pattern,\" {sysctl_conf}"
            cmds = f"{cmds}; else echo \"kernel.core_pattern={core_pattern}\" | {self.sudo} tee -a {sysctl_conf}"
            cmds = f"{cmds}; fi"
            cmds = f"{cmds}; else echo Cannot change the core pattern and no core dumps will be generated."
            cmds = f"{cmds}; fi"
            # The following line for restarting the machine is based on
            # https://unix.stackexchange.com/a/349558 in order to ensure the ssh client gets a
            # response from the remote machine before it restarts.
            cmds = f"{cmds}; nohup {self.sudo} reboot &>/dev/null & exit"
            self.remote_op.operation(SSHOperation.SHELL, cmds, None)

        # Fifth operation -
        # Print the ulimit & kernel.core_pattern
        if not self.is_windows():
            # Always exit successfully, as this is just informational.
            cmds = "uptime"
            cmds = f"{cmds}; ulimit -a"
            cmds = f"{cmds}; if [ -f /sbin/sysctl ]"
            cmds = f"{cmds}; then /sbin/sysctl kernel.core_pattern"
            cmds = f"{cmds}; fi"

            self.remote_op.operation(SSHOperation.SHELL, cmds, None, True)

        # Sixth operation -
        # Set up curator to collect system & process stats on remote.
        variant = "windows-64" if self.is_windows() else "ubuntu1604"
        curator_hash = "b0c3c0fc68bce26d9572796d6bed3af4a298e30e"
        curator_url = f"https://s3.amazonaws.com/boxes.10gen.com/build/curator/curator-dist-{variant}-{curator_hash}.tar.gz"
        cmds = f"curl -s {curator_url} | tar -xzv"
        monitor_system_file = powercycle_constants.MONITOR_SYSTEM_FILE
        monitor_proc_file = powercycle_constants.MONITOR_PROC_FILE
        if self.is_windows():
            # Since curator runs as SYSTEM user, ensure the output files can be accessed.
            cmds = f"{cmds}; touch {monitor_system_file}; chmod 777 {monitor_system_file}"
            cmds = f"{cmds}; cygrunsrv --install curator_sys --path curator --chdir $HOME --args 'stat system --file {monitor_system_file}'"
            cmds = f"{cmds}; touch {monitor_proc_file}; chmod 777 {monitor_proc_file}"
            cmds = f"{cmds}; cygrunsrv --install curator_proc --path curator --chdir $HOME --args 'stat process-all --file {monitor_proc_file}'"
            cmds = f"{cmds}; cygrunsrv --start curator_sys"
            cmds = f"{cmds}; cygrunsrv --start curator_proc"
        else:
            cmds = f"{cmds}; touch {monitor_system_file} {monitor_proc_file}"
            cmds = f"{cmds}; cmd=\"@reboot cd $HOME && {self.sudo} ./curator stat system >> {monitor_system_file}\""
            cmds = f"{cmds}; (crontab -l ; echo \"$cmd\") | crontab -"
            cmds = f"{cmds}; cmd=\"@reboot cd $HOME && $sudo ./curator stat process-all >> {monitor_proc_file}\""
            cmds = f"{cmds}; (crontab -l ; echo \"$cmd\") | crontab -"
            cmds = f"{cmds}; crontab -l"
            cmds = f"{cmds}; {{ {self.sudo} $HOME/curator stat system --file {monitor_system_file} > /dev/null 2>&1 & {self.sudo} $HOME/curator stat process-all --file {monitor_proc_file} > /dev/null 2>&1 & }} & disown"

        self.remote_op.operation(SSHOperation.SHELL, cmds, retry=True)

        # Seventh operation -
        # Install NotMyFault, used to crash Windows.
        if self.is_windows():
            windows_crash_zip = powercycle_constants.WINDOWS_CRASH_ZIP
            windows_crash_dl = powercycle_constants.WINDOWS_CRASH_DL
            windows_crash_dir = powercycle_constants.WINDOWS_CRASH_DIR

            cmds = f"curl -s -o {windows_crash_zip} {windows_crash_dl}"
            cmds = f"{cmds}; unzip -q {windows_crash_zip} -d {windows_crash_dir}"
            cmds = f"{cmds}; chmod +x {windows_crash_dir}/*.exe"
            self.remote_op.operation(SSHOperation.SHELL, cmds, None)
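Illustrative sketch, not part of this commit: every operation above builds one remote invocation by chaining shell statements with "; " before handing the string to RemoteOperations over SSH. The helper below is hypothetical and only makes that pattern explicit, using the fifth operation's statements as input.

def chain(*statements: str) -> str:
    """Join shell statements with '; ' the way the setup steps above do."""
    return "; ".join(statements)


cmds = chain(
    "uptime",
    "ulimit -a",
    "if [ -f /sbin/sysctl ]",
    "then /sbin/sysctl kernel.core_pattern",
    "fi",
)
print(cmds)
# uptime; ulimit -a; if [ -f /sbin/sysctl ]; then /sbin/sysctl kernel.core_pattern; fi
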
@@ -2373,7 +2373,7 @@ functions:
        # Set an exit trap so we can save the real exit status (see SERVER-34033).
        trap 'echo $? > error_exit.txt; exit 0' EXIT
        set +o errexit
        eval $python -u buildscripts/resmoke.py powercycle \
        eval $python -u buildscripts/resmoke.py powercycle run \
          "--sshUserHost=$(printf "%s@%s" "$user" "${private_ip_address}") \
          --sshConnection=\"-i ${private_key_file} ${ssh_connection_options}\" \
          --taskName=${task_name}"
@@ -2631,10 +2631,10 @@ functions:
        set -o errexit

        ${activate_virtualenv}
        $python buildscripts/powercycle_operations.py setUpEC2Instance
        $python buildscripts/resmoke.py powercycle setup-host

  ### Process & archive remote EC2 artifacts ###
  "tar EC2 artifacts": &tar_ec2_artifacts
  "save powercycle artifacts": &save_powercycle_artifacts
    command: shell.exec
    params:
      shell: bash
@@ -2647,79 +2647,7 @@ functions:
        fi

        ${activate_virtualenv}
        $python buildscripts/powercycle_operations.py tarEC2Artifacts

  "copy EC2 artifacts": &copy_ec2_artifacts
    command: shell.exec
    params:
      shell: bash
      working_dir: src
      script: |
        if [ ! -f powercycle_ip_address.yml ]; then
          exit 0
        fi

        ${activate_virtualenv}
        $python buildscripts/powercycle_operations.py copyEC2Artifacts

  "copy ec2 monitor files": &copy_ec2_monitor_files
    command: shell.exec
    params:
      shell: bash
      working_dir: src
      script: |
        set -o verbose

        if [ ! -f powercycle_ip_address.yml ]; then
          exit 0
        fi

        ${activate_virtualenv}
        $python buildscripts/powercycle_operations.py copyEC2MonitorFiles

  # The event logs on Windows are a useful diagnostic to have when determining if something bad
  # happened to the remote machine after it was repeatedly crashed during powercycle testing. For
  # example, the Application and System event logs have previously revealed that the mongod.exe
  # process abruptly exited due to not being able to open a file despite the process successfully
  # being restarted and responding to network requests.
  "gather remote event logs": &gather_remote_event_logs
    command: shell.exec
    params:
      shell: bash
      working_dir: src
      script: |
        if [ ! -f powercycle_ip_address.yml ]; then
          exit 0
        fi

        ${activate_virtualenv}
        $python buildscripts/powercycle_operations.py gatherRemoteEventLogs

  "gather remote mongo coredumps": &gather_remote_mongo_coredumps
    command: shell.exec
    params:
      shell: bash
      working_dir: "src"
      script: |
        if [ ! -f powercycle_ip_address.yml ]; then
          exit 0
        fi

        ${activate_virtualenv}
        $python buildscripts/powercycle_operations.py gatherRemoteMongoCoredumps

  "copy remote mongo coredumps": &copy_remote_mongo_coredumps
    command: shell.exec
    params:
      shell: bash
      working_dir: "src"
      script: |
        if [ ! -f powercycle_ip_address.yml ]; then
          exit 0
        fi

        ${activate_virtualenv}
        $python buildscripts/powercycle_operations.py copyRemoteMongoCoredumps
        $python buildscripts/resmoke.py powercycle save-diagnostics

  "archive remote EC2 artifacts": &archive_remote_ec2_artifacts
    command: s3.put
@@ -2748,12 +2676,7 @@ functions:
      optional: true

  "save ec2 task artifacts":
    - *gather_remote_event_logs
    - *tar_ec2_artifacts
    - *copy_ec2_artifacts
    - *copy_ec2_monitor_files
    - *gather_remote_mongo_coredumps
    - *copy_remote_mongo_coredumps
    - *save_powercycle_artifacts
    - *archive_remote_ec2_artifacts
    - *archive_remote_ec2_monitor_files

@@ -3105,7 +3028,7 @@ functions:

        # Call hang analyzer for tasks that are running remote mongo processes
        if [ -n "${private_ip_address}" ]; then
          $python buildscripts/powercycle_operations.py runHangAnalyzerOnRemoteInstance
          $python buildscripts/resmoke.py powercycle remote-hang-analyzer
        fi

  "wait for resmoke to shutdown":