0
0
mirror of https://github.com/mongodb/mongo.git synced 2024-11-24 08:30:56 +01:00
mongodb/site_scons/site_tools/validate_cache_dir.py
Juan Gu 855dfadef0 SERVER-94077 Use isort in Ruff configs (#27865)
GitOrigin-RevId: e793d662774ccd3ab6c3f356c2287cf1f7ff9805
2024-10-10 19:33:49 +00:00

312 lines
11 KiB
Python

# Copyright 2021 MongoDB Inc.
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
#
# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
# KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
# WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#
import datetime
import json
import os
import pathlib
import shutil
import sys
import tempfile
import traceback
from timeit import default_timer as timer
import SCons
# Format suffix appended to cache-debug log messages; filled with the
# (target, cachefile) pair by CacheDir.CacheDebug-style %-formatting.
cache_debug_suffix = " (target: %s, cachefile: %s) "
class InvalidChecksum(SCons.Errors.BuildError):
    """Raised when a cached file's stored md5 digest fails validation.

    Carries both the digest recorded in the cache (``cache_csig``) and the
    digest computed from the retrieved file (``computed_csig``) so callers
    can log the mismatch.
    """

    def __init__(self, src, dst, reason, cache_csig="", computed_csig=""):
        self.message = f"ERROR: md5 checksum {reason} for {src} ({dst})"
        # Preserve both digests for diagnostic logging by the caller.
        self.computed_csig = computed_csig
        self.cache_csig = cache_csig

    def __str__(self):
        return self.message
class CacheTransferFailed(SCons.Errors.BuildError):
    """Raised when copying a file into or out of the cache directory fails."""

    def __init__(self, src, dst, reason):
        # Fixed spelling in the user-visible message: "transfering" -> "transferring".
        self.message = f"ERROR: cachedir transfer {reason} while transferring {src} to {dst}"

    def __str__(self):
        return self.message
class UnsupportedError(SCons.Errors.BuildError):
    """Raised when a CacheDir feature is requested that this class rejects."""

    def __init__(self, class_name, feature):
        self.message = f"{class_name} does not support {feature}"

    def __str__(self):
        return self.message
class CacheDirValidate(SCons.CacheDir.CacheDir):
    """CacheDir subclass that wraps every cache entry in a ``<sig>.cksum``
    directory containing the real file plus a ``content_hash`` sidecar, and
    validates the md5 digest on every retrieval.

    Also writes structured JSON cache-debug events (hit/miss/push/errors)
    next to the standard SCons cache debug log.
    """

    def __init__(self, path):
        # JSON debug log file handle; opened lazily in CacheDebugJson()
        # the first time cache debugging to a real file is enabled.
        self.json_log = None
        super().__init__(path)
    @staticmethod
    def get_ext():
        # Cache prune script is allowing only directories with this extension
        # if this is changed, cache prune script should also be updated.
        return ".cksum"
    @staticmethod
    def get_file_contents_path(default_cachefile_path):
        # The payload file inside the .cksum directory is named after the
        # directory with its extension(s) stripped (split at the first dot).
        return (
            pathlib.Path(default_cachefile_path)
            / pathlib.Path(default_cachefile_path).name.split(".")[0]
        )
    @staticmethod
    def get_bad_cachefile_path(cksum_cachefile_dir):
        # Marker file whose presence flags the entry as a known-bad checksum.
        return pathlib.Path(cksum_cachefile_dir) / "bad_cache_file"
    @staticmethod
    def get_hash_path(cksum_cachefile_path):
        # Sidecar file holding the expected content hash, stored as a
        # sibling of the payload file (i.e. inside the .cksum directory).
        return pathlib.Path(cksum_cachefile_path).parent / "content_hash"
    @staticmethod
    def get_cachedir_path(path):
        # Map a plain cachefile path to its .cksum directory path.
        return str(pathlib.Path(path + CacheDirValidate.get_ext()))
    @classmethod
    def copy_from_cache(cls, env, src, dst):
        """Copy ``src`` out of the cache into ``dst``, validating its checksum.

        Raises InvalidChecksum on a bad/missing/mismatched hash,
        CacheTransferFailed on copy errors, and UnsupportedError when
        timestamp-newer mode is requested (incompatible with validation).
        """
        # Entries without our extension were not pushed by this class;
        # fall back to the stock CacheDir behavior.
        if not str(pathlib.Path(src)).endswith(cls.get_ext()):
            return super().copy_from_cache(env, src, dst)
        if env.cache_timestamp_newer:
            raise UnsupportedError(cls.__name__, "timestamp-newer")
        src_file = cls.get_file_contents_path(src)
        # using os.path.exists here because: https://bugs.python.org/issue35306
        if os.path.exists(str(cls.get_bad_cachefile_path(src))):
            raise InvalidChecksum(
                cls.get_hash_path(src_file), dst, "cachefile marked as bad checksum"
            )
        csig = None
        try:
            with open(cls.get_hash_path(src_file), "rb") as f_out:
                csig = f_out.read().decode().strip()
        except OSError as ex:
            raise InvalidChecksum(
                cls.get_hash_path(src_file), dst, f"failed to read hash file: {ex}"
            ) from ex
        else:
            if not csig:
                raise InvalidChecksum(
                    cls.get_hash_path(src_file), dst, "no content_hash data found"
                )
        # Copy via a temp dir so a partially-written dst never becomes
        # visible; shutil.move makes the final placement.
        with tempfile.TemporaryDirectory() as tmpdirname:
            dst_tmp = pathlib.Path(tmpdirname) / os.path.basename(dst)
            try:
                shutil.copy2(src_file, dst_tmp)
            except OSError as ex:
                raise CacheTransferFailed(src_file, dst, f"failed to copy from cache: {ex}") from ex
            else:
                shutil.move(dst_tmp, dst)
        # Re-hash the retrieved file and compare with the recorded digest.
        new_csig = SCons.Util.MD5filesignature(
            dst, chunksize=SCons.Node.FS.File.md5_chunksize * 1024
        )
        if csig != new_csig:
            raise InvalidChecksum(
                cls.get_hash_path(src_file),
                dst,
                f"checksums don't match {csig} != {new_csig}",
                cache_csig=csig,
                computed_csig=new_csig,
            )
    @classmethod
    def copy_to_cache(cls, env, src, dst):
        """Copy ``src`` into the cache at ``dst`` and record its content hash.

        Raises CacheTransferFailed if either the payload copy or the hash
        sidecar write fails.
        """
        # dst is bsig/file from cachepath method, so
        # we make sure to make the bsig dir first
        dst = pathlib.Path(dst)
        dst_file = dst / dst.name.split(".")[0]
        try:
            os.makedirs(dst, exist_ok=True)
            super().copy_to_cache(env, src, dst_file)
        except OSError as ex:
            raise CacheTransferFailed(src, dst_file, f"failed to copy to cache: {ex}") from ex
        try:
            with open(cls.get_hash_path(dst_file), "w") as f_out:
                f_out.write(env.File(src).get_content_hash())
        except OSError as ex:
            raise CacheTransferFailed(src, dst_file, f"failed to create hash file: {ex}") from ex
    def log_json_cachedebug(self, node, pushing=False, duration=0):
        """Emit a JSON cache event (hit/miss/push/double_push) for ``node``.

        Pushes for uncacheable nodes, read-only cache mode, or conftest
        nodes are not logged.
        """
        if pushing and (node.nocache or SCons.CacheDir.cache_readonly or "conftest" in str(node)):
            return
        cachefile = self.get_file_contents_path(self.cachepath(node)[1])
        # If the entry already exists: a retrieve is a "hit" and a push is a
        # "double_push"; otherwise a retrieve is a "miss" and a push a "push".
        if node.fs.exists(cachefile):
            cache_event = "double_push" if pushing else "hit"
        else:
            cache_event = "push" if pushing else "miss"
        self.CacheDebugJson({"type": cache_event}, node, cachefile, duration)
    def retrieve(self, node):
        """Timed retrieve wrapper: logs the outcome as JSON and converts
        checksum/transfer failures into a ``False`` (cache miss) result,
        quarantining entries that failed validation."""
        if not self.is_enabled():
            return False
        try:
            start = timer()
            result = super().retrieve(node)
            self.log_json_cachedebug(node, duration=timer() - start)
            return result
        except InvalidChecksum as ex:
            self.print_cache_issue(node, ex)
            self.clean_bad_cachefile(node, ex.cache_csig, ex.computed_csig)
            return False
        except (UnsupportedError, CacheTransferFailed) as ex:
            self.print_cache_issue(node, ex)
            return False
    def push(self, node):
        """Timed push wrapper: logs the outcome as JSON and converts a
        transfer failure into a ``False`` result instead of raising."""
        if self.is_readonly() or not self.is_enabled():
            return
        try:
            start = timer()
            result = super().push(node)
            self.log_json_cachedebug(node, pushing=True, duration=timer() - start)
            return result
        except CacheTransferFailed as ex:
            self.print_cache_issue(node, ex)
            return False
    def CacheDebugJson(self, json_data, target, cachefile, duration, size=None):
        """Append one JSON-lines record describing a cache event.

        Opens the ``<cache_debug>.json`` log lazily on first use; a ``-``
        debug setting (stdout) gets no JSON log. ``size`` is looked up from
        ``cachefile`` when not supplied; lookup errors are recorded as the
        exception name rather than raising.
        """
        if (
            SCons.CacheDir.cache_debug
            and SCons.CacheDir.cache_debug != "-"
            and self.json_log is None
        ):
            # NOTE: handle is kept open for the process lifetime (append mode).
            self.json_log = open(SCons.CacheDir.cache_debug + ".json", "a")
        if self.json_log is not None:
            if size is None:
                try:
                    size = os.path.getsize(cachefile)
                except FileNotFoundError:
                    size = "FileNotFoundError"
                except NotADirectoryError:
                    size = "NotADirectoryError"
            # Report the .cksum directory (not the inner payload file) as
            # the cachefile when the entry uses our wrapped layout.
            cksum_cachefile = str(pathlib.Path(cachefile).parent)
            if cksum_cachefile.endswith(self.get_ext()):
                cachefile = cksum_cachefile
            json_data.update(
                {
                    "timestamp": str(datetime.datetime.now(datetime.timezone.utc)),
                    "duration": duration,
                    "size": size,
                    "realfile": str(target),
                    "cachefile": pathlib.Path(cachefile).name,
                    "cache_dir": str(pathlib.Path(cachefile).parent.parent),
                }
            )
            # capture exception information
            if sys.exc_info()[1]:
                json_data.update({"error": self._format_exception_msg()})
            self.json_log.write(json.dumps(json_data) + "\n")
    def CacheDebug(self, fmt, target, cachefile):
        """Standard cache debug plus the active exception's traceback, if any."""
        super().CacheDebug(fmt, target, cachefile)
        # Capture exception information into the cache debug log
        if sys.exc_info()[1] and self.debugFP:
            self.debugFP.write(self._format_exception_msg())
    def _format_exception_msg(self):
        # Indent every traceback line so it reads as one log entry.
        return (
            "An exception was detected while using the cache:\n"
            + " "
            + "\n ".join("".join(traceback.format_exc()).split("\n"))
        ) + "\n"
    def _log(self, log_msg, json_info, realnode, cachefile):
        # Write the same event to both the text and the JSON debug logs.
        self.CacheDebug(log_msg + cache_debug_suffix, realnode, cachefile)
        self.CacheDebugJson(json_info, realnode, cachefile, 0)
    def print_cache_issue(self, node, ex):
        """Log a cache error (text + JSON) for ``node``'s cache entry."""
        cksum_dir = pathlib.Path(self.cachepath(node)[1])
        self._log(str(ex), {"type": "error"}, node, cksum_dir)
    def clean_bad_cachefile(self, node, cache_csig, computed_csig):
        """Quarantine a cache entry that failed checksum validation.

        Touches the bad-entry marker, then renames the .cksum directory to a
        unique ``.del`` name for later removal (presumably by the prune
        script — the entry is renamed here, not deleted). Rename failures
        are logged and swallowed.
        """
        cksum_dir = pathlib.Path(self.cachepath(node)[1])
        # Unique per-build-process name so concurrent builds don't collide.
        rm_path = f"{cksum_dir}.{SCons.CacheDir.cache_tmp_uuid}.del"
        try:
            try:
                pathlib.Path(self.get_bad_cachefile_path(cksum_dir)).touch()
            except FileExistsError:
                pass
            cksum_dir.replace(rm_path)
        except OSError as ex:
            msg = f"Failed to rename {cksum_dir} to {rm_path}: {ex}"
            self._log(msg, {"type": "error"}, node, cksum_dir)
            return
        msg = f"Removed bad cachefile {cksum_dir} found in cache."
        self._log(
            msg,
            {
                "type": "invalid_checksum",
                "cache_csig": cache_csig,
                "computed_csig": computed_csig,
            },
            node,
            cksum_dir,
        )
    def get_cachedir_csig(self, node):
        """Return the stored content hash for ``node``'s cache entry, or
        ``None`` when the entry does not exist."""
        cachedir, cachefile = self.cachepath(node)
        if cachefile and os.path.exists(cachefile):
            with open(self.get_hash_path(self.get_file_contents_path(cachefile)), "rb") as f_out:
                return f_out.read().decode()
    def cachepath(self, node):
        """Return (dir, path) for ``node``, preferring an existing legacy
        (non-.cksum) path and otherwise mapping to the .cksum layout."""
        if not self.is_enabled():
            return None, None
        dir, path = super().cachepath(node)
        if node.fs.exists(path):
            return dir, path
        return dir, str(self.get_cachedir_path(path))
def exists(env):
    """SCons tool availability hook: this tool has no prerequisites."""
    return True
def generate(env):
    """SCons tool setup hook: install CacheDirValidate as the environment's
    cache-dir implementation unless one is already configured."""
    if env.get("CACHEDIR_CLASS"):
        return
    env["CACHEDIR_CLASS"] = CacheDirValidate