mirror of
https://github.com/mongodb/mongo.git
synced 2024-11-24 00:17:37 +01:00
855dfadef0
GitOrigin-RevId: e793d662774ccd3ab6c3f356c2287cf1f7ff9805
157 lines
4.9 KiB
Python
Executable File
157 lines
4.9 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
"""Check files in git diff to ensure they are within a given size limit."""
|
|
|
|
# pylint: disable=wrong-import-position
|
|
|
|
import argparse
|
|
import fnmatch
|
|
import logging
|
|
import os
|
|
import pathlib
|
|
import sys
|
|
import textwrap
|
|
from typing import Any, Dict, List, Optional, Tuple
|
|
|
|
import structlog
|
|
from git import Repo
|
|
|
|
# Parent of the directory that contains this script, computed from the script's resolved,
# absolute location.
mongo_dir = os.path.dirname(
    os.path.dirname(
        os.path.abspath(os.path.realpath(__file__)),
    ),
)

# When run directly as a script (no package context), add that directory to the module
# search path so the ``buildscripts`` imports below work even though the package is not
# installed on the PYTHONPATH.
if __package__ is None and __name__ == "__main__":
    sys.path.append(mongo_dir)
|
|
|
|
from buildscripts.linter import git
|
|
from buildscripts.patch_builds.change_data import (
|
|
RevisionMap,
|
|
find_changed_files_in_repos,
|
|
generate_revision_map,
|
|
)
|
|
|
|
|
|
# Console renderer for structured logging
|
|
def renderer(_logger: logging.Logger, _name: str, eventdict: Dict[Any, Any]) -> str:
    """
    Render a structlog event dictionary as a short console message.

    The first matching key decides the layout, checked in this order: "files", "repo",
    "file" (with the byte count appended when "bytes" is also present). When none of
    them is present only the event text is rendered.

    :param _logger: Logger the event was emitted on (unused).
    :param _name: Name of the log method that was called (unused).
    :param eventdict: Event dictionary; must contain "event".
    :return: The formatted message.
    """
    if "files" in eventdict:
        template = "{event}: {files}"
    elif "repo" in eventdict:
        template = "{event}: {repo}"
    elif "file" in eventdict:
        if "bytes" in eventdict:
            template = "{event}: {file} {bytes} bytes"
        else:
            template = "{event}: {file}"
    else:
        template = "{event}"

    return template.format(**eventdict)
|
|
|
|
|
|
# Keep console output compact so that huge diffs do not spam the log: events go through the
# standard library logger, are filtered by log level, and are then formatted by ``renderer``.
structlog.configure(
    processors=[
        structlog.stdlib.filter_by_level,
        renderer,
    ],
    wrapper_class=structlog.stdlib.BoundLogger,
    logger_factory=structlog.stdlib.LoggerFactory(),
    cache_logger_on_first_use=True,
)

# Module-wide structured logger.
LOGGER = structlog.get_logger(__name__)

# Environment variable read to obtain the revision used for the "mongo" repo.
MONGO_REVISION_ENV_VAR = "REVISION"
|
|
|
|
|
|
def _get_repos_and_revisions() -> Tuple[List[Repo], RevisionMap]:
    """
    Build the repos to inspect and the revision map they are compared against.

    :return: Tuple of (list of Repo objects, revision map).
    """
    # The enterprise module is in the "modules" folder but does not correspond to a repo,
    # so every module path containing it is skipped.
    enterprise_marker = "src/mongo/db/modules/enterprise"

    repos = []
    for module_path in git.get_module_paths():
        if enterprise_marker in module_path:
            continue
        repos.append(Repo(module_path))

    # The "mongo" revision comes from the environment; it is None when the variable is unset.
    mongo_revision = os.environ.get(MONGO_REVISION_ENV_VAR)
    return repos, generate_revision_map(repos, {"mongo": mongo_revision})
|
|
|
|
|
|
def git_changed_files(excludes: List[pathlib.Path]) -> List[pathlib.Path]:
    """
    Collect the changed files that should be size-checked.

    Changed files are looked up across the repos using their revision map. A file is kept
    only if it still exists on disk and matches none of the exclude patterns.

    :param excludes: A list of files (fnmatch-style patterns) to leave out of the result.
    :return: List of changed files to check.
    """
    repos, revision_map = _get_repos_and_revisions()
    LOGGER.debug("revisions", revision=revision_map)

    def _should_check(candidate: pathlib.Path) -> bool:
        # Paths that are no longer present on disk have no size to check.
        if not candidate.exists():
            return False
        return not any(fnmatch.fnmatch(candidate, pattern) for pattern in excludes)

    candidates = [
        pathlib.Path(changed) for changed in find_changed_files_in_repos(repos, revision_map)
    ]
    files = list(filter(_should_check, candidates))

    LOGGER.debug("Found files to check", files=[str(found) for found in files])
    return files
|
|
|
|
|
|
def diff_file_sizes(
    size_limit: int, excludes: Optional[List[pathlib.Path]] = None
) -> List[pathlib.Path]:
    """
    Find changed files whose size on disk exceeds the given limit.

    :param size_limit: Maximum allowed file size in bytes; only strictly larger files fail.
    :param excludes: Paths or glob patterns to skip, forwarded to git_changed_files.
        None means nothing is excluded.
    :return: List of changed files that are larger than size_limit.
    """
    if excludes is None:
        excludes = []

    large_files: List[pathlib.Path] = []

    for file_path in git_changed_files(excludes):
        LOGGER.debug("Checking file size", file=str(file_path))
        file_size = file_path.stat().st_size
        if file_size > size_limit:
            # Log each offender as it is found so the size is reported next to the path.
            LOGGER.error("File too large", file=str(file_path), bytes=file_size)
            large_files.append(file_path)

    return large_files
|
|
|
|
|
|
def main(*args: str) -> int:
    """
    Execute Main entry point.

    :param args: Full argument vector; the first element (the program name) is skipped
        before the remaining arguments are parsed.
    :return: 0 when every checked file is within the size limit, 1 otherwise.
    """
    parser = argparse.ArgumentParser(
        description="Git commit large file checker.",
        epilog=textwrap.dedent("""\
            NOTE: The --exclude argument is an exact match but can accept glob patterns. If * is used,
            it matches *all* characters, including path separators.
        """),
    )
    parser.add_argument("--verbose", action="store_true", help="Enable verbose logging")
    parser.add_argument(
        "--exclude",
        help="Paths to exclude from check",
        nargs="+",
        type=pathlib.Path,
        required=False,
    )
    # The default is given as an int so it already has the option's declared type.
    parser.add_argument("--size-mb", help="File size limit (MiB)", type=int, default=10)
    parsed_args = parser.parse_args(args[1:])

    # NOTE(review): the filter_by_level calls below discard their return value; confirm
    # whether they are still needed in addition to logging.basicConfig.
    if parsed_args.verbose:
        logging.basicConfig(level=logging.DEBUG)
        structlog.stdlib.filter_by_level(LOGGER, "debug", {})
    else:
        logging.basicConfig(level=logging.INFO)
        structlog.stdlib.filter_by_level(LOGGER, "info", {})

    # --size-mb is expressed in MiB; diff_file_sizes expects a limit in bytes.
    size_limit_bytes = parsed_args.size_mb * 1024 * 1024
    large_files = diff_file_sizes(size_limit_bytes, parsed_args.exclude)
    if not large_files:
        LOGGER.info("All files passed size check")
        return 0

    LOGGER.error("Some files failed size check", files=list(map(str, large_files)))
    return 1
|
|
|
|
|
|
if __name__ == "__main__":
    # Use main()'s return value as the process exit status.
    raise SystemExit(main(*sys.argv))
|