#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import logging
import os
import socket
import struct
import sys
import threading
import time

import structlog
from prometheus_client import CollectorRegistry, Gauge, multiprocess, start_http_server

loglevel = "error"
keepalive = 120
# Set the timeout to something lower than any downstream timeout, so that if
# the timeout is hit, the worker will be killed and respawned. The new worker
# will then be able to pick up any connections that were previously pending on
# the socket and serve those requests before the downstream times out.
timeout = 15
graceful_timeout = 120

METRICS_UPDATE_INTERVAL_SECONDS = int(os.getenv("GUNICORN_METRICS_UPDATE_SECONDS", 5))


def when_ready(server):
    """
    To make it easy to hide the /metrics endpoint when running in production,
    we serve the metrics on a separate port, using the
    prometheus_client.multiprocess collector to pull in data from the worker
    processes.
    """
    registry = CollectorRegistry()
    multiprocess.MultiProcessCollector(registry)
    port = int(os.environ.get("PROMETHEUS_METRICS_EXPORT_PORT", 8001))
    start_http_server(port=port, registry=registry)

    # Start a thread in the Arbiter that will monitor the backlog on the
    # sockets Gunicorn is listening on.
    socket_monitor = SocketMonitor(server=server, registry=registry)
    socket_monitor.start()


def post_fork(server, worker):
    """
    Within each worker process, start a thread that will monitor the thread
    and connection pools.
    """
    worker_monitor = WorkerMonitor(worker=worker)
    worker_monitor.start()


def worker_exit(server, worker):
    """
    Ensure that we mark workers as dead with the prometheus_client so that any
    cleanup can happen.
    """
    multiprocess.mark_process_dead(worker.pid)


class SocketMonitor(threading.Thread):
    """
    We have enabled the statsd collector for Gunicorn, but this doesn't include
    the backlog due to concerns over portability, see
    https://github.com/benoitc/gunicorn/pull/2407

    Instead, we expose to Prometheus a gauge that reports the backlog size.
    We can then:

    1. use this to monitor how well the Gunicorn instances are keeping up with
       requests.
    2. use this metric to drive HPA scaling, e.g. in Kubernetes.
    """

    def __init__(self, server, registry):
        super().__init__()
        self.daemon = True
        self.server = server
        self.registry = registry

    def run(self):
        """
        Every X seconds, check how many connections are pending on each server
        socket. We label each socket individually, as limits such as
        `--backlog` apply to each socket individually.
        """
        if sys.platform != "linux":
            # We rely on Linux-specific TCP_INFO to read the socket backlog,
            # so if we're not on Linux, return immediately.
            return

        backlog_gauge = Gauge(
            "gunicorn_pending_connections",
            "The number of pending connections on all sockets. Linux only.",
            registry=self.registry,
            labelnames=["listener"],
        )

        while True:
            for sock in self.server.LISTENERS:
                backlog = self.get_backlog(sock=sock)
                backlog_gauge.labels(listener=str(sock)).set(backlog)
            time.sleep(METRICS_UPDATE_INTERVAL_SECONDS)

    def get_backlog(self, sock):
        # tcp_info struct from include/uapi/linux/tcp.h: 8 one-byte fields
        # followed by (at least) 24 four-byte fields, 104 bytes in total.
        fmt = "B" * 8 + "I" * 24
        tcp_info_struct = sock.getsockopt(socket.IPPROTO_TCP, socket.TCP_INFO, 104)
        # Field 12 is tcpi_unacked, which for a listening socket holds the
        # number of connections waiting to be accepted.
        return struct.unpack(fmt, tcp_info_struct)[12]


class WorkerMonitor(threading.Thread):
    """
    There is statsd logger support in Gunicorn that allows us to gather metrics
    e.g. on the number of workers, requests, request duration etc. See
    https://docs.gunicorn.org/en/stable/instrumentation.html for details.

    To get a better understanding of the pool utilization and the number of
    accepted connections, we start a thread in each worker to report these via
    Prometheus metrics.
    """

    def __init__(self, worker):
        super().__init__()
        self.daemon = True
        self.worker = worker

    def run(self):
        """
        Every X seconds, check the status of the thread pool and the
        connection pool.
        """
        active_worker_connections = Gauge(
            "gunicorn_active_worker_connections",
            "Number of active connections.",
            labelnames=["pid"],
        )
        max_worker_connections = Gauge(
            "gunicorn_max_worker_connections",
            "Maximum worker connections.",
            labelnames=["pid"],
        )
        total_threads = Gauge(
            "gunicorn_max_worker_threads",
            "Size of the thread pool per worker.",
            labelnames=["pid"],
        )
        active_threads = Gauge(
            "gunicorn_active_worker_threads",
            "Number of threads actively processing requests.",
            labelnames=["pid"],
        )
        pending_requests = Gauge(
            "gunicorn_pending_requests",
            "Number of requests that have been read from a connection but have not completed yet.",
            labelnames=["pid"],
        )

        max_worker_connections.labels(pid=self.worker.pid).set(self.worker.cfg.worker_connections)
        total_threads.labels(pid=self.worker.pid).set(self.worker.cfg.threads)

        while True:
            active_worker_connections.labels(pid=self.worker.pid).set(self.worker.nr_conns)
            active_threads.labels(pid=self.worker.pid).set(
                min(self.worker.cfg.threads, len(self.worker.futures))
            )
            pending_requests.labels(pid=self.worker.pid).set(len(self.worker.futures))
            time.sleep(METRICS_UPDATE_INTERVAL_SECONDS)


LOGGING_FORMATTER_NAME = os.getenv("LOGGING_FORMATTER_NAME", "default")


# Set up stdlib logging to be handled by structlog.
def add_pid_and_tid(
    logger: logging.Logger, method_name: str, event_dict: structlog.types.EventDict
) -> structlog.types.EventDict:
    event_dict["pid"] = os.getpid()
    event_dict["tid"] = threading.get_ident()
    return event_dict


pre_chain = [
    # Add the log level, logger name, pid/tid and a timestamp to the
    # event_dict if the log entry is not from structlog.
    structlog.stdlib.add_log_level,
    structlog.stdlib.add_logger_name,
    add_pid_and_tid,
    structlog.processors.TimeStamper(fmt="iso"),
]

# This is a copy of the default logging config for Gunicorn, with additions to:
#
# 1. stop loggers propagating to the root handlers (otherwise we get duplicate
#    log lines)
# 2. use structlog for processing of log records
#
# See
# https://github.com/benoitc/gunicorn/blob/0b953b803786997d633d66c0f7c7b290df75e07c/gunicorn/glogging.py#L48
# for the default log settings.
logconfig_dict = {
    "version": 1,
    "disable_existing_loggers": True,
    "formatters": {
        "default": {
            "()": structlog.stdlib.ProcessorFormatter,
            "processor": structlog.dev.ConsoleRenderer(colors=True),
            "foreign_pre_chain": pre_chain,
        },
        "json": {
            "()": structlog.stdlib.ProcessorFormatter,
            "processor": structlog.processors.JSONRenderer(),
            "foreign_pre_chain": pre_chain,
        },
    },
    "root": {"level": "INFO", "handlers": ["console"]},
    "loggers": {
        "gunicorn.error": {
            "level": "INFO",
            "handlers": ["error_console"],
            "propagate": False,
            "qualname": "gunicorn.error",
        },
        "gunicorn.access": {
            "level": "INFO",
            "handlers": ["console"],
            "propagate": False,
            "qualname": "gunicorn.access",
        },
    },
    "handlers": {
        "error_console": {
            "class": "logging.StreamHandler",
            "formatter": LOGGING_FORMATTER_NAME,
            "stream": "ext://sys.stderr",
        },
        "console": {
            "class": "logging.StreamHandler",
            "formatter": LOGGING_FORMATTER_NAME,
            "stream": "ext://sys.stdout",
        },
    },
}
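

# Example invocation (a sketch, not part of this config): the module path
# "myapp.wsgi:application", the metrics directory, and the file name
# "gunicorn.conf.py" are placeholders. The prometheus_client multiprocess mode
# needs PROMETHEUS_MULTIPROC_DIR (older releases use the lowercase
# prometheus_multiproc_dir) pointing at an existing, writable directory, and
# WorkerMonitor reads worker.nr_conns / worker.futures, which assumes the
# threaded ("gthread") worker class:
#
#   export PROMETHEUS_MULTIPROC_DIR=/tmp/gunicorn-prometheus
#   export PROMETHEUS_METRICS_EXPORT_PORT=8001
#   gunicorn --config gunicorn.conf.py --worker-class gthread --threads 8 \
#       myapp.wsgi:application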