Mask sensitive data from logger

Author:: agusmakmun
Posted:: March 19, 2025
Language:: Python
Version:: 3.1
Score:: 0 (after 0 ratings)
Download
Raw
This formatter keeps sensitive values (secrets, tokens, API keys, etc.) out of your log output.
Writing sensitive information to logs is a security risk: if the logs are ever leaked or compromised, the secrets go with them.
Before:
INFO ('192.168.1.1', 33321) - "WebSocket /ssh?token=abcdefg&width=20&height=20"
After:
INFO ('192.168.1.1', 33321) - "WebSocket /ssh?token=********&width=20&height=20"
# 1. settings.py
# Parameter names whose values must be masked in log messages. The masking
# formatter compares them case-insensitively against the END of each
# ``name=value`` parameter name, so "key" also covers e.g. "api_key".
SENSITIVE_PARAMS = [
    "token",
    "sessionid",
    "authorization",
    "cookie",
    "load_test_token",
    "access_token",
    "refresh_token",
    "api_key",
    "api-key",
    "password",
    "sec-websocket-key",
    "key",
    "secret",
    "csrftoken",
    "csrfmiddlewaretoken",
]

# Line layout shared by the plain ("verbose") and the masking formatter.
_LOG_FORMAT = (
    "%(levelname)s %(asctime)s %(module)s "
    "%(process)d %(thread)d [%(filename)s:%(lineno)d] %(message)s"
)

LOGGING = {
    "version": 1,
    "disable_existing_loggers": False,
    "formatters": {
        "verbose": {"format": _LOG_FORMAT},
        "masking": {
            # "()" makes logging instantiate the custom formatter class.
            "()": "path.to.log_formatter.MaskingSensitiveDataFormatter",
            "format": _LOG_FORMAT,
        },
    },
    "handlers": {
        # Single console handler; everything it emits goes through masking.
        "console": {
            "level": "DEBUG",
            "class": "logging.StreamHandler",
            "formatter": "masking",
        },
    },
    "root": {"level": "INFO", "handlers": ["console"]},
    # The uvicorn loggers get the console handler directly and do not
    # propagate to the root logger. Each name gets its own config dict.
    "loggers": {
        logger_name: {
            "handlers": ["console"],
            "level": "INFO",
            "propagate": False,
        }
        for logger_name in ("uvicorn.access", "uvicorn.error")
    },
}


# 2. log_formatter.py
import re
from contextlib import suppress
from logging import Formatter
from typing import Any

from django.conf import settings


class MaskingSensitiveDataFormatter(Formatter):
    """Custom formatter that masks sensitive data in log messages.

    This formatter extends the standard logging Formatter to automatically mask
    sensitive information like tokens, passwords, and API keys in log messages.
    It handles both URL parameters and general text content.

    Attributes:
        SENSITIVE_PARAMS (list[str]): List of parameter names that contain sensitive data.
        URL_PATTERN (re.Pattern): Compiled regex pattern for matching sensitive URL parameters.
    """  # noqa: E501

    URL_PATTERN = re.compile(
        r"([?&])([^=]+)=[^&\s]+",
        re.IGNORECASE,
    )

    def __init__(
        self,
        fmt: str | None = None,
        datefmt: str | None = None,
    ) -> None:
        """Initialize the formatter.

        Args:
            fmt (str | None): The format string for the log message.
            datefmt (str | None): The format string for the date/time portion.
        """
        super().__init__(fmt, datefmt)

    def format(self, record: Any) -> str:
        """Format the log record with masked sensitive data.

        This method processes the log record and masks any sensitive information
        found in the record's arguments. It handles both tuple and dictionary
        arguments, and preserves the original record by working on a copy.

        Args:
            record (Any): The log record to format.

        Returns:
            str: The formatted log message with sensitive data masked.
                If an error occurs during formatting, returns an error message.
        """
        try:
            # Make a copy of the record to avoid modifying the original
            if hasattr(record, "args") and isinstance(record.args, tuple | dict):
                record_copy = self._copy_record(record)

                if isinstance(record_copy.args, tuple) and len(record_copy.args) > 0:
                    args_list = list(record_copy.args)
                    for i, arg in enumerate(args_list):
                        if isinstance(arg, str):
                            args_list[i] = self._mask_sensitive_data(arg)
                    record_copy.args = tuple(args_list)
                elif isinstance(record_copy.args, dict):
                    for key, value in record_copy.args.items():
                        if isinstance(value, str):
                            record_copy.args[key] = self._mask_sensitive_data(value)

                return super().format(record_copy)
            return super().format(record)
        except Exception as e:  # noqa: BLE001
            # If any error occurs during formatting,
            # return a basic formatted message
            return f"Error in log formatting: {e!s}"

    def _copy_record(self, record: Any) -> Any:
        """Create a copy of the log record.

        This method safely creates a copy of the log record to avoid modifying
        the original. If copying fails, it returns the original record.

        Args:
            record (Any): The log record to copy.

        Returns:
            Any: A copy of the log record, or the original record if copying fails.
        """
        with suppress(Exception):
            import copy

            return copy.copy(record)
        return record

    def _mask_sensitive_data(self, text: str) -> str:
        """Mask sensitive data in the given text.

        This method processes the input text and masks any sensitive information
        found in URL parameters. It uses the URL_PATTERN to identify and mask
        sensitive parameters.

        Args:
            text (str): The text containing potentially sensitive data.

        Returns:
            str: The text with sensitive data masked, or the original text if
                masking fails or no sensitive data is found.
        """

        def mask_match(match: re.Match) -> str:
            param_prefix = match.group(1)  # ? or &
            param_name = match.group(2).lower()  # parameter name
            if any(
                param_name.endswith(param.lower())
                for param in settings.SENSITIVE_PARAMS
            ):
                return f"{param_prefix}{param_name}=********"
            return match.group(0)

        with suppress(Exception):
            return self.URL_PATTERN.sub(mask_match, text)
        return text
Comments

Please login first before commenting.
Mask sensitive data from logger

More like this

Comments