Login

Mask sensitive data from logger

Author:
agusmakmun
Posted:
March 19, 2025
Language:
Python
Version:
3.1
Score:
0 (after 0 ratings)

This helps keep sensitive values (secrets, tokens, API keys, etc.) out of log output.

Writing sensitive information to the logs is a security risk: if the logs are ever leaked or compromised, those secrets are exposed along with them.

Before:

INFO ('192.168.1.1', 33321) - "WebSocket /ssh?token=abcdefg&width=20&height=20"

After:

INFO ('192.168.1.1', 33321) - "WebSocket /ssh?token=********&width=20&height=20"

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
# 1. settings.py
# Parameter names whose values must be masked in log messages.
#
# MaskingSensitiveDataFormatter (log_formatter.py below) lowercases each
# parameter name it finds and tests it with str.endswith() against every entry
# here, so matching is case-insensitive and suffix-based: "token" alone already
# covers "access_token", "refresh_token", "csrftoken", and so on.
# The formatter only looks at "?name=value" / "&name=value" text, so an entry
# takes effect only when the name shows up in that query-string form.
SENSITIVE_PARAMS = [
    "token",
    "sessionid",
    "authorization",
    "cookie",
    "load_test_token",
    "access_token",
    "refresh_token",
    "api_key",
    "api-key",
    "password",
    "sec-websocket-key",
    "key",
    "secret",
    "csrftoken",
    "csrfmiddlewaretoken",
]

# Logging configuration in the logging.config.dictConfig schema (version 1).
LOGGING = {
    "version": 1,
    # Keep loggers that were created before this configuration is applied.
    "disable_existing_loggers": False,
    "formatters": {
        # Plain formatter; no handler in this snippet references it.
        "verbose": {
            "format": "%(levelname)s %(asctime)s %(module)s "
            "%(process)d %(thread)d [%(filename)s:%(lineno)d] %(message)s",
        },
        # Same layout as "verbose", but rendered by the masking formatter.
        "masking": {
            # "()" tells dictConfig to build the formatter by calling this
            # dotted path instead of logging.Formatter.
            # NOTE(review): "path.to.log_formatter" looks like a placeholder -
            # replace it with the real module path of log_formatter.py.
            "()": "path.to.log_formatter.MaskingSensitiveDataFormatter",
            "format": "%(levelname)s %(asctime)s %(module)s "
            "%(process)d %(thread)d [%(filename)s:%(lineno)d] %(message)s",
        },
    },
    "handlers": {
        # Single stream handler; every record it emits goes through "masking".
        "console": {
            "level": "DEBUG",
            "class": "logging.StreamHandler",
            "formatter": "masking",
        },
    },
    "root": {
        "level": "INFO",
        "handlers": ["console"],
    },
    "loggers": {
        # The uvicorn loggers get the console handler directly; propagate=False
        # stops their records from also reaching the root logger's handler,
        # which would print each line twice.
        "uvicorn.access": {
            "handlers": ["console"],
            "level": "INFO",
            "propagate": False,
        },
        "uvicorn.error": {
            "handlers": ["console"],
            "level": "INFO",
            "propagate": False,
        },
    },
}


# 2. log_formatter.py
import copy
import re
from contextlib import suppress
from logging import Formatter
from typing import Any

from django.conf import settings


class MaskingSensitiveDataFormatter(Formatter):
    """Custom formatter that masks sensitive data in log messages.

    This formatter extends the standard logging Formatter to automatically mask
    sensitive information like tokens, passwords, and API keys in log messages.
    It handles both URL parameters and general text content.

    Attributes:
        SENSITIVE_PARAMS (list[str]): List of parameter names that contain sensitive data.
        URL_PATTERN (re.Pattern): Compiled regex pattern for matching sensitive URL parameters.
    """  # noqa: E501

    URL_PATTERN = re.compile(
        r"([?&])([^=]+)=[^&\s]+",
        re.IGNORECASE,
    )

    def __init__(
        self,
        fmt: str | None = None,
        datefmt: str | None = None,
    ) -> None:
        """Initialize the formatter.

        Args:
            fmt (str | None): The format string for the log message.
            datefmt (str | None): The format string for the date/time portion.
        """
        super().__init__(fmt, datefmt)

    def format(self, record: Any) -> str:
        """Format the log record with masked sensitive data.

        This method processes the log record and masks any sensitive information
        found in the record's arguments. It handles both tuple and dictionary
        arguments, and preserves the original record by working on a copy.

        Args:
            record (Any): The log record to format.

        Returns:
            str: The formatted log message with sensitive data masked.
                If an error occurs during formatting, returns an error message.
        """
        try:
            # Make a copy of the record to avoid modifying the original
            if hasattr(record, "args") and isinstance(record.args, tuple | dict):
                record_copy = self._copy_record(record)

                if isinstance(record_copy.args, tuple) and len(record_copy.args) > 0:
                    args_list = list(record_copy.args)
                    for i, arg in enumerate(args_list):
                        if isinstance(arg, str):
                            args_list[i] = self._mask_sensitive_data(arg)
                    record_copy.args = tuple(args_list)
                elif isinstance(record_copy.args, dict):
                    for key, value in record_copy.args.items():
                        if isinstance(value, str):
                            record_copy.args[key] = self._mask_sensitive_data(value)

                return super().format(record_copy)
            return super().format(record)
        except Exception as e:  # noqa: BLE001
            # If any error occurs during formatting,
            # return a basic formatted message
            return f"Error in log formatting: {e!s}"

    def _copy_record(self, record: Any) -> Any:
        """Create a copy of the log record.

        This method safely creates a copy of the log record to avoid modifying
        the original. If copying fails, it returns the original record.

        Args:
            record (Any): The log record to copy.

        Returns:
            Any: A copy of the log record, or the original record if copying fails.
        """
        with suppress(Exception):
            import copy

            return copy.copy(record)
        return record

    def _mask_sensitive_data(self, text: str) -> str:
        """Mask sensitive data in the given text.

        This method processes the input text and masks any sensitive information
        found in URL parameters. It uses the URL_PATTERN to identify and mask
        sensitive parameters.

        Args:
            text (str): The text containing potentially sensitive data.

        Returns:
            str: The text with sensitive data masked, or the original text if
                masking fails or no sensitive data is found.
        """

        def mask_match(match: re.Match) -> str:
            param_prefix = match.group(1)  # ? or &
            param_name = match.group(2).lower()  # parameter name
            if any(
                param_name.endswith(param.lower())
                for param in settings.SENSITIVE_PARAMS
            ):
                return f"{param_prefix}{param_name}=********"
            return match.group(0)

        with suppress(Exception):
            return self.URL_PATTERN.sub(mask_match, text)
        return text

More like this

  1. Template tag - list punctuation for a list of items by shapiromatron 1 year, 2 months ago
  2. JSONRequestMiddleware adds a .json() method to your HttpRequests by cdcarter 1 year, 2 months ago
  3. Serializer factory with Django Rest Framework by julio 1 year, 9 months ago
  4. Image compression before saving the new model / work with JPG, PNG by Schleidens 1 year, 9 months ago
  5. Help text hyperlinks by sa2812 1 year, 10 months ago

Comments

Please login first before commenting.