lakehouse_engine.utils.logging_handler

Module to configure project logging.

 1"""Module to configure project logging."""
 2
 3import logging
 4import re
 5
 6FORMATTER = logging.Formatter("%(asctime)s%(name)s%(levelname)s%(message)s")
 7SENSITIVE_KEYS_REG = [
 8    {  # Enclosed in ''.
 9        # Stops replacing when it finds comma and space, space or end of line.
10        "regex": r"'(kafka\.ssl\.keystore\.password|kafka\.ssl\.truststore\.password"
11        r"|password|secret|credential|credentials|pass|key)'[ ]*:"
12        r"[ ]*'.*?(, | |}|$)",
13        "replace": "'masked_cred': '******', ",
14    },
15    {  # Enclosed in "".
16        # Stops replacing when it finds comma and space, space or end of line.
17        "regex": r'"(kafka\.ssl\.keystore\.password|kafka\.ssl\.truststore\.password'
18        r'|password|secret|credential|credentials|pass|key)"[ ]*:'
19        r'[ ]*".*?(, | |}|$)',
20        "replace": '"masked_cred": "******", ',
21    },
22    {  # Not enclosed in '' or "".
23        # Stops replacing when it finds comma and space, space or end of line.
24        "regex": r"(kafka\.ssl\.keystore\.password|kafka\.ssl\.truststore\.password"
25        r"|password|secret|credential|credentials|pass|key)[ ]*:"
26        r"[ ]*.*?(, | |}|$)",
27        "replace": "masked_cred: ******, ",
28    },
29]
30
31
class FilterSensitiveData(logging.Filter):
    """Logging filter to hide sensitive data from being shown in the logs."""

    def filter(self, record: logging.LogRecord) -> bool:  # noqa: A003
        """Hide sensitive information from being shown in the logs.

        The message is first rendered together with its arguments, so that
        values passed as lazy ``%``-style arguments are scanned as well. Then
        every regex/replace pair configured in ``SENSITIVE_KEYS_REG`` is applied
        to the rendered text. All the records are allowed to be logged.

        Args:
            record: the LogRecord event being logged. Its ``msg`` is replaced by
                the masked text and, when the arguments could be rendered into
                the message, its ``args`` are cleared.

        Returns:
            Always True, so that the (masked) record is logged.
        """
        try:
            # Masking only the template would miss secrets held in the args
            # and could strip "%s" placeholders, breaking later formatting.
            message = record.getMessage()
        except (TypeError, ValueError, KeyError):
            # Template and arguments do not match: scrub the template only and
            # leave the arguments untouched, as was done before, so the usual
            # logging error handling still reports the formatting problem.
            message = str(record.msg)
        else:
            # The arguments are already part of the message; clearing them
            # prevents a second interpolation of the masked text.
            record.args = None

        for key_reg in SENSITIVE_KEYS_REG:
            message = re.sub(key_reg["regex"], key_reg["replace"], message)

        record.msg = message
        return True
51
52
class LoggingHandler(object):
    """Handle the logging of the lakehouse engine project."""

    def __init__(self, class_name: str):
        """Construct a LoggingHandler instance.

        The logger named after ``class_name`` is set to DEBUG level, gets the
        sensitive data filter attached and, when neither it nor its ancestors
        have handlers yet, a DEBUG level stream handler using ``FORMATTER``.

        Args:
            class_name: name of the class to be indicated in the logs.
        """
        self._logger: logging.Logger = logging.getLogger(class_name)
        self._logger.setLevel(logging.DEBUG)

        # logging.getLogger returns the same logger object for the same name,
        # and addFilter only rejects the very same filter object. Without this
        # check each construction would stack one more filter on the logger.
        already_filtered = any(
            isinstance(existing, FilterSensitiveData)
            for existing in self._logger.filters
        )
        if not already_filtered:
            self._logger.addFilter(FilterSensitiveData())

        if not self._logger.hasHandlers():
            # avoid keep adding handlers and therefore duplicate messages
            lsh = logging.StreamHandler()
            lsh.setLevel(logging.DEBUG)
            lsh.setFormatter(FORMATTER)
            self._logger.addHandler(lsh)

    def get_logger(self) -> logging.Logger:
        """Get the _logger instance variable.

        Returns:
            logging.Logger: the logger object.
        """
        return self._logger
FORMATTER = <logging.Formatter object>
SENSITIVE_KEYS_REG = [{'regex': "'(kafka\\.ssl\\.keystore\\.password|kafka\\.ssl\\.truststore\\.password|password|secret|credential|credentials|pass|key)'[ ]*:[ ]*'.*?(, | |}|$)", 'replace': "'masked_cred': '******', "}, {'regex': '"(kafka\\.ssl\\.keystore\\.password|kafka\\.ssl\\.truststore\\.password|password|secret|credential|credentials|pass|key)"[ ]*:[ ]*".*?(, | |}|$)', 'replace': '"masked_cred": "******", '}, {'regex': '(kafka\\.ssl\\.keystore\\.password|kafka\\.ssl\\.truststore\\.password|password|secret|credential|credentials|pass|key)[ ]*:[ ]*.*?(, | |}|$)', 'replace': 'masked_cred: ******, '}]
class FilterSensitiveData(logging.Filter):
class FilterSensitiveData(logging.Filter):
    """Logging filter to hide sensitive data from being shown in the logs."""

    def filter(self, record: logging.LogRecord) -> bool:  # noqa: A003
        """Hide sensitive information from being shown in the logs.

        The message is first rendered together with its arguments, so that
        values passed as lazy ``%``-style arguments are scanned as well. Then
        every regex/replace pair configured in ``SENSITIVE_KEYS_REG`` is applied
        to the rendered text. All the records are allowed to be logged.

        Args:
            record: the LogRecord event being logged. Its ``msg`` is replaced by
                the masked text and, when the arguments could be rendered into
                the message, its ``args`` are cleared.

        Returns:
            Always True, so that the (masked) record is logged.
        """
        try:
            # Masking only the template would miss secrets held in the args
            # and could strip "%s" placeholders, breaking later formatting.
            message = record.getMessage()
        except (TypeError, ValueError, KeyError):
            # Template and arguments do not match: scrub the template only and
            # leave the arguments untouched, as was done before, so the usual
            # logging error handling still reports the formatting problem.
            message = str(record.msg)
        else:
            # The arguments are already part of the message; clearing them
            # prevents a second interpolation of the masked text.
            record.args = None

        for key_reg in SENSITIVE_KEYS_REG:
            message = re.sub(key_reg["regex"], key_reg["replace"], message)

        record.msg = message
        return True

Logging filter to hide sensitive data from being shown in the logs.

def filter(self, record: logging.LogRecord) -> bool:
36    def filter(self, record: logging.LogRecord) -> bool:  # noqa: A003
37        """Hide sensitive information from being shown in the logs.
38
39        Based on the configured regex and replace strings, the content of the log
40        records is replaced and then all the records are allowed to be logged
41        (return True).
42
43        Args:
44            record: the LogRecord event being logged.
45
46        Returns:
47            The transformed record to be logged.
48        """
49        for key_reg in SENSITIVE_KEYS_REG:
50            record.msg = re.sub(key_reg["regex"], key_reg["replace"], str(record.msg))
51        return True

Hide sensitive information from being shown in the logs.

Based on the configured regex and replace strings, the content of the log records is replaced and then all the records are allowed to be logged (return True).

Arguments:
  • record: the LogRecord event being logged.
Returns:

Always True: the record's message is masked in place and the record is then allowed to be logged.

Inherited Members
logging.Filter
Filter
name
nlen
class LoggingHandler:
class LoggingHandler(object):
    """Handle the logging of the lakehouse engine project."""

    def __init__(self, class_name: str):
        """Construct a LoggingHandler instance.

        The logger named after ``class_name`` is set to DEBUG level, gets the
        sensitive data filter attached and, when neither it nor its ancestors
        have handlers yet, a DEBUG level stream handler using ``FORMATTER``.

        Args:
            class_name: name of the class to be indicated in the logs.
        """
        self._logger: logging.Logger = logging.getLogger(class_name)
        self._logger.setLevel(logging.DEBUG)

        # logging.getLogger returns the same logger object for the same name,
        # and addFilter only rejects the very same filter object. Without this
        # check each construction would stack one more filter on the logger.
        already_filtered = any(
            isinstance(existing, FilterSensitiveData)
            for existing in self._logger.filters
        )
        if not already_filtered:
            self._logger.addFilter(FilterSensitiveData())

        if not self._logger.hasHandlers():
            # avoid keep adding handlers and therefore duplicate messages
            lsh = logging.StreamHandler()
            lsh.setLevel(logging.DEBUG)
            lsh.setFormatter(FORMATTER)
            self._logger.addHandler(lsh)

    def get_logger(self) -> logging.Logger:
        """Get the _logger instance variable.

        Returns:
            logging.Logger: the logger object.
        """
        return self._logger

Handle the logging of the lakehouse engine project.

LoggingHandler(class_name: str)
57    def __init__(self, class_name: str):
58        """Construct a LoggingHandler instance.
59
60        Args:
61            class_name: name of the class to be indicated in the logs.
62        """
63        self._logger: logging.Logger = logging.getLogger(class_name)
64        self._logger.setLevel(logging.DEBUG)
65        self._logger.addFilter(FilterSensitiveData())
66        lsh = logging.StreamHandler()
67        lsh.setLevel(logging.DEBUG)
68        lsh.setFormatter(FORMATTER)
69        if not self._logger.hasHandlers():
70            # avoid keep adding handlers and therefore duplicate messages
71            self._logger.addHandler(lsh)

Construct a LoggingHandler instance.

Arguments:
  • class_name: name of the class to be indicated in the logs.
def get_logger(self) -> logging.Logger:
73    def get_logger(self) -> logging.Logger:
74        """Get the _logger instance variable.
75
76        Returns:
77            logging.Logger: the logger object.
78        """
79        return self._logger

Get the _logger instance variable.

Returns:

logging.Logger: the logger object.