Skip to content

Null handlers

Module with null handler transformers.

NullHandlers

Bases: object

Class containing null handler transformers.

Source code in mkdocs/lakehouse_engine/packages/transformers/null_handlers.py
class NullHandlers(object):
    """Transformers that deal with null values in dataframes."""

    _logger = LoggingHandler(__name__).get_logger()

    @classmethod
    def replace_nulls(
        cls,
        replace_on_nums: bool = True,
        default_num_value: int = -999,
        replace_on_strings: bool = True,
        default_string_value: str = "UNKNOWN",
        subset_cols: List[str] = None,
    ) -> Callable:
        """Build a transformer that fills nulls with default values.

        The returned function runs up to two `df.na.fill` passes, in this
        order: one with the numeric default and one with the string default.
        Each pass only runs when its flag is truthy, and both passes are
        given the same `subset_cols`.

        Args:
            replace_on_nums: whether to run the fill pass that uses
                `default_num_value` (meant for numeric columns: ints, longs
                and floats).
            default_num_value: value passed to `df.na.fill` in the numeric
                pass.
            replace_on_strings: whether to run the fill pass that uses
                `default_string_value` (meant for string columns).
            default_string_value: value passed to `df.na.fill` in the
                string pass.
            subset_cols: columns to which the replacement is restricted. When
                left as None, no column restriction is passed to
                `df.na.fill`.

        Returns:
            A function to be called in .transform() spark function.

        {{get_example(method_name='replace_nulls')}}
        """

        def fill_defaults(df: DataFrame) -> DataFrame:
            # (enabled flag, replacement value) pairs, in application order:
            # numeric pass first, string pass second.
            fill_passes = (
                (replace_on_nums, default_num_value),
                (replace_on_strings, default_string_value),
            )

            result = df
            for enabled, replacement in fill_passes:
                if enabled:
                    result = result.na.fill(replacement, subset_cols)

            return result

        return fill_defaults

replace_nulls(replace_on_nums=True, default_num_value=-999, replace_on_strings=True, default_string_value='UNKNOWN', subset_cols=None) classmethod

Replace nulls in a dataframe.

Parameters:

Name Type Description Default
replace_on_nums bool

Whether to replace nulls in numeric columns. Applies to ints, longs and floats.

True
default_num_value int

default integer value to use as replacement.

-999
replace_on_strings bool

Whether to replace nulls in string columns.

True
default_string_value str

default string value to use as replacement.

'UNKNOWN'
subset_cols List[str]

list of columns in which to replace nulls. If not provided, all nulls in all columns will be replaced as specified.

None

Returns:

Type Description
Callable

A function to be called in .transform() spark function.

View Example of replace_nulls (See full example here)
21{
22    "function": "replace_nulls",
23    "args": {
24        "subset_cols": [
25            "amount"
26        ]
27    }
28}
Source code in mkdocs/lakehouse_engine/packages/transformers/null_handlers.py
@classmethod
def replace_nulls(
    cls,
    replace_on_nums: bool = True,
    default_num_value: int = -999,
    replace_on_strings: bool = True,
    default_string_value: str = "UNKNOWN",
    subset_cols: List[str] = None,
) -> Callable:
    """Build a transformer that fills nulls with default values.

    The returned function first applies `df.na.fill` with the numeric
    default (when `replace_on_nums` is truthy) and then with the string
    default (when `replace_on_strings` is truthy). Both calls receive the
    same `subset_cols`.

    Args:
        replace_on_nums: whether to run the fill pass that uses
            `default_num_value` (meant for numeric columns: ints, longs
            and floats).
        default_num_value: value passed to `df.na.fill` in the numeric pass.
        replace_on_strings: whether to run the fill pass that uses
            `default_string_value` (meant for string columns).
        default_string_value: value passed to `df.na.fill` in the string
            pass.
        subset_cols: columns to which the replacement is restricted. When
            left as None, no column restriction is passed to `df.na.fill`.

    Returns:
        A function to be called in .transform() spark function.

    {{get_example(method_name='replace_nulls')}}
    """

    def apply_defaults(df: DataFrame) -> DataFrame:
        # Numeric pass: skipped entirely when the flag is off.
        after_numeric = (
            df.na.fill(default_num_value, subset_cols) if replace_on_nums else df
        )

        # String pass runs on the output of the numeric pass.
        if not replace_on_strings:
            return after_numeric
        return after_numeric.na.fill(default_string_value, subset_cols)

    return apply_defaults