Skip to content

Regex transformers

Regex transformers module.

RegexTransformers

Bases: object

Class containing all regex functions.

Source code in mkdocs/lakehouse_engine/packages/transformers/regex_transformers.py
class RegexTransformers(object):
    """Collection of regex-based transformer factories."""

    _logger = LoggingHandler(__name__).get_logger()

    @staticmethod
    def with_regex_value(
        input_col: str,
        output_col: str,
        regex: str,
        drop_input_col: bool = False,
        idx: int = 1,
    ) -> Callable:
        """Build a transformer that extracts a regex match into a column.

        The returned function calls regexp_extract on input_col with the
        given regex and idx, and writes the result to output_col.

        Args:
            input_col: name of the column the regex is applied to.
            output_col: name of the column receiving the extracted value.
            regex: regular expression handed to regexp_extract.
            drop_input_col: when True, input_col is dropped after the
                extraction has been written to output_col.
            idx: index argument handed to regexp_extract (the regex group
                to return).

        Returns:
            A function to be executed in the .transform() spark function.

        {{get_example(method_name='with_regex_value')}}
        """

        def inner(df: DataFrame) -> DataFrame:
            extracted = regexp_extract(col(input_col), regex, idx)
            transformed = df.withColumn(output_col, extracted)

            # The drop is applied only after output_col has been added.
            return transformed.drop(input_col) if drop_input_col else transformed

        return inner

with_regex_value(input_col, output_col, regex, drop_input_col=False, idx=1) staticmethod

Get the result of applying a regex to an input column (via regexp_extract).

Parameters:

Name Type Description Default
input_col str

name of the input column.

required
output_col str

name of the output column.

required
regex str

regular expression.

required
drop_input_col bool

whether to drop input_col or not.

False
idx int

index of the regex group to return (passed to regexp_extract).

1

Returns:

Type Description
Callable

A function to be executed in the .transform() spark function.

View Example of with_regex_value (See full example here)
{
    "function": "with_regex_value",
    "args": {
        "input_col": "lhe_extraction_filepath",
        "output_col": "extraction_date",
        "drop_input_col": true,
        "regex": ".*WE_SO_SCL_(\\d+).csv"
    }
}
Source code in mkdocs/lakehouse_engine/packages/transformers/regex_transformers.py
@staticmethod
def with_regex_value(
    input_col: str,
    output_col: str,
    regex: str,
    drop_input_col: bool = False,
    idx: int = 1,
) -> Callable:
    """Create a transformer that stores a regexp_extract result in a column.

    Args:
        input_col: name of the column the regex is evaluated against.
        output_col: name of the column that receives the extracted value.
        regex: regular expression forwarded to regexp_extract.
        drop_input_col: if True, input_col is removed once output_col has
            been added.
        idx: index argument forwarded to regexp_extract (the regex group
            to return).

    Returns:
        A function to be executed in the .transform() spark function.

    {{get_example(method_name='with_regex_value')}}
    """

    def inner(df: DataFrame) -> DataFrame:
        source_column = col(input_col)
        with_match = df.withColumn(
            output_col, regexp_extract(source_column, regex, idx)
        )

        # Guard clause: keep the input column unless asked to drop it.
        if not drop_input_col:
            return with_match

        return with_match.drop(input_col)

    return inner