Skip to content

Repartitioners

Module with repartitioner transformers.

Repartitioners

Bases: object

Class containing repartitioner transformers.

Source code in mkdocs/lakehouse_engine/packages/transformers/repartitioners.py
class Repartitioners(object):
    """Class containing repartitioner transformers.

    Every public method is a factory: it captures its arguments and returns a
    function that takes a dataframe and returns the re-partitioned dataframe,
    so the result can be handed to the .transform() spark function.
    """

    _logger = LoggingHandler(__name__).get_logger()

    @classmethod
    def coalesce(cls, num_partitions: int) -> Callable:
        """Coalesce a dataframe into n partitions.

        Args:
            num_partitions: number of partitions to coalesce into.

        Returns:
            A function to be called in .transform() spark function.

        {{get_example(method_name='coalesce')}}
        """

        def inner(df: DataFrame) -> DataFrame:
            return df.coalesce(num_partitions)

        return inner

    @classmethod
    def repartition(
        cls, num_partitions: Optional[int] = None, cols: Optional[List[str]] = None
    ) -> Callable:
        """Repartition a dataframe by number of partitions and/or by columns.

        If both num_partitions and cols are provided, the dataframe is
        repartitioned by the given columns into the given number of partitions.
        If only cols is provided, repartitioning happens based on the values of
        those columns. If only num_partitions is provided, repartitioning
        happens based on that number.

        Args:
            num_partitions: number of partitions to repartition into.
            cols: list of columns to use for repartitioning.

        Returns:
            A function to be called in .transform() spark function.

        Raises:
            WrongArgumentsException: raised by the returned function, when it
                is applied to a dataframe, if neither num_partitions nor cols
                was specified.

        {{get_example(method_name='repartition')}}
        """

        def inner(df: DataFrame) -> DataFrame:
            if cols:
                # Forward num_partitions only when it was actually given:
                # DataFrame.repartition rejects None as its first positional
                # argument, which would break column-only repartitioning.
                if num_partitions is None:
                    return df.repartition(*cols)
                return df.repartition(num_partitions, *cols)
            elif num_partitions:
                return df.repartition(num_partitions)
            else:
                raise WrongArgumentsException(
                    "num_partitions or cols should be specified"
                )

        return inner

coalesce(num_partitions) classmethod

Coalesce a dataframe into n partitions.

Parameters:

Name Type Description Default
num_partitions int

Number of partitions to coalesce into.

required

Returns:

Type Description
Callable

A function to be called in .transform() spark function.

View Example of coalesce (See full example here)
39{
40    "function": "coalesce",
41    "args": {
42        "num_partitions": 1
43    }
44}
Source code in mkdocs/lakehouse_engine/packages/transformers/repartitioners.py
@classmethod
def coalesce(cls, num_partitions: int) -> Callable:
    """Build a transformer that coalesces a dataframe into n partitions.

    Args:
        num_partitions: number of partitions the dataframe is coalesced into.

    Returns:
        A function to be called in .transform() spark function.

    {{get_example(method_name='coalesce')}}
    """

    def _coalesce(df: DataFrame) -> DataFrame:
        # num_partitions is captured from the enclosing factory call.
        coalesced = df.coalesce(num_partitions)
        return coalesced

    return _coalesce

repartition(num_partitions=None, cols=None) classmethod

Repartition a dataframe into n partitions.

If num_partitions is provided, repartitioning is based on that number; otherwise, it is based on the values of the provided cols (columns).

Parameters:

Name Type Description Default
num_partitions Optional[int]

Number of partitions to repartition into.

None
cols Optional[List[str]]

list of columns to use for repartitioning.

None

Returns:

Type Description
Callable

A function to be called in .transform() spark function.

View Example of repartition (See full example here)
48{
49    "function": "repartition",
50    "args": {
51        "num_partitions": 1
52    }
53}
Source code in mkdocs/lakehouse_engine/packages/transformers/repartitioners.py
@classmethod
def repartition(
    cls, num_partitions: Optional[int] = None, cols: Optional[List[str]] = None
) -> Callable:
    """Repartition a dataframe by number of partitions and/or by columns.

    If both num_partitions and cols are provided, the dataframe is
    repartitioned by the given columns into the given number of partitions.
    If only cols is provided, repartitioning happens based on the values of
    those columns. If only num_partitions is provided, repartitioning happens
    based on that number.

    Args:
        num_partitions: number of partitions to repartition into.
        cols: list of columns to use for repartitioning.

    Returns:
        A function to be called in .transform() spark function.

    Raises:
        WrongArgumentsException: raised by the returned function, when it is
            applied to a dataframe, if neither num_partitions nor cols was
            specified.

    {{get_example(method_name='repartition')}}
    """

    def inner(df: DataFrame) -> DataFrame:
        if cols:
            # Forward num_partitions only when it was actually given:
            # DataFrame.repartition rejects None as its first positional
            # argument, which would break column-only repartitioning.
            if num_partitions is None:
                return df.repartition(*cols)
            return df.repartition(num_partitions, *cols)
        elif num_partitions:
            return df.repartition(num_partitions)
        else:
            raise WrongArgumentsException(
                "num_partitions or cols should be specified"
            )

    return inner