Skip to content

Optimizers

Optimizers module.

Optimizers

Bases: object

Class containing all the functions that can provide optimizations.

Source code in mkdocs/lakehouse_engine/packages/transformers/optimizers.py
class Optimizers(object):
    """Collection of dataframe caching/persistence transformers.

    Every public method is a factory: it returns a single-argument function
    (DataFrame -> DataFrame) intended to be passed to the spark
    ``DataFrame.transform()`` function.
    """

    _logger = LoggingHandler(__name__).get_logger()

    @classmethod
    def cache(cls) -> Callable:
        """Caches the current dataframe.

        Delegates to ``DataFrame.cache()``, therefore the storage level is the
        spark default for that call (documented by PySpark as
        MEMORY_AND_DISK_DESER since Spark 3.0).

        Returns:
            A function to be called in .transform() spark function.

        {{get_example(method_name='cache')}}
        """

        def inner(df: DataFrame) -> DataFrame:
            return df.cache()

        return inner

    @classmethod
    def persist(cls, storage_level: str = None) -> Callable:
        """Caches the current dataframe with a specific StorageLevel.

        Args:
            storage_level: name of the StorageLevel attribute to use (e.g.
                "MEMORY_ONLY"). When omitted (None), or when the name is not an
                attribute of StorageLevel, MEMORY_AND_DISK_DESER is used.
                [More options here](
                https://spark.apache.org/docs/latest/api/python/reference/api/pyspark.StorageLevel.html).

        Returns:
            A function to be called in .transform() spark function.

        {{get_example(method_name='persist')}}
        """

        def inner(df: DataFrame) -> DataFrame:
            # getattr() raises TypeError when the attribute name is not a
            # string, so the default (None) must skip the lookup entirely.
            if storage_level is None:
                level = StorageLevel.MEMORY_AND_DISK_DESER
            else:
                level = getattr(
                    StorageLevel, storage_level, StorageLevel.MEMORY_AND_DISK_DESER
                )

            return df.persist(level)

        return inner

    @classmethod
    def unpersist(cls, blocking: bool = False) -> Callable:
        """Removes the dataframe from the disk and memory.

        Args:
            blocking: whether to block until all the data blocks are
                removed from disk/memory or run asynchronously.

        Returns:
            A function to be called in .transform() spark function.

        {{get_example(method_name='unpersist')}}
        """

        def inner(df: DataFrame) -> DataFrame:
            return df.unpersist(blocking)

        return inner

cache() classmethod

Caches the current dataframe.

The storage level is the Spark default for DataFrame.cache() (MEMORY_AND_DISK_DESER since Spark 3.0).

Returns:

Type Description
Callable

A function to be called in .transform() spark function.

Source code in mkdocs/lakehouse_engine/packages/transformers/optimizers.py
@classmethod
def cache(cls) -> Callable:
    """Build a transformer that caches the dataframe it receives.

    The returned function simply delegates to ``DataFrame.cache()``, so the
    storage level is whatever spark uses by default for that call.

    Returns:
        A function to be called in .transform() spark function.

    {{get_example(method_name='cache')}}
    """

    def _cache_dataframe(dataframe: DataFrame) -> DataFrame:
        cached = dataframe.cache()
        return cached

    return _cache_dataframe

persist(storage_level=None) classmethod

Caches the current dataframe with a specific StorageLevel.

Parameters:

Name Type Description Default
storage_level str

The name of the StorageLevel to use; defaults to MEMORY_AND_DISK_DESER when omitted or when the name is not a known StorageLevel. More options here.

None

Returns:

Type Description
Callable

A function to be called in .transform() spark function.

Source code in mkdocs/lakehouse_engine/packages/transformers/optimizers.py
@classmethod
def persist(cls, storage_level: str = None) -> Callable:
    """Caches the current dataframe with a specific StorageLevel.

    Args:
        storage_level: name of the StorageLevel attribute to use (e.g.
            "MEMORY_ONLY"). When omitted (None), or when the name is not an
            attribute of StorageLevel, MEMORY_AND_DISK_DESER is used.
            [More options here](
            https://spark.apache.org/docs/latest/api/python/reference/api/pyspark.StorageLevel.html).

    Returns:
        A function to be called in .transform() spark function.

    {{get_example(method_name='persist')}}
    """

    def inner(df: DataFrame) -> DataFrame:
        # getattr() raises TypeError when the attribute name is not a
        # string, so the default (None) must skip the lookup entirely.
        if storage_level is None:
            level = StorageLevel.MEMORY_AND_DISK_DESER
        else:
            level = getattr(
                StorageLevel, storage_level, StorageLevel.MEMORY_AND_DISK_DESER
            )

        return df.persist(level)

    return inner

unpersist(blocking=False) classmethod

Removes the dataframe from the disk and memory.

Parameters:

Name Type Description Default
blocking bool

whether to block until all the data blocks are removed from disk/memory or run asynchronously.

False

Returns:

Type Description
Callable

A function to be called in .transform() spark function.

Source code in mkdocs/lakehouse_engine/packages/transformers/optimizers.py
@classmethod
def unpersist(cls, blocking: bool = False) -> Callable:
    """Build a transformer that un-caches the dataframe it receives.

    The returned function delegates to ``DataFrame.unpersist()``, forwarding
    the ``blocking`` flag given here.

    Args:
        blocking: whether to block until all the data blocks are
            removed from disk/memory or run asynchronously.

    Returns:
        A function to be called in .transform() spark function.

    {{get_example(method_name='unpersist')}}
    """

    def _release_dataframe(dataframe: DataFrame) -> DataFrame:
        released = dataframe.unpersist(blocking)
        return released

    return _release_dataframe