Unions

Module with union transformers.

`Unions` ¶

Bases: object

Class containing union transformers.

Source code in mkdocs/lakehouse_engine/packages/transformers/unions.py

class Unions(object):
    """Class containing union transformers.

    Each transformer is a factory: it captures its configuration and returns a
    function that takes a DataFrame and returns the unioned DataFrame, so it
    can be passed to ``.transform()``.
    """

    _logger = LoggingHandler(__name__).get_logger()

    @classmethod
    def union(
        cls,
        union_with: List[DataFrame],
        deduplication: bool = True,
    ) -> Callable:
        """Union dataframes, resolving columns by position (not by name).

        Args:
            union_with: dataframes to union onto the transformed dataframe, in
                the given order. A list or any other re-iterable collection
                (e.g. a tuple) is accepted; an empty collection leaves the
                transformed dataframe as the only input.
            deduplication: whether to drop duplicate rows. When enabled,
                ``distinct()`` is applied to the whole unioned result.

        Returns:
            A function to be called in .transform() spark function.

        {{get_example(method_name='union')}}
        """

        def inner(df: DataFrame) -> DataFrame:
            # Seed the fold with `df` rather than building `[df] + union_with`:
            # same result for lists, but it also works for tuples and other
            # iterables and avoids allocating a throwaway list.
            union_df = reduce(lambda acc, other: acc.union(other), union_with, df)

            return union_df.distinct() if deduplication else union_df

        return inner

    @classmethod
    def union_by_name(
        cls,
        union_with: List[DataFrame],
        deduplication: bool = True,
        allow_missing_columns: bool = True,
    ) -> Callable:
        """Union dataframes, resolving columns by name (not by position).

        Args:
            union_with: dataframes to union onto the transformed dataframe, in
                the given order. A list or any other re-iterable collection
                (e.g. a tuple) is accepted; an empty collection leaves the
                transformed dataframe as the only input.
            deduplication: whether to drop duplicate rows. When enabled,
                ``distinct()`` is applied to the whole unioned result.
            allow_missing_columns: allow the union of DataFrames with different
                schemas. Forwarded as ``allowMissingColumns`` to every
                ``unionByName`` call.

        Returns:
            A function to be called in .transform() spark function.

        {{get_example(method_name='union_by_name')}}
        """

        def inner(df: DataFrame) -> DataFrame:
            # Seed the fold with `df` rather than building `[df] + union_with`:
            # same result for lists, but it also works for tuples and other
            # iterables and avoids allocating a throwaway list.
            union_df = reduce(
                lambda acc, other: acc.unionByName(
                    other, allowMissingColumns=allow_missing_columns
                ),
                union_with,
                df,
            )

            return union_df.distinct() if deduplication else union_df

        return inner

`union(union_with, deduplication=True)` `classmethod` ¶

Union dataframes, resolving columns by position (not by name).

Parameters:

Name	Type	Description	Default
`union_with`	`List[DataFrame]`	list of dataframes to union.	required
`deduplication`	`bool`	whether to remove duplicate rows from the unioned result.	`True`

Returns:

Type	Description
`Callable`	A function to be passed to Spark's `DataFrame.transform()`.

Source code in mkdocs/lakehouse_engine/packages/transformers/unions.py

@classmethod
def union(
    cls,
    union_with: List[DataFrame],
    deduplication: bool = True,
) -> Callable:
    """Union dataframes, resolving columns by position (not by name).

    Args:
        union_with: dataframes to union onto the transformed dataframe, in
            the given order. A list or any other re-iterable collection
            (e.g. a tuple) is accepted; an empty collection leaves the
            transformed dataframe as the only input.
        deduplication: whether to drop duplicate rows. When enabled,
            ``distinct()`` is applied to the whole unioned result.

    Returns:
        A function to be called in .transform() spark function.

    {{get_example(method_name='union')}}
    """

    def inner(df: DataFrame) -> DataFrame:
        # Seed the fold with `df` rather than building `[df] + union_with`:
        # same result for lists, but it also works for tuples and other
        # iterables and avoids allocating a throwaway list.
        union_df = reduce(lambda acc, other: acc.union(other), union_with, df)

        return union_df.distinct() if deduplication else union_df

    return inner

`union_by_name(union_with, deduplication=True, allow_missing_columns=True)` `classmethod` ¶

Union dataframes, resolving columns by name (not by position).

Parameters:

Name	Type	Description	Default
`union_with`	`List[DataFrame]`	list of dataframes to union.	required
`deduplication`	`bool`	whether to remove duplicate rows from the unioned result.	`True`
`allow_missing_columns`	`bool`	allow the union of DataFrames with different schemas.	`True`

Returns:

Type	Description
`Callable`	A function to be passed to Spark's `DataFrame.transform()`.

Source code in mkdocs/lakehouse_engine/packages/transformers/unions.py

@classmethod
def union_by_name(
    cls,
    union_with: List[DataFrame],
    deduplication: bool = True,
    allow_missing_columns: bool = True,
) -> Callable:
    """Union dataframes, resolving columns by name (not by position).

    Args:
        union_with: dataframes to union onto the transformed dataframe, in
            the given order. A list or any other re-iterable collection
            (e.g. a tuple) is accepted; an empty collection leaves the
            transformed dataframe as the only input.
        deduplication: whether to drop duplicate rows. When enabled,
            ``distinct()`` is applied to the whole unioned result.
        allow_missing_columns: allow the union of DataFrames with different
            schemas. Forwarded as ``allowMissingColumns`` to every
            ``unionByName`` call.

    Returns:
        A function to be called in .transform() spark function.

    {{get_example(method_name='union_by_name')}}
    """

    def inner(df: DataFrame) -> DataFrame:
        # Seed the fold with `df` rather than building `[df] + union_with`:
        # same result for lists, but it also works for tuples and other
        # iterables and avoids allocating a throwaway list.
        union_df = reduce(
            lambda acc, other: acc.unionByName(
                other, allowMissingColumns=allow_missing_columns
            ),
            union_with,
            df,
        )

        return union_df.distinct() if deduplication else union_df

    return inner

Unions

Unions ¶

union(union_with, deduplication=True) classmethod ¶

union_by_name(union_with, deduplication=True, allow_missing_columns=True) classmethod ¶

`Unions` ¶

`union(union_with, deduplication=True)` `classmethod` ¶

`union_by_name(union_with, deduplication=True, allow_missing_columns=True)` `classmethod` ¶