lakehouse_engine.utils.file_utils

Utilities for file name based operations.

 1"""Utilities for file name based operations."""
 2
 3import re
 4from os import listdir
 5from typing import List
 6
 7
 8def get_file_names_without_file_type(
 9    path: str, file_type: str, exclude_regex: str
10) -> list:
11    """Function to retrieve list of file names in a folder.
12
13    This function filters by file type and removes the extension of the file name
14    it returns.
15
16    Args:
17        path: path to the folder to list files
18        file_type: type of the file to include in list
19        exclude_regex: regex of file names to exclude
20
21    Returns:
22        A list of file names without file type.
23    """
24    file_list: List[str] = []
25
26    for file in listdir(path):
27        if not re.search(exclude_regex, file) and file.endswith(file_type):
28            file_list.append(file.split(".")[0])
29
30    return file_list
31
32
def get_directory_path(path: str) -> str:
    """Add '/' to the end of the path of a directory.

    Args:
        path: directory to be processed

    Returns:
        Directory path stripped and with '/' at the end.

    Raises:
        ValueError: if the path is empty or contains only whitespace.
    """
    path = path.strip()
    if not path:
        # Indexing an empty string used to fail with an unhelpful IndexError;
        # returning "/" instead would silently point at the filesystem root.
        raise ValueError("The directory path must not be empty.")
    return path if path.endswith("/") else path + "/"
def get_file_names_without_file_type(
    path: str, file_type: str, exclude_regex: str
) -> list:
    """Function to retrieve list of file names in a folder.

    This function filters by file type and removes the extension of the file name
    it returns. Only the trailing ``file_type`` extension is removed, so dots that
    are part of the name itself are kept (``my.file.sql`` -> ``my.file``).

    Args:
        path: path to the folder to list files
        file_type: type of the file to include in list, with or without the
            leading dot (e.g. ``sql`` or ``.sql``)
        exclude_regex: regex of file names to exclude. An empty value means
            that no file is excluded.

    Returns:
        A list of file names without file type, in the order returned by
        ``listdir``.
    """
    # An empty pattern matches every name with re.search, which would silently
    # exclude all files; treat it as "no exclusion" instead. The pattern is
    # compiled once because it does not change between iterations.
    exclude_pattern = re.compile(exclude_regex) if exclude_regex else None
    extension = file_type if file_type.startswith(".") else "." + file_type

    file_list: List[str] = []

    for file in listdir(path):
        if not file.endswith(file_type):
            continue
        if exclude_pattern is not None and exclude_pattern.search(file):
            continue

        if file_type and file.endswith(extension):
            # Drop only the trailing extension, keeping dots inside the name.
            file_list.append(file[: -len(extension)])
        else:
            # The match is not a dot-delimited extension (or file_type is
            # empty): keep the previous behaviour of cutting at the first dot.
            file_list.append(file.split(".")[0])

    return file_list

Function to retrieve list of file names in a folder.

This function filters by file type and removes the extension of the file name it returns.

Arguments:
  • path: path to the folder to list files
  • file_type: type of the file to include in list
  • exclude_regex: regex of file names to exclude
Returns:

A list of file names without file type.

def get_directory_path(path: str) -> str:
    """Add '/' to the end of the path of a directory.

    Args:
        path: directory to be processed

    Returns:
        Directory path stripped and with '/' at the end.

    Raises:
        ValueError: if the path is empty or contains only whitespace.
    """
    path = path.strip()
    if not path:
        # Indexing an empty string used to fail with an unhelpful IndexError;
        # returning "/" instead would silently point at the filesystem root.
        raise ValueError("The directory path must not be empty.")
    return path if path.endswith("/") else path + "/"

Add '/' to the end of the path of a directory.

Arguments:
  • path: directory to be processed
Returns:

Directory path stripped and with '/' at the end.