lakehouse_engine.utils.file_utils
Utilities for file name based operations.
"""Utilities for file name based operations."""

import re
from os import listdir
from typing import List


def get_file_names_without_file_type(
    path: str, file_type: str, exclude_regex: str
) -> list:
    """Retrieve the names of the files of a given type that exist in a folder.

    Entries are kept when their name ends with `file_type` and no part of the
    name matches `exclude_regex`. The returned names have their last extension
    (everything from the final '.') removed.

    Args:
        path: path to the folder to list files
        file_type: type of the file to include in list. It is compared as a
            plain suffix of the file name.
        exclude_regex: regex of file names to exclude. It is applied with
            `re.search`, so it may match anywhere in the name; note that an
            empty pattern matches every name and therefore excludes everything.

    Returns:
        A list of file names without file type, in the order given by listdir.
    """
    # Compile once instead of handing the raw pattern to `re` per entry.
    exclude_pattern = re.compile(exclude_regex)

    file_list: List[str] = [
        # Split on the LAST dot only, so dotted names such as "sales.v2.csv"
        # keep their full stem ("sales.v2") instead of being cut to "sales".
        file.rsplit(".", 1)[0]
        for file in listdir(path)
        if not exclude_pattern.search(file) and file.endswith(file_type)
    ]

    return file_list


def get_directory_path(path: str) -> str:
    """Add '/' to the end of the path of a directory.

    Args:
        path: directory to be processed

    Returns:
        Directory path stripped and with '/' at the end. An empty or
        whitespace-only path results in "/".
    """
    path = path.strip()
    # `endswith` is safe on an empty string, whereas indexing `path[-1]`
    # raises IndexError when nothing is left after stripping.
    return path if path.endswith("/") else path + "/"
def
get_file_names_without_file_type(path: str, file_type: str, exclude_regex: str) -> list:
def get_file_names_without_file_type(
    path: str, file_type: str, exclude_regex: str
) -> list:
    """Retrieve the names of the files of a given type that exist in a folder.

    Entries are kept when their name ends with `file_type` and no part of the
    name matches `exclude_regex`. The returned names have their last extension
    (everything from the final '.') removed.

    Args:
        path: path to the folder to list files
        file_type: type of the file to include in list. It is compared as a
            plain suffix of the file name.
        exclude_regex: regex of file names to exclude. It is applied with
            `re.search`, so it may match anywhere in the name; note that an
            empty pattern matches every name and therefore excludes everything.

    Returns:
        A list of file names without file type, in the order given by listdir.
    """
    # Compile once instead of handing the raw pattern to `re` per entry.
    exclude_pattern = re.compile(exclude_regex)

    file_list: List[str] = [
        # Split on the LAST dot only, so dotted names such as "sales.v2.csv"
        # keep their full stem ("sales.v2") instead of being cut to "sales".
        file.rsplit(".", 1)[0]
        for file in listdir(path)
        if not exclude_pattern.search(file) and file.endswith(file_type)
    ]

    return file_list
Function to retrieve list of file names in a folder.
This function keeps only the files whose name ends with the given file type and returns each name without its extension.
Arguments:
- path: path to the folder to list files
- file_type: type of the file to include in list
- exclude_regex: regex of file names to exclude
Returns:
A list of file names without file type.
def
get_directory_path(path: str) -> str:
def get_directory_path(path: str) -> str:
    """Add '/' to the end of the path of a directory.

    Args:
        path: directory to be processed

    Returns:
        Directory path stripped and with '/' at the end. An empty or
        whitespace-only path results in "/".
    """
    path = path.strip()
    # `endswith` is safe on an empty string, whereas indexing `path[-1]`
    # raises IndexError when nothing is left after stripping.
    return path if path.endswith("/") else path + "/"
Add '/' to the end of the path of a directory.
Arguments:
- path: directory to be processed
Returns:
Directory path stripped and with '/' at the end.