lakehouse_engine.utils.sql_parser_utils
Module to parse sql files.
1"""Module to parse sql files.""" 2 3from typing import Union 4 5from lakehouse_engine.core.definitions import SQLParser 6 7 8class SQLParserUtils(object): 9 """Parser utilities class.""" 10 11 def split_sql_commands( 12 self, 13 sql_commands: str, 14 delimiter: str, 15 advanced_parser: bool, 16 ) -> list[str]: 17 """Read the sql commands of a file to choose how to split them. 18 19 Args: 20 sql_commands: commands to be split. 21 delimiter: delimiter to split the sql commands. 22 advanced_parser: boolean to define if we need to use a complex split. 23 24 Returns: 25 List with the sql commands. 26 """ 27 if advanced_parser: 28 self.sql_commands: str = sql_commands 29 self.delimiter: str = delimiter 30 self.separated_sql_commands: list[str] = [] 31 self.split_index: int = 0 32 return self._split_sql_commands() 33 else: 34 return sql_commands.split(delimiter) 35 36 def _split_sql_commands(self) -> list[str]: 37 """Read the sql commands of a file to split them based on a delimiter. 38 39 Returns: 40 List with the sql commands. 41 """ 42 single_quotes: int = 0 43 double_quotes: int = 0 44 one_line_comment: int = 0 45 multiple_line_comment: int = 0 46 47 for index, char in enumerate(self.sql_commands): 48 if char == SQLParser.SINGLE_QUOTES.value and self._character_validation( 49 value=[double_quotes, one_line_comment, multiple_line_comment] 50 ): 51 single_quotes = self._update_value( 52 value=single_quotes, 53 condition=self._character_validation( 54 value=self._get_substring(first_char=index - 1, last_char=index) 55 ), 56 operation="+-", 57 ) 58 elif char == SQLParser.DOUBLE_QUOTES.value and self._character_validation( 59 value=[single_quotes, one_line_comment, multiple_line_comment] 60 ): 61 double_quotes = self._update_value( 62 value=double_quotes, 63 condition=self._character_validation( 64 value=self._get_substring(first_char=index - 1, last_char=index) 65 ), 66 operation="+-", 67 ) 68 elif char == SQLParser.SINGLE_TRACE.value and self._character_validation( 69 value=[double_quotes, single_quotes, multiple_line_comment] 70 ): 71 one_line_comment = self._update_value( 72 value=one_line_comment, 73 condition=( 74 self._get_substring(first_char=index, last_char=index + 2) 75 == SQLParser.DOUBLE_TRACES.value 76 ), 77 operation="+", 78 ) 79 elif ( 80 char == SQLParser.SLASH.value or char == SQLParser.STAR.value 81 ) and self._character_validation( 82 value=[double_quotes, single_quotes, one_line_comment] 83 ): 84 multiple_line_comment = self._update_value( 85 value=multiple_line_comment, 86 condition=self._get_substring(first_char=index, last_char=index + 2) 87 in SQLParser.MULTIPLE_LINE_COMMENT.value, 88 operation="+-", 89 ) 90 91 one_line_comment = self._update_value( 92 value=one_line_comment, 93 condition=char == SQLParser.PARAGRAPH.value, 94 operation="-", 95 ) 96 97 self._validate_command_is_closed( 98 index=index, 99 dependencies=self._character_validation( 100 value=[ 101 single_quotes, 102 double_quotes, 103 one_line_comment, 104 multiple_line_comment, 105 ] 106 ), 107 ) 108 109 return self.separated_sql_commands 110 111 def _get_substring(self, first_char: int = None, last_char: int = None) -> str: 112 """Get the substring based on the indexes passed as arguments. 113 114 Args: 115 first_char: represents the first index of the string. 116 last_char: represents the last index of the string. 117 118 Returns: 119 The substring based on the indexes passed as arguments. 120 """ 121 return self.sql_commands[first_char:last_char] 122 123 def _validate_command_is_closed(self, index: int, dependencies: int) -> None: 124 """Validate based on the delimiter if we have the closing of a sql command. 125 126 Args: 127 index: index of the character in a string. 128 dependencies: represents an int to validate if we are outside of quotes,... 129 """ 130 if ( 131 self._get_substring(first_char=index, last_char=index + len(self.delimiter)) 132 == self.delimiter 133 and dependencies 134 ): 135 self._add_new_command( 136 sql_command=self._get_substring( 137 first_char=self.split_index, last_char=index 138 ) 139 ) 140 self.split_index = index + len(self.delimiter) 141 142 if self._get_substring( 143 first_char=index, last_char=index + len(self.delimiter) 144 ) != self.delimiter and index + len(self.delimiter) == len(self.sql_commands): 145 self._add_new_command( 146 sql_command=self._get_substring( 147 first_char=self.split_index, last_char=len(self.sql_commands) 148 ) 149 ) 150 151 def _character_validation(self, value: Union[str, list]) -> bool: 152 """Validate if character is the opening/closing/inside of a comment. 153 154 Args: 155 value: represent the value associated to different validated 156 types or a character to be analyzed. 157 158 Returns: 159 Boolean that indicates if character found is the opening 160 or closing of a comment, is inside of quotes, comments,... 161 """ 162 if value.__class__.__name__ == "list": 163 return sum(value) == 0 164 else: 165 return value != SQLParser.BACKSLASH.value 166 167 def _add_new_command(self, sql_command: str) -> None: 168 """Add a newly found command to list of sql commands to execute. 169 170 Args: 171 sql_command: command to be added to list. 172 """ 173 self.separated_sql_commands.append(str(sql_command)) 174 175 def _update_value(self, value: int, operation: str, condition: bool = False) -> int: 176 """Update value associated to different types of comments or quotes. 177 178 Args: 179 value: value to be updated 180 operation: operation that we want to perform on the value. 181 condition: validate if we have a condition associated to the value. 182 183 Returns: 184 A integer that represents the updated value. 185 """ 186 if condition and operation == "+-": 187 value = value + 1 if value == 0 else value - 1 188 elif condition and operation == "+": 189 value = value + 1 if value == 0 else value 190 elif condition and operation == "-": 191 value = value - 1 if value == 1 else value 192 193 return value
class
SQLParserUtils:
9class SQLParserUtils(object): 10 """Parser utilities class.""" 11 12 def split_sql_commands( 13 self, 14 sql_commands: str, 15 delimiter: str, 16 advanced_parser: bool, 17 ) -> list[str]: 18 """Read the sql commands of a file to choose how to split them. 19 20 Args: 21 sql_commands: commands to be split. 22 delimiter: delimiter to split the sql commands. 23 advanced_parser: boolean to define if we need to use a complex split. 24 25 Returns: 26 List with the sql commands. 27 """ 28 if advanced_parser: 29 self.sql_commands: str = sql_commands 30 self.delimiter: str = delimiter 31 self.separated_sql_commands: list[str] = [] 32 self.split_index: int = 0 33 return self._split_sql_commands() 34 else: 35 return sql_commands.split(delimiter) 36 37 def _split_sql_commands(self) -> list[str]: 38 """Read the sql commands of a file to split them based on a delimiter. 39 40 Returns: 41 List with the sql commands. 42 """ 43 single_quotes: int = 0 44 double_quotes: int = 0 45 one_line_comment: int = 0 46 multiple_line_comment: int = 0 47 48 for index, char in enumerate(self.sql_commands): 49 if char == SQLParser.SINGLE_QUOTES.value and self._character_validation( 50 value=[double_quotes, one_line_comment, multiple_line_comment] 51 ): 52 single_quotes = self._update_value( 53 value=single_quotes, 54 condition=self._character_validation( 55 value=self._get_substring(first_char=index - 1, last_char=index) 56 ), 57 operation="+-", 58 ) 59 elif char == SQLParser.DOUBLE_QUOTES.value and self._character_validation( 60 value=[single_quotes, one_line_comment, multiple_line_comment] 61 ): 62 double_quotes = self._update_value( 63 value=double_quotes, 64 condition=self._character_validation( 65 value=self._get_substring(first_char=index - 1, last_char=index) 66 ), 67 operation="+-", 68 ) 69 elif char == SQLParser.SINGLE_TRACE.value and self._character_validation( 70 value=[double_quotes, single_quotes, multiple_line_comment] 71 ): 72 one_line_comment = self._update_value( 73 value=one_line_comment, 74 condition=( 75 self._get_substring(first_char=index, last_char=index + 2) 76 == SQLParser.DOUBLE_TRACES.value 77 ), 78 operation="+", 79 ) 80 elif ( 81 char == SQLParser.SLASH.value or char == SQLParser.STAR.value 82 ) and self._character_validation( 83 value=[double_quotes, single_quotes, one_line_comment] 84 ): 85 multiple_line_comment = self._update_value( 86 value=multiple_line_comment, 87 condition=self._get_substring(first_char=index, last_char=index + 2) 88 in SQLParser.MULTIPLE_LINE_COMMENT.value, 89 operation="+-", 90 ) 91 92 one_line_comment = self._update_value( 93 value=one_line_comment, 94 condition=char == SQLParser.PARAGRAPH.value, 95 operation="-", 96 ) 97 98 self._validate_command_is_closed( 99 index=index, 100 dependencies=self._character_validation( 101 value=[ 102 single_quotes, 103 double_quotes, 104 one_line_comment, 105 multiple_line_comment, 106 ] 107 ), 108 ) 109 110 return self.separated_sql_commands 111 112 def _get_substring(self, first_char: int = None, last_char: int = None) -> str: 113 """Get the substring based on the indexes passed as arguments. 114 115 Args: 116 first_char: represents the first index of the string. 117 last_char: represents the last index of the string. 118 119 Returns: 120 The substring based on the indexes passed as arguments. 121 """ 122 return self.sql_commands[first_char:last_char] 123 124 def _validate_command_is_closed(self, index: int, dependencies: int) -> None: 125 """Validate based on the delimiter if we have the closing of a sql command. 126 127 Args: 128 index: index of the character in a string. 129 dependencies: represents an int to validate if we are outside of quotes,... 130 """ 131 if ( 132 self._get_substring(first_char=index, last_char=index + len(self.delimiter)) 133 == self.delimiter 134 and dependencies 135 ): 136 self._add_new_command( 137 sql_command=self._get_substring( 138 first_char=self.split_index, last_char=index 139 ) 140 ) 141 self.split_index = index + len(self.delimiter) 142 143 if self._get_substring( 144 first_char=index, last_char=index + len(self.delimiter) 145 ) != self.delimiter and index + len(self.delimiter) == len(self.sql_commands): 146 self._add_new_command( 147 sql_command=self._get_substring( 148 first_char=self.split_index, last_char=len(self.sql_commands) 149 ) 150 ) 151 152 def _character_validation(self, value: Union[str, list]) -> bool: 153 """Validate if character is the opening/closing/inside of a comment. 154 155 Args: 156 value: represent the value associated to different validated 157 types or a character to be analyzed. 158 159 Returns: 160 Boolean that indicates if character found is the opening 161 or closing of a comment, is inside of quotes, comments,... 162 """ 163 if value.__class__.__name__ == "list": 164 return sum(value) == 0 165 else: 166 return value != SQLParser.BACKSLASH.value 167 168 def _add_new_command(self, sql_command: str) -> None: 169 """Add a newly found command to list of sql commands to execute. 170 171 Args: 172 sql_command: command to be added to list. 173 """ 174 self.separated_sql_commands.append(str(sql_command)) 175 176 def _update_value(self, value: int, operation: str, condition: bool = False) -> int: 177 """Update value associated to different types of comments or quotes. 178 179 Args: 180 value: value to be updated 181 operation: operation that we want to perform on the value. 182 condition: validate if we have a condition associated to the value. 183 184 Returns: 185 A integer that represents the updated value. 186 """ 187 if condition and operation == "+-": 188 value = value + 1 if value == 0 else value - 1 189 elif condition and operation == "+": 190 value = value + 1 if value == 0 else value 191 elif condition and operation == "-": 192 value = value - 1 if value == 1 else value 193 194 return value
Parser utilities class.
def
split_sql_commands( self, sql_commands: str, delimiter: str, advanced_parser: bool) -> list[str]:
12 def split_sql_commands( 13 self, 14 sql_commands: str, 15 delimiter: str, 16 advanced_parser: bool, 17 ) -> list[str]: 18 """Read the sql commands of a file to choose how to split them. 19 20 Args: 21 sql_commands: commands to be split. 22 delimiter: delimiter to split the sql commands. 23 advanced_parser: boolean to define if we need to use a complex split. 24 25 Returns: 26 List with the sql commands. 27 """ 28 if advanced_parser: 29 self.sql_commands: str = sql_commands 30 self.delimiter: str = delimiter 31 self.separated_sql_commands: list[str] = [] 32 self.split_index: int = 0 33 return self._split_sql_commands() 34 else: 35 return sql_commands.split(delimiter)
Read the sql commands of a file to choose how to split them.
Arguments:
- sql_commands: commands to be split.
- delimiter: delimiter to split the sql commands.
- advanced_parser: boolean to define if we need to use a complex split.
Returns:
List with the sql commands.