lakehouse_engine.utils.sql_parser_utils

Module to parse sql files.

  1"""Module to parse sql files."""
  2
  3from typing import Union
  4
  5from lakehouse_engine.core.definitions import SQLParser
  6
  7
  8class SQLParserUtils(object):
  9    """Parser utilities class."""
 10
 11    def split_sql_commands(
 12        self,
 13        sql_commands: str,
 14        delimiter: str,
 15        advanced_parser: bool,
 16    ) -> list[str]:
 17        """Read the sql commands of a file to choose how to split them.
 18
 19        Args:
 20            sql_commands: commands to be split.
 21            delimiter: delimiter to split the sql commands.
 22            advanced_parser: boolean to define if we need to use a complex split.
 23
 24        Returns:
 25            List with the sql commands.
 26        """
 27        if advanced_parser:
 28            self.sql_commands: str = sql_commands
 29            self.delimiter: str = delimiter
 30            self.separated_sql_commands: list[str] = []
 31            self.split_index: int = 0
 32            return self._split_sql_commands()
 33        else:
 34            return sql_commands.split(delimiter)
 35
 36    def _split_sql_commands(self) -> list[str]:
 37        """Read the sql commands of a file to split them based on a delimiter.
 38
 39        Returns:
 40            List with the sql commands.
 41        """
 42        single_quotes: int = 0
 43        double_quotes: int = 0
 44        one_line_comment: int = 0
 45        multiple_line_comment: int = 0
 46
 47        for index, char in enumerate(self.sql_commands):
 48            if char == SQLParser.SINGLE_QUOTES.value and self._character_validation(
 49                value=[double_quotes, one_line_comment, multiple_line_comment]
 50            ):
 51                single_quotes = self._update_value(
 52                    value=single_quotes,
 53                    condition=self._character_validation(
 54                        value=self._get_substring(first_char=index - 1, last_char=index)
 55                    ),
 56                    operation="+-",
 57                )
 58            elif char == SQLParser.DOUBLE_QUOTES.value and self._character_validation(
 59                value=[single_quotes, one_line_comment, multiple_line_comment]
 60            ):
 61                double_quotes = self._update_value(
 62                    value=double_quotes,
 63                    condition=self._character_validation(
 64                        value=self._get_substring(first_char=index - 1, last_char=index)
 65                    ),
 66                    operation="+-",
 67                )
 68            elif char == SQLParser.SINGLE_TRACE.value and self._character_validation(
 69                value=[double_quotes, single_quotes, multiple_line_comment]
 70            ):
 71                one_line_comment = self._update_value(
 72                    value=one_line_comment,
 73                    condition=(
 74                        self._get_substring(first_char=index, last_char=index + 2)
 75                        == SQLParser.DOUBLE_TRACES.value
 76                    ),
 77                    operation="+",
 78                )
 79            elif (
 80                char == SQLParser.SLASH.value or char == SQLParser.STAR.value
 81            ) and self._character_validation(
 82                value=[double_quotes, single_quotes, one_line_comment]
 83            ):
 84                multiple_line_comment = self._update_value(
 85                    value=multiple_line_comment,
 86                    condition=self._get_substring(first_char=index, last_char=index + 2)
 87                    in SQLParser.MULTIPLE_LINE_COMMENT.value,
 88                    operation="+-",
 89                )
 90
 91            one_line_comment = self._update_value(
 92                value=one_line_comment,
 93                condition=char == SQLParser.PARAGRAPH.value,
 94                operation="-",
 95            )
 96
 97            self._validate_command_is_closed(
 98                index=index,
 99                dependencies=self._character_validation(
100                    value=[
101                        single_quotes,
102                        double_quotes,
103                        one_line_comment,
104                        multiple_line_comment,
105                    ]
106                ),
107            )
108
109        return self.separated_sql_commands
110
111    def _get_substring(self, first_char: int = None, last_char: int = None) -> str:
112        """Get the substring based on the indexes passed as arguments.
113
114        Args:
115            first_char: represents the first index of the string.
116            last_char: represents the last index of the string.
117
118        Returns:
119            The substring based on the indexes passed as arguments.
120        """
121        return self.sql_commands[first_char:last_char]
122
123    def _validate_command_is_closed(self, index: int, dependencies: int) -> None:
124        """Validate based on the delimiter if we have the closing of a sql command.
125
126        Args:
127            index: index of the character in a string.
128            dependencies: represents an int to validate if we are outside of quotes,...
129        """
130        if (
131            self._get_substring(first_char=index, last_char=index + len(self.delimiter))
132            == self.delimiter
133            and dependencies
134        ):
135            self._add_new_command(
136                sql_command=self._get_substring(
137                    first_char=self.split_index, last_char=index
138                )
139            )
140            self.split_index = index + len(self.delimiter)
141
142        if self._get_substring(
143            first_char=index, last_char=index + len(self.delimiter)
144        ) != self.delimiter and index + len(self.delimiter) == len(self.sql_commands):
145            self._add_new_command(
146                sql_command=self._get_substring(
147                    first_char=self.split_index, last_char=len(self.sql_commands)
148                )
149            )
150
151    def _character_validation(self, value: Union[str, list]) -> bool:
152        """Validate if character is the opening/closing/inside of a comment.
153
154        Args:
155            value: represent the value associated to different validated
156            types or a character to be analyzed.
157
158        Returns:
159            Boolean that indicates if character found is the opening
160            or closing of a comment, is inside of quotes, comments,...
161        """
162        if value.__class__.__name__ == "list":
163            return sum(value) == 0
164        else:
165            return value != SQLParser.BACKSLASH.value
166
167    def _add_new_command(self, sql_command: str) -> None:
168        """Add a newly found command to list of sql commands to execute.
169
170        Args:
171            sql_command: command to be added to list.
172        """
173        self.separated_sql_commands.append(str(sql_command))
174
175    def _update_value(self, value: int, operation: str, condition: bool = False) -> int:
176        """Update value associated to different types of comments or quotes.
177
178        Args:
179            value: value to be updated
180            operation: operation that we want to perform on the value.
181            condition: validate if we have a condition associated to the value.
182
183        Returns:
184            A integer that represents the updated value.
185        """
186        if condition and operation == "+-":
187            value = value + 1 if value == 0 else value - 1
188        elif condition and operation == "+":
189            value = value + 1 if value == 0 else value
190        elif condition and operation == "-":
191            value = value - 1 if value == 1 else value
192
193        return value
class SQLParserUtils:
  9class SQLParserUtils(object):
 10    """Parser utilities class."""
 11
 12    def split_sql_commands(
 13        self,
 14        sql_commands: str,
 15        delimiter: str,
 16        advanced_parser: bool,
 17    ) -> list[str]:
 18        """Read the sql commands of a file to choose how to split them.
 19
 20        Args:
 21            sql_commands: commands to be split.
 22            delimiter: delimiter to split the sql commands.
 23            advanced_parser: boolean to define if we need to use a complex split.
 24
 25        Returns:
 26            List with the sql commands.
 27        """
 28        if advanced_parser:
 29            self.sql_commands: str = sql_commands
 30            self.delimiter: str = delimiter
 31            self.separated_sql_commands: list[str] = []
 32            self.split_index: int = 0
 33            return self._split_sql_commands()
 34        else:
 35            return sql_commands.split(delimiter)
 36
 37    def _split_sql_commands(self) -> list[str]:
 38        """Read the sql commands of a file to split them based on a delimiter.
 39
 40        Returns:
 41            List with the sql commands.
 42        """
 43        single_quotes: int = 0
 44        double_quotes: int = 0
 45        one_line_comment: int = 0
 46        multiple_line_comment: int = 0
 47
 48        for index, char in enumerate(self.sql_commands):
 49            if char == SQLParser.SINGLE_QUOTES.value and self._character_validation(
 50                value=[double_quotes, one_line_comment, multiple_line_comment]
 51            ):
 52                single_quotes = self._update_value(
 53                    value=single_quotes,
 54                    condition=self._character_validation(
 55                        value=self._get_substring(first_char=index - 1, last_char=index)
 56                    ),
 57                    operation="+-",
 58                )
 59            elif char == SQLParser.DOUBLE_QUOTES.value and self._character_validation(
 60                value=[single_quotes, one_line_comment, multiple_line_comment]
 61            ):
 62                double_quotes = self._update_value(
 63                    value=double_quotes,
 64                    condition=self._character_validation(
 65                        value=self._get_substring(first_char=index - 1, last_char=index)
 66                    ),
 67                    operation="+-",
 68                )
 69            elif char == SQLParser.SINGLE_TRACE.value and self._character_validation(
 70                value=[double_quotes, single_quotes, multiple_line_comment]
 71            ):
 72                one_line_comment = self._update_value(
 73                    value=one_line_comment,
 74                    condition=(
 75                        self._get_substring(first_char=index, last_char=index + 2)
 76                        == SQLParser.DOUBLE_TRACES.value
 77                    ),
 78                    operation="+",
 79                )
 80            elif (
 81                char == SQLParser.SLASH.value or char == SQLParser.STAR.value
 82            ) and self._character_validation(
 83                value=[double_quotes, single_quotes, one_line_comment]
 84            ):
 85                multiple_line_comment = self._update_value(
 86                    value=multiple_line_comment,
 87                    condition=self._get_substring(first_char=index, last_char=index + 2)
 88                    in SQLParser.MULTIPLE_LINE_COMMENT.value,
 89                    operation="+-",
 90                )
 91
 92            one_line_comment = self._update_value(
 93                value=one_line_comment,
 94                condition=char == SQLParser.PARAGRAPH.value,
 95                operation="-",
 96            )
 97
 98            self._validate_command_is_closed(
 99                index=index,
100                dependencies=self._character_validation(
101                    value=[
102                        single_quotes,
103                        double_quotes,
104                        one_line_comment,
105                        multiple_line_comment,
106                    ]
107                ),
108            )
109
110        return self.separated_sql_commands
111
112    def _get_substring(self, first_char: int = None, last_char: int = None) -> str:
113        """Get the substring based on the indexes passed as arguments.
114
115        Args:
116            first_char: represents the first index of the string.
117            last_char: represents the last index of the string.
118
119        Returns:
120            The substring based on the indexes passed as arguments.
121        """
122        return self.sql_commands[first_char:last_char]
123
124    def _validate_command_is_closed(self, index: int, dependencies: int) -> None:
125        """Validate based on the delimiter if we have the closing of a sql command.
126
127        Args:
128            index: index of the character in a string.
129            dependencies: represents an int to validate if we are outside of quotes,...
130        """
131        if (
132            self._get_substring(first_char=index, last_char=index + len(self.delimiter))
133            == self.delimiter
134            and dependencies
135        ):
136            self._add_new_command(
137                sql_command=self._get_substring(
138                    first_char=self.split_index, last_char=index
139                )
140            )
141            self.split_index = index + len(self.delimiter)
142
143        if self._get_substring(
144            first_char=index, last_char=index + len(self.delimiter)
145        ) != self.delimiter and index + len(self.delimiter) == len(self.sql_commands):
146            self._add_new_command(
147                sql_command=self._get_substring(
148                    first_char=self.split_index, last_char=len(self.sql_commands)
149                )
150            )
151
152    def _character_validation(self, value: Union[str, list]) -> bool:
153        """Validate if character is the opening/closing/inside of a comment.
154
155        Args:
156            value: represent the value associated to different validated
157            types or a character to be analyzed.
158
159        Returns:
160            Boolean that indicates if character found is the opening
161            or closing of a comment, is inside of quotes, comments,...
162        """
163        if value.__class__.__name__ == "list":
164            return sum(value) == 0
165        else:
166            return value != SQLParser.BACKSLASH.value
167
168    def _add_new_command(self, sql_command: str) -> None:
169        """Add a newly found command to list of sql commands to execute.
170
171        Args:
172            sql_command: command to be added to list.
173        """
174        self.separated_sql_commands.append(str(sql_command))
175
176    def _update_value(self, value: int, operation: str, condition: bool = False) -> int:
177        """Update value associated to different types of comments or quotes.
178
179        Args:
180            value: value to be updated
181            operation: operation that we want to perform on the value.
182            condition: validate if we have a condition associated to the value.
183
184        Returns:
185            A integer that represents the updated value.
186        """
187        if condition and operation == "+-":
188            value = value + 1 if value == 0 else value - 1
189        elif condition and operation == "+":
190            value = value + 1 if value == 0 else value
191        elif condition and operation == "-":
192            value = value - 1 if value == 1 else value
193
194        return value

Parser utilities class.

def split_sql_commands( self, sql_commands: str, delimiter: str, advanced_parser: bool) -> list[str]:
12    def split_sql_commands(
13        self,
14        sql_commands: str,
15        delimiter: str,
16        advanced_parser: bool,
17    ) -> list[str]:
18        """Read the sql commands of a file to choose how to split them.
19
20        Args:
21            sql_commands: commands to be split.
22            delimiter: delimiter to split the sql commands.
23            advanced_parser: boolean to define if we need to use a complex split.
24
25        Returns:
26            List with the sql commands.
27        """
28        if advanced_parser:
29            self.sql_commands: str = sql_commands
30            self.delimiter: str = delimiter
31            self.separated_sql_commands: list[str] = []
32            self.split_index: int = 0
33            return self._split_sql_commands()
34        else:
35            return sql_commands.split(delimiter)

Read the sql commands of a file to choose how to split them.

Arguments:
  • sql_commands: commands to be split.
  • delimiter: delimiter to split the sql commands.
  • advanced_parser: boolean to define if we need to use a complex split.
Returns:

List with the sql commands.