Total Complexity: 146
Total Lines: 660
Duplicated Lines: 7.27 %
Changes: 14
Bugs: 0
Features: 0
Duplicate code is one of the most pungent code smells. A commonly used rule of thumb is to restructure code once it is duplicated in three or more places.
Common duplication problems have corresponding extraction refactorings. In this file, the repeated "raise or log" handling inside the type-validation branches of read_excel is a typical candidate; a sketch of such an extraction follows.
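The sketch below shows what such an extraction could look like for the duplicated blocks in read_excel that append a message either to the exception queue or to the log queue. The helper name _queue_type_error is hypothetical and not part of the plugin; it only illustrates the shape of the refactoring.

    def _queue_type_error(self, msg_queue, config_header, msg):
        # Queue a validation message either for raising or for logging,
        # depending on the per-header 'fail_on_type_error' setting.
        # This block is currently copy-pasted into the 'date', 'enum',
        # 'float', 'integer' and 'string' branches of read_excel.
        if config_header['fail_on_type_error']:
            msg_queue['fail_on_type_error']['exceptions'].append(msg)
        else:
            msg_queue['fail_on_type_error']['logs'].append(msg)

Each validation branch would then only build its message and call self._queue_type_error(msg_queue, config_header, msg).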
Complex classes like ExcelValidationPlugin often do a lot of different things. To break such a class down, we need to identify a cohesive component within it. A common way to find such a component is to look for fields and methods that share the same prefixes or suffixes.
Once you have determined which fields belong together, you can apply the Extract Class refactoring (see the sketch below). If the component makes sense as a sub-class, Extract Subclass is also a candidate and is often faster.
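In read_excel below, the corr_header_idx_cfg_* and corr_data_idx_cfg_* locals share exactly this kind of prefix scheme. The class below is a hypothetical sketch of an Extract Class step for them; _IndexConfig does not exist in the plugin and is only meant to illustrate the idea.

    class _IndexConfig(object):
        # Holds the corrected first/last indices of one region (headers or data)
        # after the orientation has been normalized to a column_based view.

        def __init__(self, row_first, row_last, col_first, col_last):
            self.row_first = row_first
            self.row_last = row_last
            self.col_first = col_first
            self.col_last = col_last

        @classmethod
        def from_config(cls, index_config, column_based):
            # Swap row and column indices for row_based sheets so that the
            # rest of the code can always think in a column_based layout.
            rows = index_config['row_index']
            cols = index_config['column_index']
            if not column_based:
                rows, cols = cols, rows
            return cls(rows['first'], rows['last'], cols['first'], cols['last'])

read_excel could then replace its eight corr_* locals with two such objects, e.g. header_idx = _IndexConfig.from_config(self.excel_config['headers_index_config'], orientation == 'column_based'). The reported source of the class follows.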
1 | """ |
||
27 | class ExcelValidationPlugin: |
||
28 | """ |
||
29 | Plugin level class of the Excel validation pattern. |
||
30 | Note: There are no instances of the ExcelValidationPattern on the app. |
||
31 | """ |
||
32 | |||
33 | def __init__(self, plugin): |
||
34 | """ |
||
35 | :param plugin: The plugin, which wants to use documents |
||
36 | :type plugin: GwBasePattern |
||
37 | """ |
||
38 | self._plugin = plugin |
||
39 | self._app = plugin.app |
||
40 | self.excel_config = None |
||
41 | |||
    def read_excel(self, excel_config_json_path, excel_workbook_path):
        """
        Main routine to read an Excel sheet.

        :param excel_config_json_path: The configuration json file
        :param excel_workbook_path: Relative or absolute path to an Excel workbook
        :return: Data dictionary with rows/columns as keys and a dictionary of "header": value as items
        """

        # The exceptions raised in this method shall be raised to the plugin level
        self.excel_config = self._validate_json(excel_config_json_path)

        #########################
        # Get index configuration
        #########################
        orientation = self.excel_config['orientation']
        header_idx_cfg_row_first = self.excel_config['headers_index_config']['row_index']['first']
        header_idx_cfg_row_last = self.excel_config['headers_index_config']['row_index']['last']
        header_idx_cfg_col_first = self.excel_config['headers_index_config']['column_index']['first']
        header_idx_cfg_col_last = self.excel_config['headers_index_config']['column_index']['last']
        data_idx_cfg_row_first = self.excel_config['data_index_config']['row_index']['first']
        data_idx_cfg_row_last = self.excel_config['data_index_config']['row_index']['last']
        data_idx_cfg_col_first = self.excel_config['data_index_config']['column_index']['first']
        data_idx_cfg_col_last = self.excel_config['data_index_config']['column_index']['last']

        ############################################################################
        # Rotate coordinates so we can work with virtually column based all the time
        # Just before addressing the cells a coordinate transformation is done again
        ############################################################################
        if orientation == 'column_based':
            # Correct the header and data matrix according to the orientation
            # We rotate the row_based layout so we can work with column_based in mind all the time
            # Name schema: corrected headers/data index config column/row first/last
            corr_header_idx_cfg_row_first = header_idx_cfg_row_first
            corr_header_idx_cfg_row_last = header_idx_cfg_row_last
            corr_header_idx_cfg_col_first = header_idx_cfg_col_first
            corr_header_idx_cfg_col_last = header_idx_cfg_col_last
            corr_data_idx_cfg_row_first = data_idx_cfg_row_first
            corr_data_idx_cfg_row_last = data_idx_cfg_row_last
            corr_data_idx_cfg_col_first = data_idx_cfg_col_first
            corr_data_idx_cfg_col_last = data_idx_cfg_col_last
        else:
            # row_based layout
            corr_header_idx_cfg_row_first = header_idx_cfg_col_first
            corr_header_idx_cfg_row_last = header_idx_cfg_col_last
            corr_header_idx_cfg_col_first = header_idx_cfg_row_first
            corr_header_idx_cfg_col_last = header_idx_cfg_row_last
            corr_data_idx_cfg_row_first = data_idx_cfg_col_first
            corr_data_idx_cfg_row_last = data_idx_cfg_col_last
            corr_data_idx_cfg_col_first = data_idx_cfg_row_first
            corr_data_idx_cfg_col_last = data_idx_cfg_row_last

        # Used for log and exception messages
        oriented_row_text = "row" if orientation == 'column_based' else "column"
        oriented_column_text = "column" if orientation == 'column_based' else "row"
        ######################################
        # Set defaults of headers_index_config
        ######################################
        if type(corr_header_idx_cfg_row_first) is not int:
            # Assume the user wants to use the first row as header row
            corr_header_idx_cfg_row_first = 1
            self._plugin.log.debug("Config update: Setting headers_index_config -> {0}_index -> first to 1".format(
                oriented_row_text))

        if type(corr_header_idx_cfg_row_last) is not int:
            # Only 1 header row is supported currently, set last header row to first header row
            corr_header_idx_cfg_row_last = corr_header_idx_cfg_row_first
            self._plugin.log.debug("Config update: Setting headers_index_config -> {0}_index -> last to {1}".format(
                oriented_row_text, corr_header_idx_cfg_row_first))

        if type(corr_header_idx_cfg_col_first) is not int:
            # Assume the user wants to start at the first column
            corr_header_idx_cfg_col_first = 1
            self._plugin.log.debug("Config update: Setting headers_index_config -> {0}_index -> first to 1".format(
                oriented_column_text))

        if type(corr_header_idx_cfg_col_last) is not int:
            if type(corr_data_idx_cfg_col_last) is int:
                # We don't have a last column in the header config,
                # so we use what we have in the data config
                corr_header_idx_cfg_col_last = corr_data_idx_cfg_col_last
                self._plugin.log.debug("Config update: Setting headers_index_config -> {0}_index -> last to "
                                       "{1}".format(oriented_column_text, corr_header_idx_cfg_col_last))
        ###################################
        # Set defaults of data_index_config
        ###################################
        if type(corr_data_idx_cfg_row_first) is not int:
            # Assume the first data row is the next after corr_header_idx_cfg_row_last
            corr_data_idx_cfg_row_first = corr_header_idx_cfg_row_last + 1
            self._plugin.log.debug("Config update: Setting data_index_config -> {0}_index -> first to {1}".format(
                oriented_row_text, corr_data_idx_cfg_row_first))

        # corr_data_idx_cfg_row_last has no defaults - the user input is master

        if type(corr_data_idx_cfg_col_first) is not int:
            # Assume the first data column is equal to the first header column
            corr_data_idx_cfg_col_first = corr_header_idx_cfg_col_first
            self._plugin.log.debug("Config update: Setting data_index_config -> {0}_index -> first to {1}".format(
                oriented_column_text, corr_data_idx_cfg_col_first))

        if type(corr_data_idx_cfg_col_last) is not int:
            if type(corr_header_idx_cfg_col_last) is int:
                # We don't have a last column in the data config,
                # so we use what we have in the header config
                corr_data_idx_cfg_col_last = corr_header_idx_cfg_col_last
                self._plugin.log.debug("Config update: Setting data_index_config -> {0}_index -> last to "
                                       "{1}".format(oriented_column_text, corr_data_idx_cfg_col_last))

        ################################
        # Some more logic checks on rows
        # (matrix size and row order)
        ################################
        if corr_header_idx_cfg_row_last != corr_header_idx_cfg_row_first:
            # We can compare both because at this point they have to be integer
            # Multi line headers given
            self._raise_value_error("Config error: Multi line (grouped) headers are not yet supported. "
                                    "First and last header {0} must be equal.".format(oriented_row_text))

        if corr_data_idx_cfg_row_first <= corr_header_idx_cfg_row_last:
            # We can compare both because at this point they have to be integer
            # The data section is above the header section
            self._raise_value_error("Config error: headers_index_config -> {0}_index -> last is greater than "
                                    "data_index_config -> {0}_index -> first.".format(oriented_row_text))

        if type(corr_data_idx_cfg_row_last) is int:
            if corr_data_idx_cfg_row_last < corr_data_idx_cfg_row_first:
                # The last data row is smaller than the first
                self._raise_value_error("Config error: data_index_config -> {0}_index -> first is greater than "
                                        "data_index_config -> {0}_index -> last.".format(oriented_row_text))

        ###################################
        # Some more logic checks on columns
        # (mismatches)
        ###################################
        if corr_header_idx_cfg_col_first != corr_data_idx_cfg_col_first:
            # We can compare both because at this point they have to be integer
            # First column mismatch
            self._raise_value_error("Config error: header_index_config -> {0}_index -> first is not equal to "
                                    "data_index_config -> {0}_index -> first.".format(oriented_column_text))

        if type(corr_header_idx_cfg_col_last) is int:
            if type(corr_data_idx_cfg_col_last) is int:
                if corr_header_idx_cfg_col_last != corr_data_idx_cfg_col_last:
                    # Last columns are given but do not match
                    self._raise_value_error(
                        "Config error: header_index_config -> {0}_index -> last ({1}) is not equal to "
                        "data_index_config -> {0}_index -> last ({2}).".format(
                            oriented_column_text,
                            corr_header_idx_cfg_col_last,
                            corr_data_idx_cfg_col_last
                        ))

        if type(corr_header_idx_cfg_col_last) is not int:
            if type(corr_data_idx_cfg_col_last) is not int:
                # The column count search is done on the header row, not on data
                if corr_data_idx_cfg_col_last != 'automatic':
                    self._raise_value_error(
                        "Config error: data_index_config -> {0}_index -> last ({1}) may only be an integer or "
                        "contain the value 'automatic'.".format(oriented_column_text,
                                                                corr_data_idx_cfg_col_last))
        #########################################
        # Set defaults for optional config values
        #########################################
        if 'sheet_config' not in self.excel_config:
            self.excel_config['sheet_config'] = 'active'

        for data_type_config in self.excel_config['data_type_config']:
            # default for possible problems should be strict if user tells nothing
            if 'fail_on_type_error' not in data_type_config:
                data_type_config['fail_on_type_error'] = True
            if 'fail_on_empty_cell' not in data_type_config:
                data_type_config['fail_on_empty_cell'] = True
            if 'fail_on_header_not_found' not in data_type_config:
                data_type_config['fail_on_header_not_found'] = True

            # default type is automatic
            if 'type' not in data_type_config:
                data_type_config['type'] = {'base': 'automatic'}

        # Set global filter properties
        # Defensive variant is True for all options
        if 'filter_properties' not in self.excel_config:
            self.excel_config['filter_properties'] = {}
        if 'excluded_fail_on_type_error' not in self.excel_config['filter_properties']:
            self.excel_config['filter_properties']['excluded_fail_on_type_error'] = True
        if 'excluded_fail_on_empty_cell' not in self.excel_config['filter_properties']:
            self.excel_config['filter_properties']['excluded_fail_on_empty_cell'] = True
        if 'excluded_enable_logging' not in self.excel_config['filter_properties']:
            self.excel_config['filter_properties']['excluded_enable_logging'] = True

        ############################
        # Get the workbook and sheet
        ############################
        wb = openpyxl.load_workbook(excel_workbook_path, data_only=True)
        ws = self._get_sheet(wb)
        #############################
        # Determine header row length
        #############################
        if type(corr_header_idx_cfg_col_last) == int:
            # corr_header_idx_cfg_col_last already has the final value
            pass
        elif corr_header_idx_cfg_col_last == 'automatic':
            # automatic: use the length of the header row
            corr_header_idx_cfg_col_last = len(
                ws[self._transform_coordinates(row=corr_header_idx_cfg_row_first)])
            self._plugin.log.debug("Config update: Last header {0} was set to {1} using the 'automatic' "
                                   "mechanism.".format(oriented_column_text, corr_header_idx_cfg_col_last))
        else:
            # severalEmptyCells chosen
            target_empty_cell_count = int(corr_header_idx_cfg_col_last.split(':')[1])
            empty_cell_count = 0
            curr_column = corr_header_idx_cfg_col_first
            while empty_cell_count < target_empty_cell_count:
                value = ws[self._transform_coordinates(corr_header_idx_cfg_row_first,
                                                       curr_column)].value
                if value is None:
                    empty_cell_count += 1
                curr_column += 1
            corr_header_idx_cfg_col_last = curr_column - target_empty_cell_count - 1
            self._plugin.log.debug("Config update: Last header {0} was set to {1} using the 'severalEmptyCells' "
                                   "mechanism.".format(oriented_column_text, corr_header_idx_cfg_col_last))
        ###################################
        # Determine header column locations
        ###################################
        spreadsheet_headers2columns = {}
        for column in range(corr_header_idx_cfg_col_first, corr_header_idx_cfg_col_last + 1):
            value = ws[self._transform_coordinates(corr_header_idx_cfg_row_first, column)].value
            if value is not None:
                spreadsheet_headers2columns[value] = column
            else:
                # if the value is None we have either
                # - one or more empty header cells in between 2 filled header cells.
                # - some empty header cells at the end of the row.
                # That might happen because when choosing 'automatic' header row detection, openpyxl functionality
                # is used. It always returns the length of the longest row in the whole sheet.
                # If that is not the header row, we have empty cells at the end of the header row.
                # However, that is not a problem as header values 'None' are not added.
                pass
        spreadsheet_headers = spreadsheet_headers2columns.keys()

        ###############################################################
        # Check for not existing headers on spreadsheet and config side
        ###############################################################
        # Build a data_type_config dictionary with header as key
        config_header_dict = {x['header']: x for x in self.excel_config['data_type_config']}

        # Check: Are the configured data_type_config headers unique?
        # (Duplicate headers collapse into a single key when building config_header_dict.)
        if len(self.excel_config['data_type_config']) != len(config_header_dict):
            self._raise_value_error("Config error: data_type_config -> header duplicate entries found.")
        # Check for configured headers not found in spreadsheet
        missing_headers_in_spreadsheet = list(set(config_header_dict.keys()) - set(spreadsheet_headers))
        for header in missing_headers_in_spreadsheet:
            # Check if the fail_on_header_not_found is true
            data_type = [x for x in self.excel_config['data_type_config'] if x['header'] == header][0]
            msg = u"Config error: The header '{0}' could not be found in the spreadsheet.".format(header)
            if data_type['fail_on_header_not_found']:
                self._raise_value_error(msg)
            else:
                self._plugin.log.error(msg)

        # Check for spreadsheet headers not found in config
        missing_headers_in_config = list(set(spreadsheet_headers) - set(config_header_dict.keys()))
        if missing_headers_in_config:
            self._plugin.log.debug(u"The following spreadsheet headers are not configured for reading: {0}".format(
                ', '.join([header.replace('\n', ' ') for header in missing_headers_in_config])))
        for header in missing_headers_in_config:
            del spreadsheet_headers2columns[header]
        #########################
        # Determine last data row
        #########################
        if type(corr_data_idx_cfg_row_last) == int:
            # do nothing, the value is already set
            pass
        elif corr_data_idx_cfg_row_last == 'automatic':
            # Take data from library
            corr_data_idx_cfg_row_last = corr_data_idx_cfg_row_first
            for curr_column in range(corr_header_idx_cfg_col_first,
                                     corr_header_idx_cfg_col_last):
                len_curr_column = len(ws[self._transform_coordinates(column=curr_column)])
                if len_curr_column > corr_data_idx_cfg_row_last:
                    corr_data_idx_cfg_row_last = len_curr_column
            self._plugin.log.debug("Config update: Last data {0} was set to {1} using the 'automatic' "
                                   "mechanism.".format(oriented_row_text, corr_data_idx_cfg_row_last))
        else:
            # severalEmptyCells is chosen
            target_empty_rows_count = int(corr_data_idx_cfg_row_last.split(':')[1])
            last_row_detected = False
            curr_row = corr_data_idx_cfg_row_first
            empty_rows_count = 0
            while not last_row_detected:
                # go through rows
                all_columns_empty = True
                for header in spreadsheet_headers2columns:
                    curr_column = spreadsheet_headers2columns[header]
                    value = ws[self._transform_coordinates(curr_row, curr_column)].value
                    if value is not None:
                        all_columns_empty = False
                        break
                if all_columns_empty:
                    empty_rows_count += 1
                if empty_rows_count >= target_empty_rows_count:
                    last_row_detected = True
                else:
                    curr_row += 1
            corr_data_idx_cfg_row_last = curr_row - target_empty_rows_count
            self._plugin.log.debug("Config update: Last data {0} was set to {1} using the 'severalEmptyCells' "
                                   "mechanism.".format(oriented_row_text, corr_data_idx_cfg_row_last))
        #################################################
        # Go through the rows, read and validate the data
        #################################################
        final_dict = {}
        for curr_row in range(corr_data_idx_cfg_row_first, corr_data_idx_cfg_row_last + 1):
            # Go through rows
            final_dict[curr_row] = {}

            msg_queue = {
                'fail_on_empty_cell': {
                    'exceptions': [],
                    'logs': []
                },
                'fail_on_type_error': {
                    'exceptions': [],
                    'logs': []
                }
            }
            is_row_excluded = False

            for header in spreadsheet_headers2columns:
                # Go through columns
                curr_column = spreadsheet_headers2columns[header]
                cell_index_str = self._transform_coordinates(curr_row, curr_column)
                value = ws[cell_index_str].value

                # Start the validation
                config_header = config_header_dict[header]

                if value is None:
                    msg = u"The '{0}' in cell {1} is empty".format(header, cell_index_str)
                    if config_header['fail_on_empty_cell']:
                        msg_queue['fail_on_empty_cell']['exceptions'].append(msg)
                    else:
                        msg_queue['fail_on_empty_cell']['logs'].append(msg)
                else:
                    if config_header['type']['base'] == 'automatic':
                        pass
                    elif config_header['type']['base'] == 'date':
                        if not isinstance(value, datetime.datetime):
                            msg = u'The value {0} in cell {1} is of type {2}; required by ' \
                                  u'specification is datetime'.format(value, cell_index_str, type(value))
                            if config_header['fail_on_type_error']:
                                msg_queue['fail_on_type_error']['exceptions'].append(msg)
                            else:
                                msg_queue['fail_on_type_error']['logs'].append(msg)
                    elif config_header['type']['base'] == 'enum':
                        filtered_enum_values = []
                        if 'filter' in config_header['type']:
                            filtered_enum_values = config_header['type']['filter']['whitelist_values']
                        if not self._is_string(value):
                            msg = u'The value {0} in cell {1} is of type {2}; required by ' \
                                  u'specification is a string type (enum)'.format(value, cell_index_str, type(value))
                            if config_header['fail_on_type_error']:
                                msg_queue['fail_on_type_error']['exceptions'].append(msg)
                            else:
                                msg_queue['fail_on_type_error']['logs'].append(msg)
                            if filtered_enum_values:
                                msg = u'Cannot apply enum filter to cell {0} because the type ' \
                                      u'check failed.'.format(cell_index_str)
                                self._plugin.log.error(msg)
                        else:
                            valid_values = config_header['type']['enum_values']
                            if value not in valid_values:
                                msg = u'The value {0} in cell {1} is not contained in the given enum ' \
                                      u'[{2}]'.format(value, cell_index_str, ', '.join(valid_values))
                                if config_header['fail_on_type_error']:
                                    msg_queue['fail_on_type_error']['exceptions'].append(msg)
                                else:
                                    msg_queue['fail_on_type_error']['logs'].append(msg)
                                if filtered_enum_values:
                                    msg = u'Cannot apply enum filter to cell {0} because the enum values ' \
                                          u'check failed.'.format(cell_index_str)
                                    self._plugin.log.error(msg)
                            else:
                                if filtered_enum_values and value not in filtered_enum_values:
                                    is_row_excluded = True
                                    self._plugin.log.debug(u"The {0} {1} was excluded due to an exclude filter on "
                                                           u"cell {2} ({3} not in [{4}]).".format(
                                                               oriented_row_text,
                                                               curr_row,
                                                               cell_index_str,
                                                               value,
                                                               ', '.join(filtered_enum_values)))
                    elif config_header['type']['base'] == 'float':
                        # TODO Allow int, too
                        if not isinstance(value, float):
                            msg = u'The value {0} in cell {1} is of type {2}; required by ' \
                                  u'specification is float'.format(value, cell_index_str, type(value))
                            if config_header['fail_on_type_error']:
                                msg_queue['fail_on_type_error']['exceptions'].append(msg)
                            else:
                                msg_queue['fail_on_type_error']['logs'].append(msg)
                        else:
                            if 'minimum' in config_header['type']:
                                if value < config_header['type']['minimum']:
                                    msg = u'The value {0} in cell {1} is smaller than the given minimum ' \
                                          u'of {2}'.format(value, cell_index_str, config_header['type']['minimum'])
                                    if config_header['fail_on_type_error']:
                                        msg_queue['fail_on_type_error']['exceptions'].append(msg)
                                    else:
                                        msg_queue['fail_on_type_error']['logs'].append(msg)
                            if 'maximum' in config_header['type']:
                                if value > config_header['type']['maximum']:
                                    msg = u'The value {0} in cell {1} is greater than the given maximum ' \
                                          u'of {2}'.format(value, cell_index_str, config_header['type']['maximum'])
                                    if config_header['fail_on_type_error']:
                                        msg_queue['fail_on_type_error']['exceptions'].append(msg)
                                    else:
                                        msg_queue['fail_on_type_error']['logs'].append(msg)
                    elif config_header['type']['base'] == 'integer':
                        # Integer values stored by Excel are returned as float (e.g. 3465.0)
                        # So we have to check if the float can be converted to int without precision loss
                        if isinstance(value, float):
                            if value.is_integer():
                                # the typecast to int may return int or long, depending on the size of value
                                value = int(value)
                        if self._is_type_int_long(value):
                            if 'minimum' in config_header['type']:
                                if value < config_header['type']['minimum']:
                                    msg = u'The value {0} in cell {1} is smaller than the given minimum ' \
                                          u'of {2}'.format(value, cell_index_str, config_header['type']['minimum'])
                                    if config_header['fail_on_type_error']:
                                        msg_queue['fail_on_type_error']['exceptions'].append(msg)
                                    else:
                                        msg_queue['fail_on_type_error']['logs'].append(msg)
                            if 'maximum' in config_header['type']:
                                if value > config_header['type']['maximum']:
                                    msg = u'The value {0} in cell {1} is greater than the given maximum ' \
                                          u'of {2}'.format(value, cell_index_str, config_header['type']['maximum'])
                                    if config_header['fail_on_type_error']:
                                        msg_queue['fail_on_type_error']['exceptions'].append(msg)
                                    else:
                                        msg_queue['fail_on_type_error']['logs'].append(msg)
                        else:
                            msg = u'The value {0} in cell {1} is of type {2}; required by ' \
                                  u'specification is int'.format(value, cell_index_str, type(value))
                            if config_header['fail_on_type_error']:
                                msg_queue['fail_on_type_error']['exceptions'].append(msg)
                            else:
                                msg_queue['fail_on_type_error']['logs'].append(msg)
                    elif config_header['type']['base'] == 'string':
                        if self._is_type_numeric(value):
                            convert_numbers = False
                            if 'convert_numbers' in config_header['type']:
                                convert_numbers = config_header['type']['convert_numbers']
                            if convert_numbers:
                                value = str(value)
                        if not self._is_string(value):
                            msg = u'The value {0} in cell {1} is of type {2}; required by ' \
                                  u'specification is string'.format(value, cell_index_str, type(value))
                            if config_header['fail_on_type_error']:
                                msg_queue['fail_on_type_error']['exceptions'].append(msg)
                            else:
                                msg_queue['fail_on_type_error']['logs'].append(msg)
                        else:
                            if 'pattern' in config_header['type']:
                                if re.search(config_header['type']['pattern'], value) is None:
                                    msg = u'The value {0} in cell {1} does not follow the ' \
                                          u'given pattern {2}'.format(value, cell_index_str,
                                                                      config_header['type']['pattern'])
                                    if config_header['fail_on_type_error']:
                                        msg_queue['fail_on_type_error']['exceptions'].append(msg)
                                    else:
                                        msg_queue['fail_on_type_error']['logs'].append(msg)

                final_dict[curr_row][header] = value

            if is_row_excluded:
                # All queued messages are either raised as an exception or logged;
                # whether that happens at all depends on self.excel_config['filter_properties']

                for msg_type, messages in msg_queue.items():
                    for msg in messages['exceptions']:
                        if self.excel_config['filter_properties']['excluded_' + msg_type]:
                            # This ends the program on the first exception message
                            self._raise_value_error(msg)
                        elif self.excel_config['filter_properties']['excluded_enable_logging']:
                            # In case we don't want to raise type errors as exception
                            # we log them in case the user configured so
                            self._plugin.log.warn(msg)
                    for msg in messages['logs']:
                        if self.excel_config['filter_properties']['excluded_enable_logging']:
                            self._plugin.log.warn(msg)
                del final_dict[curr_row]
            else:
                for msg_type, messages in msg_queue.items():
                    for msg in messages['exceptions']:
                        # This ends the program on the first exception message
                        self._raise_value_error(msg)
                    for msg in messages['logs']:
                        self._plugin.log.warn(msg)

        return final_dict
    @staticmethod
    def _is_string(value):
        """
        Tests if a value is of a string type.
        Python2: unicode, str
        Python3: str

        :param value: Variable of any type
        :return: True if value is of a string type, else False
        """
        try:
            str_type = basestring
        except NameError:
            str_type = str
        return isinstance(value, str_type)

    @staticmethod
    def _is_type_numeric(value):
        """
        Tests if a value is of a numeric type.
        Python2: int, long, float
        Python3: int, float

        :param value: Variable of any type
        :return: True if value is of a numeric type, else False
        """
        numeric_types = (int, float)
        try:
            long
            numeric_types = (int, long, float)
        except NameError:
            pass

        return isinstance(value, numeric_types)

    @staticmethod
    def _is_type_int_long(value):
        """
        Tests if a value is of an integer type.
        Python2: int, long
        Python3: int

        :param value: Variable of any type
        :return: True if value is of an integer type, else False
        """
        numeric_types = (int,)
        try:
            long
            numeric_types = (int, long)
        except NameError:
            pass

        return isinstance(value, numeric_types)
    def _validate_json(self, excel_config_json_path):

        try:
            with open(excel_config_json_path) as file_pointer:
                json_obj = json.load(file_pointer)
        # the file is not deserializable as a json object
        except ValueError as exc:
            self._plugin.log.error('Malformed JSON file: {0} \n {1}'.format(excel_config_json_path, exc))
            raise exc
        # some os error occurred (e.g. file not found or malformed path string)
        # have to catch two exception classes: in py2: IOError; py3: OSError
        except (IOError, OSError) as exc:
            self._plugin.log.error(exc)
            # raise only OSError to make error handling in the caller easier
            raise OSError()

        # load the schema and validate the json object against it
        try:
            with open(JSON_SCHEMA_FILE_PATH) as file_pointer:
                schema_obj = json.load(file_pointer)
        # the file is not deserializable as a json object
        except ValueError as exc:
            self._plugin.log.error('Malformed JSON schema file: {0} \n {1}'.format(JSON_SCHEMA_FILE_PATH, exc))
            raise exc
        # some os error occurred (e.g. file not found or malformed path string)
        # have to catch two exception classes: in py2: IOError; py3: OSError
        except (IOError, OSError) as exc:
            self._plugin.log.error(exc)
            # raise only OSError to make error handling in the caller easier
            raise OSError()

        # do the validation
        try:
            validate(json_obj, schema_obj)
        except ValidationError as error:
            self._plugin.log.error("Validation failed: {0}".format(error.message))
            raise
        except SchemaError:
            self._plugin.log.error("Invalid schema file: {0}".format(JSON_SCHEMA_FILE_PATH))
            raise

        return json_obj
    def _get_sheet(self, workbook):

        # get sheet
        worksheet = None
        if type(self.excel_config['sheet_config']) == int:
            worksheet = workbook.worksheets[self.excel_config['sheet_config'] - 1]
        elif self.excel_config['sheet_config'] == 'active':
            worksheet = workbook.active
        elif self.excel_config['sheet_config'].startswith('name'):
            worksheet = workbook[self.excel_config['sheet_config'].split(':')[1]]
        elif self.excel_config['sheet_config'] == 'first':
            worksheet = workbook.worksheets[0]
        elif self.excel_config['sheet_config'] == 'last':
            worksheet = workbook.worksheets[-1]
        else:
            # This cannot happen if json validation was ok
            pass
        return worksheet

    def _raise_value_error(self, msg):
        self._plugin.log.error(msg)
        raise ValueError(msg.encode('UTF-8', 'ignore'))

    def _transform_coordinates(self, row=None, column=None):
        if row is None and column is None:
            raise ValueError("_transform_coordinates: row and column cannot both be None.")
        target_str = ''
        if self.excel_config['orientation'] == 'column_based':
            if column is not None:
                target_str = get_column_letter(column)
            if row is not None:
                target_str += str(row)
        else:
            if row is not None:
                target_str = get_column_letter(row)
            if column is not None:
                target_str += str(column)
        return target_str
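The keys that read_excel looks up in self.excel_config (orientation, sheet_config, headers_index_config, data_index_config, data_type_config, filter_properties) suggest roughly what a configuration file looks like. The following is only an illustrative sketch inferred from those lookups; the authoritative structure is the schema referenced by JSON_SCHEMA_FILE_PATH, and my_plugin stands in for whatever GwBasePattern-based plugin is actually registered. A possible excel_config.json:

    {
        "orientation": "column_based",
        "sheet_config": "active",
        "headers_index_config": {
            "row_index": {"first": 1, "last": 1},
            "column_index": {"first": 1, "last": "automatic"}
        },
        "data_index_config": {
            "row_index": {"first": 2, "last": "severalEmptyCells:2"},
            "column_index": {"first": 1, "last": "automatic"}
        },
        "data_type_config": [
            {"header": "Name", "type": {"base": "string"}},
            {"header": "Amount", "type": {"base": "float", "minimum": 0.0}}
        ]
    }

And a hypothetical call site:

    # my_plugin must provide .app and .log, as a GwBasePattern-based plugin does
    reader = ExcelValidationPlugin(my_plugin)
    data = reader.read_excel("excel_config.json", "report.xlsx")
    for row_number, row in data.items():
        print(row_number, row["Name"], row["Amount"])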