| Metric | Value |
| --- | --- |
| Total Complexity | 146 |
| Total Lines | 660 |
| Duplicated Lines | 7.27 % |
| Changes | 14 |
| Bugs | 0 |
| Features | 0 |
Duplicated code is one of the most pungent code smells. A commonly used rule of thumb is to restructure code once it is duplicated in three or more places.
Common duplication problems are usually solved by extracting the repeated logic into a shared method or class; a possible Extract Method sketch for this class is shown below.
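The following is a minimal sketch only, assuming the duplicated minimum/maximum checks and the `fail_on_type_error` routing that appear in the float and integer branches of `read_excel()` further down. The helper names `_queue_type_error` and `_check_numeric_bounds` are illustrative and not part of the existing plugin API.

```python
def _queue_type_error(msg_queue, config_header, msg):
    """Route a type-error message to the exception or the log queue,
    depending on the per-header 'fail_on_type_error' flag."""
    if config_header['fail_on_type_error']:
        msg_queue['fail_on_type_error']['exceptions'].append(msg)
    else:
        msg_queue['fail_on_type_error']['logs'].append(msg)


def _check_numeric_bounds(value, cell_index_str, config_header, msg_queue):
    """Shared 'minimum'/'maximum' validation that the float and integer
    branches currently duplicate."""
    type_cfg = config_header['type']
    if 'minimum' in type_cfg and value < type_cfg['minimum']:
        _queue_type_error(msg_queue, config_header,
                          u'The value {0} in cell {1} is smaller than the given '
                          u'minimum of {2}'.format(value, cell_index_str,
                                                   type_cfg['minimum']))
    if 'maximum' in type_cfg and value > type_cfg['maximum']:
        _queue_type_error(msg_queue, config_header,
                          u'The value {0} in cell {1} is greater than the given '
                          u'maximum of {2}'.format(value, cell_index_str,
                                                   type_cfg['maximum']))
```

Both the float and the integer branch could then delegate to `_check_numeric_bounds()` instead of repeating the queueing logic.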
Complex classes like ExcelValidationPlugin often do a lot of different things. To break such a class down, we need to identify a cohesive component within it. A common approach to finding such a component is to look for fields and methods that share the same prefixes or suffixes.
Once you have determined which fields and methods belong together, you can apply the Extract Class refactoring. If the component makes sense as a subclass, Extract Subclass is also a candidate and is often the faster option. A sketch of Extract Class applied to this plugin follows.
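As an illustration only: in `read_excel()` the eight `corr_*_idx_cfg_*` locals and the orientation rotation form such a cohesive component. The class name `ExcelIndexConfig` and its `corrected()` method below are hypothetical and not part of the existing code.

```python
class ExcelIndexConfig(object):
    """Hypothetical extraction of the header/data index handling that
    read_excel() currently keeps in eight corr_*_idx_cfg_* locals."""

    def __init__(self, excel_config):
        self.orientation = excel_config['orientation']
        self._header = excel_config['headers_index_config']
        self._data = excel_config['data_index_config']

    def corrected(self):
        """Return ((header_rows, header_cols), (data_rows, data_cols)),
        each a (first, last) tuple, rotated so that callers can always
        reason in a column-based layout."""
        header_rows = (self._header['row_index']['first'],
                       self._header['row_index']['last'])
        header_cols = (self._header['column_index']['first'],
                       self._header['column_index']['last'])
        data_rows = (self._data['row_index']['first'],
                     self._data['row_index']['last'])
        data_cols = (self._data['column_index']['first'],
                     self._data['column_index']['last'])
        if self.orientation == 'column_based':
            return (header_rows, header_cols), (data_rows, data_cols)
        # row_based layout: swap the row and column roles
        return (header_cols, header_rows), (data_cols, data_rows)
```

The full source listing that these findings refer to follows.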
| 1 | """ |
||
| 27 | class ExcelValidationPlugin: |
||
| 28 | """ |
||
| 29 | Plugin level class of the Excel validation pattern. |
||
| 30 | Note: There are no instances of the ExcelValidationPattern stored on the app. |
||
| 31 | """ |
||
| 32 | |||
| 33 | def __init__(self, plugin): |
||
| 34 | """ |
||
| 35 | :param plugin: The plugin that wants to use the Excel validation pattern |
||
| 36 | :type plugin: GwBasePattern |
||
| 37 | """ |
||
| 38 | self._plugin = plugin |
||
| 39 | self._app = plugin.app |
||
| 40 | self.excel_config = None |
||
| 41 | |||
| 42 | def read_excel(self, excel_config_json_path, excel_workbook_path): |
||
| 43 | """ |
||
| 44 | Main routine to read an Excel sheet. |
||
| 45 | |||
| 46 | :param excel_config_json_path: The configuration json file |
||
| 47 | :param excel_workbook_path: Relative or absolute path to an Excel workbook |
||
| 48 | :return: Data dictionary with rows/columns as keys and a dictionary of "header": value as items |
||
| 49 | """ |
||
| 50 | |||
| 51 | # The exceptions raised in this method shall be raised to the plugin level |
||
| 52 | self.excel_config = self._validate_json(excel_config_json_path) |
||
| 53 | |||
| 54 | ######################### |
||
| 55 | # Get index configuration |
||
| 56 | ######################### |
||
| 57 | orientation = self.excel_config['orientation'] |
||
| 58 | header_idx_cfg_row_first = self.excel_config['headers_index_config']['row_index']['first'] |
||
| 59 | header_idx_cfg_row_last = self.excel_config['headers_index_config']['row_index']['last'] |
||
| 60 | header_idx_cfg_col_first = self.excel_config['headers_index_config']['column_index']['first'] |
||
| 61 | header_idx_cfg_col_last = self.excel_config['headers_index_config']['column_index']['last'] |
||
| 62 | data_idx_cfg_row_first = self.excel_config['data_index_config']['row_index']['first'] |
||
| 63 | data_idx_cfg_row_last = self.excel_config['data_index_config']['row_index']['last'] |
||
| 64 | data_idx_cfg_col_first = self.excel_config['data_index_config']['column_index']['first'] |
||
| 65 | data_idx_cfg_col_last = self.excel_config['data_index_config']['column_index']['last'] |
||
| 66 | |||
| 67 | ############################################################################ |
||
| 68 | # Rotate the coordinates so that we can work in a virtual column-based layout all the time |
||
| 69 | # Just before addressing the cells a coordinate transformation is done again |
||
| 70 | ############################################################################ |
||
| 71 | if orientation == 'column_based': |
||
| 72 | # Correct the header and data matrix according to the orientation |
||
| 73 | # We rotate the row_based layout so we can work with column_based in mind all the time |
||
| 74 | # Name schema: corrected headers/data index config column/row first/last |
||
| 75 | corr_header_idx_cfg_row_first = header_idx_cfg_row_first |
||
| 76 | corr_header_idx_cfg_row_last = header_idx_cfg_row_last |
||
| 77 | corr_header_idx_cfg_col_first = header_idx_cfg_col_first |
||
| 78 | corr_header_idx_cfg_col_last = header_idx_cfg_col_last |
||
| 79 | corr_data_idx_cfg_row_first = data_idx_cfg_row_first |
||
| 80 | corr_data_idx_cfg_row_last = data_idx_cfg_row_last |
||
| 81 | corr_data_idx_cfg_col_first = data_idx_cfg_col_first |
||
| 82 | corr_data_idx_cfg_col_last = data_idx_cfg_col_last |
||
| 83 | else: |
||
| 84 | # row_based layout |
||
| 85 | corr_header_idx_cfg_row_first = header_idx_cfg_col_first |
||
| 86 | corr_header_idx_cfg_row_last = header_idx_cfg_col_last |
||
| 87 | corr_header_idx_cfg_col_first = header_idx_cfg_row_first |
||
| 88 | corr_header_idx_cfg_col_last = header_idx_cfg_row_last |
||
| 89 | corr_data_idx_cfg_row_first = data_idx_cfg_col_first |
||
| 90 | corr_data_idx_cfg_row_last = data_idx_cfg_col_last |
||
| 91 | corr_data_idx_cfg_col_first = data_idx_cfg_row_first |
||
| 92 | corr_data_idx_cfg_col_last = data_idx_cfg_row_last |
||
| 93 | |||
| 94 | # Used for log and exception messages |
||
| 95 | oriented_row_text = "row" if orientation == 'column_based' else "column" |
||
| 96 | oriented_column_text = "column" if orientation == 'column_based' else "row" |
||
| 97 | |||
| 98 | ###################################### |
||
| 99 | # Set defaults of headers_index_config |
||
| 100 | ###################################### |
||
| 101 | if type(corr_header_idx_cfg_row_first) is not int: |
||
| 102 | # Assume the user wants to use the first row as header row |
||
| 103 | corr_header_idx_cfg_row_first = 1 |
||
| 104 | self._plugin.log.debug("Config update: Setting headers_index_config -> {0}_index -> first to 1".format( |
||
| 105 | oriented_row_text)) |
||
| 106 | |||
| 107 | if type(corr_header_idx_cfg_row_last) is not int: |
||
| 108 | # Only 1 header row is supported currently, set last header row to first header row |
||
| 109 | corr_header_idx_cfg_row_last = corr_header_idx_cfg_row_first |
||
| 110 | self._plugin.log.debug("Config update: Setting headers_index_config -> {0}_index -> last to {1}".format( |
||
| 111 | oriented_row_text, corr_header_idx_cfg_row_first)) |
||
| 112 | |||
| 113 | if type(corr_header_idx_cfg_col_first) is not int: |
||
| 114 | # Assume the user wants to start at the first column |
||
| 115 | corr_header_idx_cfg_col_first = 1 |
||
| 116 | self._plugin.log.debug("Config update: Setting headers_index_config -> {0}_index -> first to 1".format( |
||
| 117 | oriented_column_text)) |
||
| 118 | |||
| 119 | if type(corr_header_idx_cfg_col_last) is not int: |
||
| 120 | if type(corr_data_idx_cfg_col_last) is int: |
||
| 121 | # We don't have a last column in the header config, |
||
| 122 | # so we use what we have in the data config |
||
| 123 | corr_header_idx_cfg_col_last = corr_data_idx_cfg_col_last |
||
| 124 | self._plugin.log.debug("Config update: Setting headers_index_config -> {0}_index -> first to " |
||
| 125 | "{1}".format(oriented_column_text, corr_header_idx_cfg_col_last)) |
||
| 126 | |||
| 127 | ################################### |
||
| 128 | # Set defaults of data_index_config |
||
| 129 | ################################### |
||
| 130 | if type(corr_data_idx_cfg_row_first) is not int: |
||
| 131 | # Assume the first data row is the next after corr_header_idx_cfg_row_last |
||
| 132 | corr_data_idx_cfg_row_first = corr_header_idx_cfg_row_last + 1 |
||
| 133 | self._plugin.log.debug("Config update: Setting data_index_config -> {0}_index -> first to {1}".format( |
||
| 134 | oriented_row_text, corr_data_idx_cfg_row_first)) |
||
| 135 | |||
| 136 | # corr_data_idx_cfg_row_last has no defaults - the user input is master |
||
| 137 | |||
| 138 | if type(corr_data_idx_cfg_col_first) is not int: |
||
| 139 | # Assume the first data column is equal to the first header column |
||
| 140 | corr_data_idx_cfg_col_first = corr_header_idx_cfg_col_first |
||
| 141 | self._plugin.log.debug("Config update: Setting data_index_config -> {0}_index -> first to {1}".format( |
||
| 142 | oriented_column_text, corr_data_idx_cfg_col_first)) |
||
| 143 | |||
| 144 | if type(corr_data_idx_cfg_col_last) is not int: |
||
| 145 | if type(corr_header_idx_cfg_col_last) is int: |
||
| 146 | # We don't have a last column in the data config, |
||
| 147 | # so we use what we have in the header config |
||
| 148 | corr_data_idx_cfg_col_last = corr_header_idx_cfg_col_last |
||
| 149 | self._plugin.log.debug("Config update: Setting data_index_config -> {0}_index -> last to " |
||
| 150 | "{1}".format(oriented_column_text, corr_data_idx_cfg_col_last)) |
||
| 151 | |||
| 152 | ################################ |
||
| 153 | # Some more logic checks on rows |
||
| 154 | # (matrix size and row order) |
||
| 155 | ################################ |
||
| 156 | if corr_header_idx_cfg_row_last != corr_header_idx_cfg_row_first: |
||
| 157 | # We can compare both because at this point they have to be integer |
||
| 158 | # Multi line headers given |
||
| 159 | self._raise_value_error("Config error: Multi line (grouped) headers are not yet supported. " |
||
| 160 | "First and last header {0} must be equal.".format(oriented_row_text)) |
||
| 161 | |||
| 162 | if corr_data_idx_cfg_row_first <= corr_header_idx_cfg_row_last: |
||
| 163 | # We can compare both because at this point they have to be integer |
||
| 164 | # The data section is above the header section |
||
| 165 | self._raise_value_error("Config error: headers_index_config -> {0}_index -> last is greater than " |
||
| 166 | "data_index_config -> {0}_index -> first.".format(oriented_row_text)) |
||
| 167 | |||
| 168 | if type(corr_data_idx_cfg_row_last) is int: |
||
| 169 | if corr_data_idx_cfg_row_last < corr_data_idx_cfg_row_first: |
||
| 170 | # The last data row is smaller than the first |
||
| 171 | self._raise_value_error("Config error: data_index_config -> {0}_index -> first is greater than " |
||
| 172 | "data_index_config -> {0}_index -> last.".format(oriented_row_text)) |
||
| 173 | |||
| 174 | ################################### |
||
| 175 | # Some more logic checks on columns |
||
| 176 | # (mismatches) |
||
| 177 | ################################### |
||
| 178 | if corr_header_idx_cfg_col_first != corr_data_idx_cfg_col_first: |
||
| 179 | # We can compare both because at this point they have to be integer |
||
| 180 | # First column mismatch |
||
| 181 | self._raise_value_error("Config error: header_index_config -> {0}_index -> first is not equal to " |
||
| 182 | "data_index_config -> {0}_index -> first.".format(oriented_column_text)) |
||
| 183 | |||
| 184 | if type(corr_header_idx_cfg_col_last) is int: |
||
| 185 | if type(corr_data_idx_cfg_col_last) is int: |
||
| 186 | if corr_header_idx_cfg_col_last != corr_data_idx_cfg_col_last: |
||
| 187 | # Last columns are given but do not match |
||
| 188 | self._raise_value_error( |
||
| 189 | "Config error: header_index_config -> {0}_index -> last ({1}) is not equal to " |
||
| 190 | "data_index_config -> {0}_index -> last ({2}).".format( |
||
| 191 | oriented_column_text, |
||
| 192 | corr_header_idx_cfg_col_last, |
||
| 193 | corr_data_idx_cfg_col_last |
||
| 194 | )) |
||
| 195 | |||
| 196 | if type(corr_header_idx_cfg_col_last) is not int: |
||
| 197 | if type(corr_data_idx_cfg_col_last) is not int: |
||
| 198 | # The column count search is done on the header row, not on data |
||
| 199 | if corr_data_idx_cfg_col_last != 'automatic': |
||
| 200 | self._raise_value_error( |
||
| 201 | "Config error: data_index_config -> {0}_index -> last ({1}) may only be an integer or " |
||
| 202 | "contain the value 'automatic'.".format(oriented_column_text, |
||
| 203 | corr_data_idx_cfg_col_last)) |
||
| 204 | |||
| 205 | ######################################### |
||
| 206 | # Set defaults for optional config values |
||
| 207 | ######################################### |
||
| 208 | if 'sheet_config' not in self.excel_config: |
||
| 209 | self.excel_config['sheet_config'] = 'active' |
||
| 210 | |||
| 211 | for data_type_config in self.excel_config['data_type_config']: |
||
| 212 | # Defaults for possible problems should be strict if the user specifies nothing |
||
| 213 | if 'fail_on_type_error' not in data_type_config: |
||
| 214 | data_type_config['fail_on_type_error'] = True |
||
| 215 | if 'fail_on_empty_cell' not in data_type_config: |
||
| 216 | data_type_config['fail_on_empty_cell'] = True |
||
| 217 | if 'fail_on_header_not_found' not in data_type_config: |
||
| 218 | data_type_config['fail_on_header_not_found'] = True |
||
| 219 | |||
| 220 | # default type is automatic |
||
| 221 | if 'type' not in data_type_config: |
||
| 222 | data_type_config['type'] = {'base': 'automatic'} |
||
| 223 | |||
| 224 | # Set global filter properties |
||
| 225 | # Defensive variant is True for all options |
||
| 226 | if 'filter_properties' not in self.excel_config: |
||
| 227 | self.excel_config['filter_properties'] = {} |
||
| 228 | if 'excluded_fail_on_type_error' not in self.excel_config['filter_properties']: |
||
| 229 | self.excel_config['filter_properties']['excluded_fail_on_type_error'] = True |
||
| 230 | if 'excluded_fail_on_empty_cell' not in self.excel_config['filter_properties']: |
||
| 231 | self.excel_config['filter_properties']['excluded_fail_on_empty_cell'] = True |
||
| 232 | if 'excluded_enable_logging' not in self.excel_config['filter_properties']: |
||
| 233 | self.excel_config['filter_properties']['excluded_enable_logging'] = True |
||
| 234 | |||
| 235 | ############################ |
||
| 236 | # Get the workbook and sheet |
||
| 237 | ############################ |
||
| 238 | wb = openpyxl.load_workbook(excel_workbook_path, data_only=True) |
||
| 239 | ws = self._get_sheet(wb) |
||
| 240 | |||
| 241 | ############################# |
||
| 242 | # Determine header row length |
||
| 243 | ############################# |
||
| 244 | if type(corr_header_idx_cfg_col_last) == int: |
||
| 245 | # corr_header_idx_cfg_col_last already has the final value |
||
| 246 | pass |
||
| 247 | elif corr_header_idx_cfg_col_last == 'automatic': |
||
| 248 | # automatic: use the length of the header row |
||
| 249 | corr_header_idx_cfg_col_last = len( |
||
| 250 | ws[self._transform_coordinates(row=corr_header_idx_cfg_row_first)]) |
||
| 251 | self._plugin.log.debug("Config update: Last header {0} was set to {1} using the 'automatic' " |
||
| 252 | "mechanism.".format(oriented_column_text, corr_header_idx_cfg_col_last)) |
||
| 253 | else: |
||
| 254 | # severalEmptyCells chosen |
||
| 255 | target_empty_cell_count = int(corr_header_idx_cfg_col_last.split(':')[1]) |
||
| 256 | empty_cell_count = 0 |
||
| 257 | curr_column = corr_header_idx_cfg_col_first |
||
| 258 | while empty_cell_count < target_empty_cell_count: |
||
| 259 | value = ws[self._transform_coordinates(corr_header_idx_cfg_row_first, |
||
| 260 | curr_column)].value |
||
| 261 | if value is None: |
||
| 262 | empty_cell_count += 1 |
||
| 263 | curr_column += 1 |
||
| 264 | corr_header_idx_cfg_col_last = curr_column - target_empty_cell_count - 1 |
||
| 265 | self._plugin.log.debug("Config update: Last header {0} was set to {1} using the 'automatic' " |
||
| 266 | "mechanism.".format(oriented_column_text, corr_header_idx_cfg_col_last)) |
||
| 267 | |||
| 268 | ################################### |
||
| 269 | # Determine header column locations |
||
| 270 | ################################### |
||
| 271 | spreadsheet_headers2columns = {} |
||
| 272 | for column in range(corr_header_idx_cfg_col_first, corr_header_idx_cfg_col_last + 1): |
||
| 273 | value = ws[self._transform_coordinates(corr_header_idx_cfg_row_first, column)].value |
||
| 274 | if value is not None: |
||
| 275 | spreadsheet_headers2columns[value] = column |
||
| 276 | else: |
||
| 277 | # if the value is None we have either |
||
| 278 | # - one or more empty header cells in between two filled header cells. |
||
| 279 | # - some empty header cells at the end of the row. |
||
| 280 | # That might happen because when choosing 'automatic' header row detection, openpyxl functionality |
||
| 281 | # is used. It always returns the length of the longest row in the whole sheet. |
||
| 282 | # If that is not the header row, we have empty cells at the end of the header row. |
||
| 283 | # However, that is not a problem as header values 'None' are not added. |
||
| 284 | pass |
||
| 285 | spreadsheet_headers = spreadsheet_headers2columns.keys() |
||
| 286 | |||
| 287 | ############################################################### |
||
| 288 | # Check for not existing headers on spreadsheet and config side |
||
| 289 | ############################################################### |
||
| 290 | # Build a data_type_config dictionary with header as key |
||
| 291 | config_header_dict = {x['header']: x for x in self.excel_config['data_type_config']} |
||
| 292 | |||
| 293 | # Check: Are config data_type_config headers unique? |
||
| 294 | if len(self.excel_config['data_type_config']) != len(config_header_dict): |
||
| 295 | self._raise_value_error("Config error: data_type_config -> header duplicate entries found.") |
||
| 296 | |||
| 297 | # Check for configured headers not found in spreadsheet |
||
| 298 | missing_headers_in_spreadsheet = list(set(config_header_dict.keys()) - set(spreadsheet_headers)) |
||
| 299 | for header in missing_headers_in_spreadsheet: |
||
| 300 | # Check if the fail_on_header_not_found is true |
||
| 301 | data_type = [x for x in self.excel_config['data_type_config'] if x['header'] == header][0] |
||
| 302 | msg = u"Config error: The header '{0}' could not be found in the spreadsheet.".format(header) |
||
| 303 | if data_type['fail_on_header_not_found']: |
||
| 304 | self._raise_value_error(msg) |
||
| 305 | else: |
||
| 306 | self._plugin.log.error(msg) |
||
| 307 | |||
| 308 | # Check for spreadsheet headers not found in config |
||
| 309 | missing_headers_in_config = list(set(spreadsheet_headers) - set(config_header_dict.keys())) |
||
| 310 | if missing_headers_in_config: |
||
| 311 | self._plugin.log.debug(u"The following spreadsheet headers are not configured for reading: {0}".format( |
||
| 312 | ', '.join([header.replace('\n', ' ') for header in missing_headers_in_config]))) |
||
| 313 | for header in missing_headers_in_config: |
||
| 314 | del spreadsheet_headers2columns[header] |
||
| 315 | |||
| 316 | ######################### |
||
| 317 | # Determine last data row |
||
| 318 | ######################### |
||
| 319 | if type(corr_data_idx_cfg_row_last) == int: |
||
| 320 | # do nothing, the value is already set |
||
| 321 | pass |
||
| 322 | elif corr_data_idx_cfg_row_last == 'automatic': |
||
| 323 | # Take data from library |
||
| 324 | corr_data_idx_cfg_row_last = corr_data_idx_cfg_row_first |
||
| 325 | for curr_column in range(corr_header_idx_cfg_col_first, |
||
| 326 | corr_header_idx_cfg_col_last + 1): |
||
| 327 | len_curr_column = len(ws[self._transform_coordinates(column=curr_column)]) |
||
| 328 | if len_curr_column > corr_data_idx_cfg_row_last: |
||
| 329 | corr_data_idx_cfg_row_last = len_curr_column |
||
| 330 | self._plugin.log.debug("Config update: Last data {0} was set to {1} using the 'automatic' " |
||
| 331 | "mechanism.".format(oriented_row_text, corr_data_idx_cfg_row_last)) |
||
| 332 | else: |
||
| 333 | # severalEmptyCells is chosen |
||
| 334 | target_empty_rows_count = int(corr_data_idx_cfg_row_last.split(':')[1]) |
||
| 335 | last_row_detected = False |
||
| 336 | curr_row = corr_data_idx_cfg_row_first |
||
| 337 | empty_rows_count = 0 |
||
| 338 | while not last_row_detected: |
||
| 339 | # go through rows |
||
| 340 | all_columns_empty = True |
||
| 341 | for header in spreadsheet_headers2columns: |
||
| 342 | curr_column = spreadsheet_headers2columns[header] |
||
| 343 | value = ws[self._transform_coordinates(curr_row, curr_column)].value |
||
| 344 | if value is not None: |
||
| 345 | all_columns_empty = False |
||
| 346 | break |
||
| 347 | if all_columns_empty: |
||
| 348 | empty_rows_count += 1 |
||
| 349 | if empty_rows_count >= target_empty_rows_count: |
||
| 350 | last_row_detected = True |
||
| 351 | if not last_row_detected: |
||
| 352 | curr_row += 1 |
||
| 353 | corr_data_idx_cfg_row_last = curr_row - target_empty_rows_count |
||
| 354 | self._plugin.log.debug("Config update: Last data {0} was set to {1} using the 'severalEmptyCells' " |
||
| 355 | "mechanism.".format(oriented_row_text, corr_data_idx_cfg_row_last)) |
||
| 356 | |||
| 357 | ################################################# |
||
| 358 | # Go through the rows, read and validate the data |
||
| 359 | ################################################# |
||
| 360 | final_dict = {} |
||
| 361 | for curr_row in range(corr_data_idx_cfg_row_first, corr_data_idx_cfg_row_last + 1): |
||
| 362 | # Go through rows |
||
| 363 | final_dict[curr_row] = {} |
||
| 364 | |||
| 365 | msg_queue = { |
||
| 366 | 'fail_on_empty_cell': { |
||
| 367 | 'exceptions': [], |
||
| 368 | 'logs': [] |
||
| 369 | }, |
||
| 370 | 'fail_on_type_error': { |
||
| 371 | 'exceptions': [], |
||
| 372 | 'logs': [] |
||
| 373 | } |
||
| 374 | } |
||
| 375 | is_row_excluded = False |
||
| 376 | |||
| 377 | for header in spreadsheet_headers2columns: |
||
| 378 | # Go through columns |
||
| 379 | curr_column = spreadsheet_headers2columns[header] |
||
| 380 | cell_index_str = self._transform_coordinates(curr_row, curr_column) |
||
| 381 | value = ws[cell_index_str].value |
||
| 382 | |||
| 383 | # Start the validation |
||
| 384 | config_header = config_header_dict[header] |
||
| 385 | |||
| 386 | if value is None: |
||
| 387 | msg = u"The '{0}' in cell {1} is empty".format(header, cell_index_str) |
||
| 388 | if config_header['fail_on_empty_cell']: |
||
| 389 | msg_queue['fail_on_empty_cell']['exceptions'].append(msg) |
||
| 390 | else: |
||
| 391 | msg_queue['fail_on_empty_cell']['logs'].append(msg) |
||
| 392 | else: |
||
| 393 | if config_header['type']['base'] == 'automatic': |
||
| 394 | pass |
||
| 395 | elif config_header['type']['base'] == 'date': |
||
| 396 | if not isinstance(value, datetime.datetime): |
||
| 397 | msg = u'The value {0} in cell {1} is of type {2}; required by ' \ |
||
| 398 | u'specification is datetime'.format(value, cell_index_str, type(value)) |
||
| 399 | if config_header['fail_on_type_error']: |
||
| 400 | msg_queue['fail_on_type_error']['exceptions'].append(msg) |
||
| 401 | else: |
||
| 402 | msg_queue['fail_on_type_error']['logs'].append(msg) |
||
| 403 | elif config_header['type']['base'] == 'enum': |
||
| 404 | filtered_enum_values = [] |
||
| 405 | if 'filter' in config_header['type']: |
||
| 406 | filtered_enum_values = config_header['type']['filter']['whitelist_values'] |
||
| 407 | if not self._is_string(value): |
||
| 408 | msg = u'The value {0} in cell {1} is of type {2}; required by ' \ |
||
| 409 | u'specification is a string type (enum)'.format(value, cell_index_str, type(value)) |
||
| 410 | if config_header['fail_on_type_error']: |
||
| 411 | msg_queue['fail_on_type_error']['exceptions'].append(msg) |
||
| 412 | else: |
||
| 413 | msg_queue['fail_on_type_error']['logs'].append(msg) |
||
| 414 | if filtered_enum_values: |
||
| 415 | msg = u'Cannot apply enum filter to cell {0} because the type check failed.'.format(cell_index_str) |
||
| 416 | self._plugin.log.error(msg) |
||
| 417 | else: |
||
| 418 | valid_values = config_header['type']['enum_values'] |
||
| 419 | if value not in valid_values: |
||
| 420 | msg = u'The value {0} in cell {1} is not contained in the given enum ' \ |
||
| 421 | u'[{2}]'.format(value, cell_index_str, ', '.join(valid_values)) |
||
| 422 | if config_header['fail_on_type_error']: |
||
| 423 | msg_queue['fail_on_type_error']['exceptions'].append(msg) |
||
| 424 | else: |
||
| 425 | msg_queue['fail_on_type_error']['logs'].append(msg) |
||
| 426 | if filtered_enum_values: |
||
| 427 | msg = u'Cannot apply enum filter to cell {0} because the enum values check failed.'.format(cell_index_str) |
||
| 428 | self._plugin.log.error(msg) |
||
| 429 | else: |
||
| 430 | if filtered_enum_values and value not in filtered_enum_values: |
||
| 431 | is_row_excluded = True |
||
| 432 | self._plugin.log.debug(u"The {0} {1} was excluded due to an exclude filter on " |
||
| 433 | u"cell {2} ({3} not in [{4}]).".format( |
||
| 434 | oriented_row_text, |
||
| 435 | curr_row, |
||
| 436 | cell_index_str, |
||
| 437 | value, |
||
| 438 | ', '.join(filtered_enum_values))) |
||
| 439 | |||
| 440 | elif config_header['type']['base'] == 'float': |
||
| 441 | # TODO Allow int, too |
||
| 442 | if not isinstance(value, float): |
||
| 443 | msg = u'The value {0} in cell {1} is of type {2}; required by ' \ |
||
| 444 | u'specification is float'.format(value, cell_index_str, type(value)) |
||
| 445 | if config_header['fail_on_type_error']: |
||
| 446 | msg_queue['fail_on_type_error']['exceptions'].append(msg) |
||
| 447 | else: |
||
| 448 | msg_queue['fail_on_type_error']['logs'].append(msg) |
||
| 449 | else: |
||
| 450 | if 'minimum' in config_header['type']: |
||
| 451 | if value < config_header['type']['minimum']: |
||
| 452 | msg = u'The value {0} in cell {1} is smaller than the given minimum ' \ |
||
| 453 | u'of {2}'.format(value, cell_index_str, config_header['type']['minimum']) |
||
| 454 | if config_header['fail_on_type_error']: |
||
| 455 | msg_queue['fail_on_type_error']['exceptions'].append(msg) |
||
| 456 | else: |
||
| 457 | msg_queue['fail_on_type_error']['logs'].append(msg) |
||
| 458 | if 'maximum' in config_header['type']: |
||
| 459 | if value > config_header['type']['maximum']: |
||
| 460 | msg = u'The value {0} in cell {1} is greater than the given maximum ' \ |
||
| 461 | u'of {2}'.format(value, cell_index_str, config_header['type']['maximum']) |
||
| 462 | if config_header['fail_on_type_error']: |
||
| 463 | msg_queue['fail_on_type_error']['exceptions'].append(msg) |
||
| 464 | else: |
||
| 465 | msg_queue['fail_on_type_error']['logs'].append(msg) |
||
| 466 | elif config_header['type']['base'] == 'integer': |
||
| 467 | # Integer values stored by Excel are returned as float (e.g. 3465.0) |
||
| 468 | # So we have to check if the float can be converted to int without precision loss |
||
| 469 | if isinstance(value, float): |
||
| 470 | if value.is_integer(): |
||
| 471 | # the typecast to int may return int or long, depending on the size of value |
||
| 472 | value = int(value) |
||
| 473 | if self._is_type_int_long(value): |
||
| 474 | if 'minimum' in config_header['type']: |
||
| 475 | if value < config_header['type']['minimum']: |
||
| 476 | msg = u'The value {0} in cell {1} is smaller than the given minimum ' \ |
||
| 477 | u'of {2}'.format(value, cell_index_str, config_header['type']['minimum']) |
||
| 478 | if config_header['fail_on_type_error']: |
||
| 479 | msg_queue['fail_on_type_error']['exceptions'].append(msg) |
||
| 480 | else: |
||
| 481 | msg_queue['fail_on_type_error']['logs'].append(msg) |
||
| 482 | if 'maximum' in config_header['type']: |
||
| 483 | if value > config_header['type']['maximum']: |
||
| 484 | msg = u'The value {0} in cell {1} is greater than the given maximum ' \ |
||
| 485 | u'of {2}'.format(value, cell_index_str, config_header['type']['maximum']) |
||
| 486 | if config_header['fail_on_type_error']: |
||
| 487 | msg_queue['fail_on_type_error']['exceptions'].append(msg) |
||
| 488 | else: |
||
| 489 | msg_queue['fail_on_type_error']['logs'].append(msg) |
||
| 490 | else: |
||
| 491 | msg = u'The value {0} in cell {1} is of type {2}; required by ' \ |
||
| 492 | u'specification is int'.format(value, cell_index_str, type(value)) |
||
| 493 | if config_header['fail_on_type_error']: |
||
| 494 | msg_queue['fail_on_type_error']['exceptions'].append(msg) |
||
| 495 | else: |
||
| 496 | msg_queue['fail_on_type_error']['logs'].append(msg) |
||
| 497 | |||
| 498 | elif config_header['type']['base'] == 'string': |
||
| 499 | if self._is_type_numeric(value): |
||
| 500 | convert_numbers = False |
||
| 501 | if 'convert_numbers' in config_header['type']: |
||
| 502 | convert_numbers = config_header['type']['convert_numbers'] |
||
| 503 | if convert_numbers: |
||
| 504 | value = str(value) |
||
| 505 | if not self._is_string(value): |
||
| 506 | msg = u'The value {0} in cell {1} is of type {2}; required by ' \ |
||
| 507 | u'specification is string'.format(value, cell_index_str, type(value)) |
||
| 508 | if config_header['fail_on_type_error']: |
||
| 509 | msg_queue['fail_on_type_error']['exceptions'].append(msg) |
||
| 510 | else: |
||
| 511 | msg_queue['fail_on_type_error']['logs'].append(msg) |
||
| 512 | else: |
||
| 513 | if 'pattern' in config_header['type']: |
||
| 514 | if re.search(config_header['type']['pattern'], value) is None: |
||
| 515 | msg = u'The value {0} in cell {1} does not follow the ' \ |
||
| 516 | u'given pattern {2}'.format(value, cell_index_str, |
||
| 517 | config_header['type']['pattern']) |
||
| 518 | if config_header['fail_on_type_error']: |
||
| 519 | msg_queue['fail_on_type_error']['exceptions'].append(msg) |
||
| 520 | else: |
||
| 521 | msg_queue['fail_on_type_error']['logs'].append(msg) |
||
| 522 | |||
| 523 | final_dict[curr_row][header] = value |
||
| 524 | |||
| 525 | if is_row_excluded: |
||
| 526 | # All messages are either raised as exception or logged |
||
| 527 | # Whether they are handled at all depends on the settings in self.excel_config['filter_properties'] |
||
| 528 | |||
| 529 | for msg_type, messages in msg_queue.items(): |
||
| 530 | for msg in messages['exceptions']: |
||
| 531 | if self.excel_config['filter_properties']['excluded_' + msg_type]: |
||
| 532 | # This ends the program on the first exception message |
||
| 533 | self._raise_value_error(msg) |
||
| 534 | elif self.excel_config['filter_properties']['excluded_enable_logging']: |
||
| 535 | # In case we don't want to raise type errors as exception |
||
| 536 | # we log them in case the user configured so |
||
| 537 | self._plugin.log.warn(msg) |
||
| 538 | for msg in messages['logs']: |
||
| 539 | if self.excel_config['filter_properties']['excluded_enable_logging']: |
||
| 540 | self._plugin.log.warn(msg) |
||
| 541 | del final_dict[curr_row] |
||
| 542 | else: |
||
| 543 | for msg_type, messages in msg_queue.items(): |
||
| 544 | for msg in messages['exceptions']: |
||
| 545 | # This ends the program on the first exception message |
||
| 546 | self._raise_value_error(msg) |
||
| 547 | for msg in messages['logs']: |
||
| 548 | self._plugin.log.warn(msg) |
||
| 549 | |||
| 550 | return final_dict |
||
| 551 | |||
| 552 | @staticmethod |
||
| 553 | def _is_string(value): |
||
| 554 | """ |
||
| 555 | Tests if a value is of a string type |
||
| 556 | Python2: unicode, str |
||
| 557 | Python3: str |
||
| 558 | |||
| 559 | :param value: Variable of any type |
||
| 560 | :return: True if value is of string type else false |
||
| 561 | """ |
||
| 562 | try: |
||
| 563 | str_type = basestring |
||
| 564 | except NameError: |
||
| 565 | str_type = str |
||
| 566 | return isinstance(value, str_type) |
||
| 567 | |||
| 568 | @staticmethod |
||
| 569 | def _is_type_numeric(value): |
||
| 570 | """ |
||
| 571 | Tests if a value is of an numeric type. |
||
| 572 | Python2: int, long, float |
||
| 573 | Python3: int, float |
||
| 574 | |||
| 575 | :param value: Variable of any type |
||
| 576 | :return: True if value is of integer type else false |
||
| 577 | """ |
||
| 578 | numeric_types = (int, float) |
||
| 579 | try: |
||
| 580 | long |
||
| 581 | numeric_types = (int, long, float) |
||
| 582 | except NameError: |
||
| 583 | pass |
||
| 584 | |||
| 585 | return isinstance(value, numeric_types) |
||
| 586 | |||
| 587 | @staticmethod |
||
| 588 | def _is_type_int_long(value): |
||
| 589 | """ |
||
| 590 | Tests if a value is of an integer type. |
||
| 591 | Python2: int, long |
||
| 592 | Python3: int |
||
| 593 | |||
| 594 | :param value: Variable of any type |
||
| 595 | :return: True if value is of integer type else false |
||
| 596 | """ |
||
| 597 | numeric_types = (int,) |
||
| 598 | try: |
||
| 599 | long |
||
| 600 | numeric_types = (int, long) |
||
| 601 | except NameError: |
||
| 602 | pass |
||
| 603 | |||
| 604 | return isinstance(value, numeric_types) |
||
| 605 | |||
| 606 | def _validate_json(self, excel_config_json_path): |
||
| 607 | |||
| 608 | try: |
||
| 609 | with open(excel_config_json_path) as file_pointer: |
||
| 610 | json_obj = json.load(file_pointer) |
||
| 611 | # the file is not deserializable as a json object |
||
| 612 | except ValueError as exc: |
||
| 613 | self._plugin.log.error('Malformed JSON file: {0} \n {1}'.format(excel_config_json_path, exc)) |
||
| 614 | raise exc |
||
| 615 | # some OS error occurred (e.g. file not found or malformed path string) |
||
| 616 | # have to catch two exception classes: in py2 : IOError; py3: OSError |
||
| 617 | except (IOError, OSError) as exc: |
||
| 618 | self._plugin.log.error(exc) |
||
| 619 | # raise only OSError to make error handling in caller easier |
||
| 620 | raise OSError() |
||
| 621 | |||
| 622 | # Validate the json object against the schema file |
||
| 623 | try: |
||
| 624 | with open(JSON_SCHEMA_FILE_PATH) as file_pointer: |
||
| 625 | schema_obj = json.load(file_pointer) |
||
| 626 | # the file is not deserializable as a json object |
||
| 627 | except ValueError as exc: |
||
| 628 | self._plugin.log.error('Malformed JSON schema file: {0} \n {1}'.format(JSON_SCHEMA_FILE_PATH, exc)) |
||
| 629 | raise exc |
||
| 630 | # some OS error occurred (e.g. file not found or malformed path string) |
||
| 631 | # have to catch two exception classes: in py2 : IOError; py3: OSError |
||
| 632 | except (IOError, OSError) as exc: |
||
| 633 | self._plugin.log.error(exc) |
||
| 634 | # raise only OSError to make error handling in caller easier |
||
| 635 | raise OSError() |
||
| 636 | |||
| 637 | # do the validation |
||
| 638 | try: |
||
| 639 | validate(json_obj, schema_obj) |
||
| 640 | except ValidationError as error: |
||
| 641 | self._plugin.log.error("Validation failed: {0}".format(error.message)) |
||
| 642 | raise |
||
| 643 | except SchemaError: |
||
| 644 | self._plugin.log.error("Invalid schema file: {0}".format(JSON_SCHEMA_FILE_PATH)) |
||
| 645 | raise |
||
| 646 | |||
| 647 | return json_obj |
||
| 648 | |||
| 649 | def _get_sheet(self, workbook): |
||
| 650 | |||
| 651 | # get sheet |
||
| 652 | worksheet = None |
||
| 653 | if type(self.excel_config['sheet_config']) == int: |
||
| 654 | worksheet = workbook.worksheets[self.excel_config['sheet_config'] - 1] |
||
| 655 | elif self.excel_config['sheet_config'] == 'active': |
||
| 656 | worksheet = workbook.active |
||
| 657 | elif self.excel_config['sheet_config'].startswith('name'): |
||
| 658 | worksheet = workbook[self.excel_config['sheet_config'].split(':')[1]] |
||
| 659 | elif self.excel_config['sheet_config'] == 'first': |
||
| 660 | worksheet = workbook.worksheets[0] |
||
| 661 | elif self.excel_config['sheet_config'] == 'last': |
||
| 662 | worksheet = workbook.worksheets[-1] |
||
| 663 | else: |
||
| 664 | # This cannot happen if json validation was ok |
||
| 665 | pass |
||
| 666 | return worksheet |
||
| 667 | |||
| 668 | def _raise_value_error(self, msg): |
||
| 669 | self._plugin.log.error(msg) |
||
| 670 | raise ValueError(msg.encode('UTF-8', 'ignore')) |
||
| 671 | |||
| 672 | def _transform_coordinates(self, row=None, column=None): |
||
| 673 | if row is None and column is None: |
||
| 674 | raise ValueError("_transform_coordinates: row and column cannot both be None.") |
||
| 675 | target_str = '' |
||
| 676 | if self.excel_config['orientation'] == 'column_based': |
||
| 677 | if column is not None: |
||
| 678 | target_str = get_column_letter(column) |
||
| 679 | if row is not None: |
||
| 680 | target_str += str(row) |
||
| 681 | else: |
||
| 682 | if row is not None: |
||
| 683 | target_str = get_column_letter(row) |
||
| 684 | if column is not None: |
||
| 685 | target_str += str(column) |
||
| 686 | return target_str |
||
| 687 |