GitHub Access Token became invalid

It seems like the GitHub access token used for retrieving details about this repository from GitHub became invalid. This might prevent certain types of inspections from being run (in particular, everything related to pull requests).
Please ask an admin of your repository to re-new the access token on this website.

ExcelValidationPlugin._is_string()   A
last analyzed

Complexity

Conditions 2

Size

Total Lines 15

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
cc 2
c 1
b 0
f 0
dl 0
loc 15
rs 9.4285
1
"""
2
Groundwork Excel read/write routines using openpyxl
3
"""
4
import datetime
5
import json
6
import os
7
import re
8
9
import openpyxl
10
from groundwork.patterns.gw_base_pattern import GwBasePattern
11
from jsonschema import validate, ValidationError, SchemaError
12
from openpyxl.utils import get_column_letter
0 ignored issues
show
introduced by
Imports from package openpyxl are not grouped
Loading history...
13
14
JSON_SCHEMA_FILE_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'assets', 'excel_config_schema.json')
15
16
17
class ExcelValidationPattern(GwBasePattern):
18
    """
19
    Groundwork Excel read/write routines using openpyxl
20
    """
21
22
    def __init__(self, *args, **kwargs):
23
        super(ExcelValidationPattern, self).__init__(*args, **kwargs)
24
        self.excel_validation = ExcelValidationPlugin(plugin=self)
25
26
27
class ExcelValidationPlugin:
28
    """
29
    Plugin level class of the Excel validation pattern.
30
    Note: There are no instances of the ExcelValidationPattern on the app.
31
    """
32
33
    def __init__(self, plugin):
34
        """
35
        :param plugin: The plugin, which wants to use documents
36
        :type plugin: GwBasePattern
37
        """
38
        self._plugin = plugin
39
        self._app = plugin.app
40
        self.excel_config = None
41
42
    def read_excel(self, excel_config_json_path, excel_workbook_path):
0 ignored issues
show
Comprehensibility introduced by
This function exceeds the maximum number of variables (54/15).
Loading history...
43
        """
44
        Main routine to read an Excel sheet.
45
46
        :param excel_config_json_path: The configuration json file
47
        :param excel_workbook_path: Relative or absolute path to an Excel workbook
48
        :return: Data dictionary with rows/colums as keys and a dictionary of "header": value as items
49
        """
50
51
        # The exceptions raised in this method shall be raised to the plugin level
52
        self.excel_config = self._validate_json(excel_config_json_path)
53
54
        #########################
55
        # Get index configuration
56
        #########################
57
        orientation = self.excel_config['orientation']
58
        header_idx_cfg_row_first = self.excel_config['headers_index_config']['row_index']['first']
59
        header_idx_cfg_row_last = self.excel_config['headers_index_config']['row_index']['last']
60
        header_idx_cfg_col_first = self.excel_config['headers_index_config']['column_index']['first']
61
        header_idx_cfg_col_last = self.excel_config['headers_index_config']['column_index']['last']
62
        data_idx_cfg_row_first = self.excel_config['data_index_config']['row_index']['first']
63
        data_idx_cfg_row_last = self.excel_config['data_index_config']['row_index']['last']
64
        data_idx_cfg_col_first = self.excel_config['data_index_config']['column_index']['first']
65
        data_idx_cfg_col_last = self.excel_config['data_index_config']['column_index']['last']
66
67
        ############################################################################
68
        # Rotate coordinates so we can work with virtually column based all the time
69
        # Just before addressing the cells a coordinate transformation is done again
70
        ############################################################################
71
        if orientation == 'column_based':
72
            # Correct the header and data matrix according to the orientation
73
            # We rotate the row_based layout so we can work with column_based in mind all the time
74
            # Name schema: corrected headers/data index config column/row first/last
75
            corr_header_idx_cfg_row_first = header_idx_cfg_row_first
76
            corr_header_idx_cfg_row_last = header_idx_cfg_row_last
77
            corr_header_idx_cfg_col_first = header_idx_cfg_col_first
78
            corr_header_idx_cfg_col_last = header_idx_cfg_col_last
79
            corr_data_idx_cfg_row_first = data_idx_cfg_row_first
80
            corr_data_idx_cfg_row_last = data_idx_cfg_row_last
81
            corr_data_idx_cfg_col_first = data_idx_cfg_col_first
82
            corr_data_idx_cfg_col_last = data_idx_cfg_col_last
83
        else:
84
            # row_based layout
85
            corr_header_idx_cfg_row_first = header_idx_cfg_col_first
86
            corr_header_idx_cfg_row_last = header_idx_cfg_col_last
87
            corr_header_idx_cfg_col_first = header_idx_cfg_row_first
88
            corr_header_idx_cfg_col_last = header_idx_cfg_row_last
89
            corr_data_idx_cfg_row_first = data_idx_cfg_col_first
90
            corr_data_idx_cfg_row_last = data_idx_cfg_col_last
91
            corr_data_idx_cfg_col_first = data_idx_cfg_row_first
92
            corr_data_idx_cfg_col_last = data_idx_cfg_row_last
93
94
        # Used for log and exception messages
95
        oriented_row_text = "row" if orientation == 'column_based' else "column"
96
        oriented_column_text = "column" if orientation == 'column_based' else "row"
97
98
        ######################################
99
        # Set defaults of headers_index_config
100
        ######################################
101
        if type(corr_header_idx_cfg_row_first) is not int:
0 ignored issues
show
introduced by
Using type() instead of isinstance() for a typecheck.
Loading history...
102
            # Assume the user wants to use the first row as header row
103
            corr_header_idx_cfg_row_first = 1
104
            self._plugin.log.debug("Config update: Setting headers_index_config -> {0}_index -> first to 1".format(
105
                oriented_row_text))
106
107
        if type(corr_header_idx_cfg_row_last) is not int:
0 ignored issues
show
introduced by
Using type() instead of isinstance() for a typecheck.
Loading history...
108
            # Only 1 header row is supported currently, set last header row to first header row
109
            corr_header_idx_cfg_row_last = corr_header_idx_cfg_row_first
110
            self._plugin.log.debug("Config update: Setting headers_index_config -> {0}_index -> last to {1}".format(
111
                oriented_row_text, corr_header_idx_cfg_row_first))
112
113
        if type(corr_header_idx_cfg_col_first) is not int:
0 ignored issues
show
introduced by
Using type() instead of isinstance() for a typecheck.
Loading history...
114
            # Assume the user wants to start at the first column
115
            corr_header_idx_cfg_col_first = 1
116
            self._plugin.log.debug("Config update: Setting headers_index_config -> {0}_index -> first to 1".format(
117
                oriented_column_text))
118
119
        if type(corr_header_idx_cfg_col_last) is not int:
0 ignored issues
show
introduced by
Using type() instead of isinstance() for a typecheck.
Loading history...
120
            if type(corr_data_idx_cfg_col_last) is int:
0 ignored issues
show
introduced by
Using type() instead of isinstance() for a typecheck.
Loading history...
121
                # We don't have a last column in the header config,
122
                # so we use what we have in the data config
123
                corr_header_idx_cfg_col_last = corr_data_idx_cfg_col_last
124
                self._plugin.log.debug("Config update: Setting headers_index_config -> {0}_index -> first to "
125
                                       "{1}".format(oriented_column_text, corr_header_idx_cfg_col_last))
126
127
        ###################################
128
        # Set defaults of data_index_config
129
        ###################################
130
        if type(corr_data_idx_cfg_row_first) is not int:
0 ignored issues
show
introduced by
Using type() instead of isinstance() for a typecheck.
Loading history...
131
            # Assume the first data row is the next after corr_header_idx_cfg_row_last
132
            corr_data_idx_cfg_row_first = corr_header_idx_cfg_row_last + 1
133
            self._plugin.log.debug("Config update: Setting data_index_config -> {0}_index -> first to {1}".format(
134
                oriented_row_text, corr_data_idx_cfg_row_first))
135
136
        # corr_data_idx_cfg_row_last has no defaults - the user input is master
137
138
        if type(corr_data_idx_cfg_col_first) is not int:
0 ignored issues
show
introduced by
Using type() instead of isinstance() for a typecheck.
Loading history...
139
            # Assume the first data column is equal to the first header column
140
            corr_data_idx_cfg_col_first = corr_header_idx_cfg_col_first
141
            self._plugin.log.debug("Config update: Setting data_index_config -> {0}_index -> first to {1}".format(
142
                oriented_column_text, corr_data_idx_cfg_col_first))
143
144
        if type(corr_data_idx_cfg_col_last) is not int:
0 ignored issues
show
introduced by
Using type() instead of isinstance() for a typecheck.
Loading history...
145
            if type(corr_header_idx_cfg_col_last) is int:
0 ignored issues
show
introduced by
Using type() instead of isinstance() for a typecheck.
Loading history...
146
                # We don't have a last column in the data config,
147
                # so we use what we have in the header config
148
                corr_data_idx_cfg_col_last = corr_header_idx_cfg_col_last
149
                self._plugin.log.debug("Config update: Setting data_index_config -> {0}_index -> last to "
150
                                       "{1}".format(oriented_column_text, corr_data_idx_cfg_col_last))
151
152
        ################################
153
        # Some more logic checks on rows
154
        # (matrix size and row order)
155
        ################################
156
        if corr_header_idx_cfg_row_last != corr_header_idx_cfg_row_first:
157
            # We can compare both because at this point they have to be integer
158
            # Multi line headers given
159
            self._raise_value_error("Config error: Multi line (grouped) headers are not yet supported. "
160
                                    "First and last header {0} must be equal.".format(oriented_row_text))
161
162
        if corr_data_idx_cfg_row_first <= corr_header_idx_cfg_row_last:
163
            # We can compare both because at this point they have to be integer
164
            # The data section is above the header section
165
            self._raise_value_error("Config error: headers_index_config -> {0}_index -> last is greater than "
166
                                    "data_index_config -> {0}_index -> first.".format(oriented_row_text))
167
168
        if type(corr_data_idx_cfg_row_last) is int:
0 ignored issues
show
introduced by
Using type() instead of isinstance() for a typecheck.
Loading history...
169
            if corr_data_idx_cfg_row_last < corr_data_idx_cfg_row_first:
170
                # The last data row is smaller than the first
171
                self._raise_value_error("Config error: data_index_config -> {0}_index -> first is greater than "
172
                                        "data_index_config -> {0}_index -> last.".format(oriented_row_text))
173
174
        ###################################
175
        # Some more logic checks on columns
176
        # (mismatches)
177
        ###################################
178
        if corr_header_idx_cfg_col_first != corr_data_idx_cfg_col_first:
179
            # We can compare both because at this point they have to be integer
180
            # First column mismatch
181
            self._raise_value_error("Config error: header_index_config -> {0}_index -> first is not equal to "
182
                                    "data_index_config -> {0}_index -> first.".format(oriented_column_text))
183
184
        if type(corr_header_idx_cfg_col_last) is int:
0 ignored issues
show
introduced by
Using type() instead of isinstance() for a typecheck.
Loading history...
185
            if type(corr_data_idx_cfg_col_last) is int:
0 ignored issues
show
introduced by
Using type() instead of isinstance() for a typecheck.
Loading history...
186
                if corr_header_idx_cfg_col_last != corr_data_idx_cfg_col_last:
187
                    # Last columns are given but do not match
188
                    self._raise_value_error(
189
                        "Config error: header_index_config -> {0}_index -> last ({1}) is not equal to "
190
                        "data_index_config -> {0}_index -> last ({2}).".format(
191
                            oriented_column_text,
192
                            corr_header_idx_cfg_col_last,
193
                            corr_data_idx_cfg_col_last
194
                        ))
195
196
        if type(corr_header_idx_cfg_col_last) is not int:
0 ignored issues
show
introduced by
Using type() instead of isinstance() for a typecheck.
Loading history...
197
            if type(corr_data_idx_cfg_col_last) is not int:
0 ignored issues
show
introduced by
Using type() instead of isinstance() for a typecheck.
Loading history...
198
                # The column count search is done on the header row, not on data
199
                if corr_data_idx_cfg_col_last != 'automatic':
200
                    self._raise_value_error(
201
                        "Config error: data_index_config -> {0}_index -> last ({1}) may only be an integer or "
202
                        "contain the value 'automatic'.".format(oriented_column_text,
203
                                                                corr_data_idx_cfg_col_last))
204
205
        #########################################
206
        # Set defaults for optional config values
207
        #########################################
208
        if 'sheet_config' not in self.excel_config:
209
            self.excel_config['sheet_config'] = 'active'
210
211
        for data_type_config in self.excel_config['data_type_config']:
212
            # default for possible problems should be strict if user tells nothing
213
            if 'fail_on_type_error' not in data_type_config:
214
                data_type_config['fail_on_type_error'] = True
215
            if 'fail_on_empty_cell' not in data_type_config:
216
                data_type_config['fail_on_empty_cell'] = True
217
            if 'fail_on_header_not_found' not in data_type_config:
218
                data_type_config['fail_on_header_not_found'] = True
219
220
            # default type is automatic
221
            if 'type' not in data_type_config:
222
                data_type_config['type'] = {'base': 'automatic'}
223
224
        # Set global filter properties
225
        # Defensive variant is True for all options
226
        if 'filter_properties' not in self.excel_config:
227
            self.excel_config['filter_properties'] = {}
228
        if 'excluded_fail_on_type_error' not in self.excel_config['filter_properties']:
229
            self.excel_config['filter_properties']['excluded_fail_on_type_error'] = True
230
        if 'excluded_fail_on_empty_cell' not in self.excel_config['filter_properties']:
231
            self.excel_config['filter_properties']['excluded_fail_on_empty_cell'] = True
232
        if 'excluded_enable_logging' not in self.excel_config['filter_properties']:
233
            self.excel_config['filter_properties']['excluded_enable_logging'] = True
234
235
        ############################
236
        # Get the workbook and sheet
237
        ############################
238
        wb = openpyxl.load_workbook(excel_workbook_path, data_only=True)
0 ignored issues
show
Coding Style Naming introduced by
The name wb does not conform to the variable naming conventions ((([a-z][a-z0-9_]{2,30})|(_[a-z0-9_]*))$).

This check looks for invalid names for a range of different identifiers.

You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements.

If your project includes a Pylint configuration file, the settings contained in that file take precedence.

To find out more about Pylint, please refer to their site.

Loading history...
239
        ws = self._get_sheet(wb)
0 ignored issues
show
Coding Style Naming introduced by
The name ws does not conform to the variable naming conventions ((([a-z][a-z0-9_]{2,30})|(_[a-z0-9_]*))$).

This check looks for invalid names for a range of different identifiers.

You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements.

If your project includes a Pylint configuration file, the settings contained in that file take precedence.

To find out more about Pylint, please refer to their site.

Loading history...
240
241
        #############################
242
        # Determine header row length
243
        #############################
244
        if type(corr_header_idx_cfg_col_last) == int:
0 ignored issues
show
introduced by
Using type() instead of isinstance() for a typecheck.
Loading history...
245
            # corr_header_idx_cfg_col_last already has the final value
246
            pass
247
        elif corr_header_idx_cfg_col_last == 'automatic':
248
            # automatic: use the length of the header row
249
            corr_header_idx_cfg_col_last = len(
250
                ws[self._transform_coordinates(row=corr_header_idx_cfg_row_first)])
251
            self._plugin.log.debug("Config update: Last header {0} was set to {1} using the 'automatic' "
252
                                   "mechanism.".format(oriented_column_text, corr_header_idx_cfg_col_last))
253
        else:
254
            # severalEmptyCells chosen
255
            target_empty_cell_count = int(corr_header_idx_cfg_col_last.split(':')[1])
256
            empty_cell_count = 0
257
            curr_column = corr_header_idx_cfg_col_first
258
            while empty_cell_count < target_empty_cell_count:
259
                value = ws[self._transform_coordinates(corr_header_idx_cfg_row_first,
260
                                                       curr_column)].value
261
                if value is None:
262
                    empty_cell_count += 1
263
                curr_column += 1
264
            corr_header_idx_cfg_col_last = curr_column - target_empty_cell_count - 1
265
            self._plugin.log.debug("Config update: Last header {0} was set to {1} using the 'automatic' "
266
                                   "mechanism.".format(oriented_column_text, corr_header_idx_cfg_col_last))
267
268
        ###################################
269
        # Determine header column locations
270
        ###################################
271
        spreadsheet_headers2columns = {}
272
        for column in range(corr_header_idx_cfg_col_first, corr_header_idx_cfg_col_last + 1):
273
            value = ws[self._transform_coordinates(corr_header_idx_cfg_row_first, column)].value
274
            if value is not None:
275
                spreadsheet_headers2columns[value] = column
276
            else:
277
                # if the value is None we have either
278
                # - one or more empty header cell in between 2 filled header cells.
279
                # - some empty header cells at the end of the row.
280
                #   That might happen because when choosing 'automatic' header row detection, openpyxl functionality
281
                #   is used. It always returns the length of the longest row in the whole sheet.
282
                #   If that is not the header row, we have empty cells at the end of the header row.
283
                #   However, that is not a problem as header values 'None' are not added.
284
                pass
285
        spreadsheet_headers = spreadsheet_headers2columns.keys()
286
287
        ###############################################################
288
        # Check for not existing headers on spreadsheet and config side
289
        ###############################################################
290
        # Build a data_type_config dictionary with header as key
291
        config_header_dict = {x['header']: x for x in self.excel_config['data_type_config']}
292
293
        # Check: Are config data_type_config headers unique?
294
        if len(config_header_dict.keys()) != len(set(config_header_dict.keys())):
295
            self._raise_value_error("Config error: data_type_config -> header duplicate entries found.")
296
297
        # Check for configured headers not found in spreadsheet
298
        missing_headers_in_spreadsheet = list(set(config_header_dict.keys()) - set(spreadsheet_headers))
299
        for header in missing_headers_in_spreadsheet:
300
            # Check if the fail_on_header_not_found is true
301
            data_type = [x for x in self.excel_config['data_type_config'] if x['header'] == header][0]
302
            msg = u"Config error: The header '{0}' could not be found in the spreadsheet.".format(header)
303
            if data_type['fail_on_header_not_found']:
304
                self._raise_value_error(msg)
305
            else:
306
                self._plugin.log.error(msg)
307
308
        # Check for spreadsheet headers not found in config
309
        missing_headers_in_config = list(set(spreadsheet_headers) - set(config_header_dict.keys()))
310
        if missing_headers_in_config:
311
            self._plugin.log.debug(u"The following spreadsheet headers are not configured for reading: {0}".format(
312
                ', '.join([header.replace('\n', ' ') for header in missing_headers_in_config])))
313
            for header in missing_headers_in_config:
314
                del spreadsheet_headers2columns[header]
315
316
        #########################
317
        # Determine last data row
318
        #########################
319
        if type(corr_data_idx_cfg_row_last) == int:
0 ignored issues
show
introduced by
Using type() instead of isinstance() for a typecheck.
Loading history...
320
            # do nothing, the value is already set
321
            pass
322
        elif corr_data_idx_cfg_row_last == 'automatic':
323
            # Take data from library
324
            corr_data_idx_cfg_row_last = corr_data_idx_cfg_row_first
325
            for curr_column in range(corr_header_idx_cfg_col_first,
326
                                     corr_header_idx_cfg_col_last):
327
                len_curr_column = len(ws[self._transform_coordinates(column=curr_column)])
328
                if len_curr_column > corr_data_idx_cfg_row_last:
329
                    corr_data_idx_cfg_row_last = len_curr_column
330
            self._plugin.log.debug("Config update: Last data {0} was set to {1} using the 'automatic' "
331
                                   "mechanism.".format(oriented_row_text, corr_data_idx_cfg_row_last))
332
        else:
333
            # severalEmptyCells is chosen
334
            target_empty_rows_count = int(corr_data_idx_cfg_row_last.split(':')[1])
335
            last_row_detected = False
336
            curr_row = corr_data_idx_cfg_row_first
337
            empty_rows_count = 0
338
            while not last_row_detected:
339
                # go through rows
340
                all_columns_empty = True
341
                for header in spreadsheet_headers2columns:
342
                    curr_column = spreadsheet_headers2columns[header]
343
                    value = ws[self._transform_coordinates(curr_row, curr_column)].value
344
                    if value is not None:
345
                        all_columns_empty = False
346
                        break
347
                if all_columns_empty:
348
                    empty_rows_count += 1
349
                if empty_rows_count >= target_empty_rows_count:
350
                    last_row_detected = True
351
                else:
352
                    curr_row += 1
353
            corr_data_idx_cfg_row_last = curr_row - target_empty_rows_count
354
            self._plugin.log.debug("Config update: Last data {0} was set to {1} using the 'severalEmptyCells' "
355
                                   "mechanism.".format(oriented_row_text, corr_data_idx_cfg_row_last))
356
357
        #################################################
358
        # Go through the rows, read and validate the data
359
        #################################################
360
        final_dict = {}
361
        for curr_row in range(corr_data_idx_cfg_row_first, corr_data_idx_cfg_row_last + 1):
0 ignored issues
show
unused-code introduced by
Too many nested blocks (6/5)
Loading history...
unused-code introduced by
Too many nested blocks (7/5)
Loading history...
unused-code introduced by
Too many nested blocks (8/5)
Loading history...
362
            # Go through rows
363
            final_dict[curr_row] = {}
364
365
            msg_queue = {
366
                'fail_on_empty_cell': {
367
                    'exceptions': [],
368
                    'logs': []
369
                },
370
                'fail_on_type_error': {
371
                    'exceptions': [],
372
                    'logs': []
373
                }
374
            }
375
            is_row_excluded = False
376
377
            for header in spreadsheet_headers2columns:
378
                # Go through columns
379
                curr_column = spreadsheet_headers2columns[header]
380
                cell_index_str = self._transform_coordinates(curr_row, curr_column)
381
                value = ws[cell_index_str].value
382
383
                # Start the validation
384
                config_header = config_header_dict[header]
385
386
                if value is None:
387
                    msg = u"The '{0}' in cell {1} is empty".format(header, cell_index_str)
388
                    if config_header['fail_on_empty_cell']:
389
                        msg_queue['fail_on_empty_cell']['exceptions'].append(msg)
390
                    else:
391
                        msg_queue['fail_on_empty_cell']['logs'].append(msg)
392
                else:
393
                    if config_header['type']['base'] == 'automatic':
394
                        pass
395
                    elif config_header['type']['base'] == 'date':
396
                        if not isinstance(value, datetime.datetime):
397
                            msg = u'The value {0} in cell {1} is of type {2}; required by ' \
398
                                  u'specification is datetime'.format(value, cell_index_str, type(value))
399
                            if config_header['fail_on_type_error']:
400
                                msg_queue['fail_on_type_error']['exceptions'].append(msg)
401
                            else:
402
                                msg_queue['fail_on_type_error']['logs'].append(msg)
403
                    elif config_header['type']['base'] == 'enum':
404
                        filtered_enum_values = []
405
                        if 'filter' in config_header['type']:
406
                            filtered_enum_values = config_header['type']['filter']['whitelist_values']
407
                        if not self._is_string(value):
408
                            msg = u'The value {0} in cell {1} is of type {2}; required by ' \
409
                                  u'specification is a string type (enum)'.format(value, cell_index_str, type(value))
410
                            if config_header['fail_on_type_error']:
411
                                msg_queue['fail_on_type_error']['exceptions'].append(msg)
412
                            else:
413
                                msg_queue['fail_on_type_error']['logs'].append(msg)
414
                            if filtered_enum_values:
415
                                msg = 'Cannot apply enum filter to cell {1} because the type check failed.'
416
                                self._plugin.log.error(msg)
417
                        else:
418
                            valid_values = config_header['type']['enum_values']
419
                            if value not in valid_values:
420
                                msg = u'The value {0} in cell {1} is not contained in the given enum ' \
421
                                      u'[{2}]'.format(value, cell_index_str, ', '.join(valid_values))
422
                                if config_header['fail_on_type_error']:
423
                                    msg_queue['fail_on_type_error']['exceptions'].append(msg)
424
                                else:
425
                                    msg_queue['fail_on_type_error']['logs'].append(msg)
426
                                if filtered_enum_values:
427
                                    msg = 'Cannot apply enum filter to cell {1} because the enum values check failed.'
428
                                    self._plugin.log.error(msg)
429
                            else:
430
                                if filtered_enum_values and value not in filtered_enum_values:
431
                                    is_row_excluded = True
432
                                    self._plugin.log.debug(u"The {0} {1} was excluded due to an exclude filter on "
433
                                                           u"cell {2} ({3} not in [{4}]).".format(
434
                                                               oriented_row_text,
435
                                                               curr_row,
436
                                                               cell_index_str,
437
                                                               value,
438
                                                               ', '.join(filtered_enum_values)))
439
440
                    elif config_header['type']['base'] == 'float':
441
                        # TODO Allow int, too
0 ignored issues
show
Coding Style introduced by
TODO and FIXME comments should generally be avoided.
Loading history...
442 View Code Duplication
                        if not isinstance(value, float):
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated in your project.
Loading history...
443
                            msg = u'The value {0} in cell {1} is of type {2}; required by ' \
444
                                  u'specification is float'.format(value, cell_index_str, type(value))
445
                            if config_header['fail_on_type_error']:
446
                                msg_queue['fail_on_type_error']['exceptions'].append(msg)
447
                            else:
448
                                msg_queue['fail_on_type_error']['logs'].append(msg)
449
                        else:
450
                            if 'minimum' in config_header['type']:
451
                                if value < config_header['type']['minimum']:
452
                                    msg = u'The value {0} in cell {1} is smaller than the given minimum ' \
453
                                          u'of {2}'.format(value, cell_index_str, config_header['type']['minimum'])
454
                                    if config_header['fail_on_type_error']:
455
                                        msg_queue['fail_on_type_error']['exceptions'].append(msg)
456
                                    else:
457
                                        msg_queue['fail_on_type_error']['logs'].append(msg)
458
                            if 'maximum' in config_header['type']:
459
                                if value > config_header['type']['maximum']:
460
                                    msg = u'The value {0} in cell {1} is greater than the given maximum ' \
461
                                          u'of {2}'.format(value, cell_index_str, config_header['type']['maximum'])
462
                                    if config_header['fail_on_type_error']:
463
                                        msg_queue['fail_on_type_error']['exceptions'].append(msg)
464
                                    else:
465
                                        msg_queue['fail_on_type_error']['logs'].append(msg)
466
                    elif config_header['type']['base'] == 'integer':
467
                        # Integer values stored by Excel are returned as float (e.g. 3465.0)
468
                        # So we have to check if the float can be converted to int without precision loss
469
                        if isinstance(value, float):
470
                            if value.is_integer():
471
                                # the typecast to int may return int or long, depending on the size of value
472
                                value = int(value)
473 View Code Duplication
                        if self._is_type_int_long(value):
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated in your project.
Loading history...
474
                            if 'minimum' in config_header['type']:
475
                                if value < config_header['type']['minimum']:
476
                                    msg = u'The value {0} in cell {1} is smaller than the given minimum ' \
477
                                          u'of {2}'.format(value, cell_index_str, config_header['type']['minimum'])
478
                                    if config_header['fail_on_type_error']:
479
                                        msg_queue['fail_on_type_error']['exceptions'].append(msg)
480
                                    else:
481
                                        msg_queue['fail_on_type_error']['logs'].append(msg)
482
                            if 'maximum' in config_header['type']:
483
                                if value > config_header['type']['maximum']:
484
                                    msg = u'The value {0} in cell {1} is greater than the given maximum ' \
485
                                          u'of {2}'.format(value, cell_index_str, config_header['type']['maximum'])
486
                                    if config_header['fail_on_type_error']:
487
                                        msg_queue['fail_on_type_error']['exceptions'].append(msg)
488
                                    else:
489
                                        msg_queue['fail_on_type_error']['logs'].append(msg)
490
                        else:
491
                            msg = u'The value {0} in cell {1} is of type {2}; required by ' \
492
                                  u'specification is int'.format(value, cell_index_str, type(value))
493
                            if config_header['fail_on_type_error']:
494
                                msg_queue['fail_on_type_error']['exceptions'].append(msg)
495
                            else:
496
                                msg_queue['fail_on_type_error']['logs'].append(msg)
497
498
                    elif config_header['type']['base'] == 'string':
499
                        if self._is_type_numeric(value):
500
                            convert_numbers = False
501
                            if 'convert_numbers' in config_header['type']:
502
                                convert_numbers = config_header['type']['convert_numbers']
503
                            if convert_numbers:
504
                                value = str(value)
505
                        if not self._is_string(value):
506
                            msg = u'The value {0} in cell {1} is of type {2}; required by ' \
507
                                  u'specification is string'.format(value, cell_index_str, type(value))
508
                            if config_header['fail_on_type_error']:
509
                                msg_queue['fail_on_type_error']['exceptions'].append(msg)
510
                            else:
511
                                msg_queue['fail_on_type_error']['logs'].append(msg)
512
                        else:
513
                            if 'pattern' in config_header['type']:
514
                                if re.search(config_header['type']['pattern'], value) is None:
515
                                    msg = u'The value {0} in cell {1} does not follow the ' \
516
                                          u'given pattern {2}'.format(value, cell_index_str,
517
                                                                      config_header['type']['pattern'])
518
                                    if config_header['fail_on_type_error']:
519
                                        msg_queue['fail_on_type_error']['exceptions'].append(msg)
520
                                    else:
521
                                        msg_queue['fail_on_type_error']['logs'].append(msg)
522
523
                final_dict[curr_row][header] = value
524
525
            if is_row_excluded:
526
                # All messages are either raised as exception or logged
527
                # If at all depends on the settings in self.excel_config['filter_properties']
528
529
                for msg_type, messages in msg_queue.items():
530
                    for msg in messages['exceptions']:
531
                        if self.excel_config['filter_properties']['excluded_' + msg_type]:
532
                            # This ends the program on the first exception message
533
                            self._raise_value_error(msg)
534
                        elif self.excel_config['filter_properties']['excluded_enable_logging']:
535
                            # In case we don't want to raise type errors as exception
536
                            # we log them in case the user configured so
537
                            self._plugin.log.warn(msg)
538
                    for msg in messages['logs']:
539
                        if self.excel_config['filter_properties']['excluded_enable_logging']:
540
                            self._plugin.log.warn(msg)
541
                del final_dict[curr_row]
542
            else:
543
                for msg_type, messages in msg_queue.items():
544
                    for msg in messages['exceptions']:
545
                        # This ends the program on the first exception message
546
                        self._raise_value_error(msg)
547
                    for msg in messages['logs']:
548
                        self._plugin.log.warn(msg)
549
550
        return final_dict
551
552
    @staticmethod
553
    def _is_string(value):
554
        """
555
        Tests if a value is of a string type
556
        Python2: unicode, str
557
        Python3: str
558
559
        :param value: Variable of any type
560
        :return: True if value is of string type else false
561
        """
562
        try:
563
            str_type = basestring
564
        except NameError:
565
            str_type = str
566
        return isinstance(value, str_type)
567
568
    @staticmethod
569
    def _is_type_numeric(value):
570
        """
571
        Tests if a value is of an numeric type.
572
        Python2: int, long, float
573
        Python3: int, float
574
575
        :param value: Variable of any type
576
        :return: True if value is of integer type else false
577
        """
578
        numeric_types = (int, float)
579
        try:
580
            long
0 ignored issues
show
Unused Code introduced by
This statement seems to have no effect and could be removed.

This issue is typically triggered when a function that does not have side-effects is called and the return value is discarded:

class SomeClass:
    def __init__(self):
        self._x = 5

    def squared(self):
        return self._x * self._x

some_class = SomeClass()
some_class.squared()        # Flagged, as the return value is not used
print(some_class.squared()) # Ok
Loading history...
581
            numeric_types = (int, long, float)
582
        except NameError:
583
            pass
584
585
        return isinstance(value, numeric_types)
586
587
    @staticmethod
588
    def _is_type_int_long(value):
589
        """
590
        Tests if a value is of an integer type.
591
        Python2: int, long
592
        Python3: int
593
594
        :param value: Variable of any type
595
        :return: True if value is of integer type else false
596
        """
597
        numeric_types = (int)
598
        try:
599
            long
0 ignored issues
show
Unused Code introduced by
This statement seems to have no effect and could be removed.

This issue is typically triggered when a function that does not have side-effects is called and the return value is discarded:

class SomeClass:
    def __init__(self):
        self._x = 5

    def squared(self):
        return self._x * self._x

some_class = SomeClass()
some_class.squared()        # Flagged, as the return value is not used
print(some_class.squared()) # Ok
Loading history...
600
            numeric_types = (int, long)
601
        except NameError:
602
            pass
603
604
        return isinstance(value, numeric_types)
605
606
    def _validate_json(self, excel_config_json_path):
607
608
        try:
609
            with open(excel_config_json_path) as file_pointer:
610
                json_obj = json.load(file_pointer)
611
        # the file is not deserializable as a json object
612
        except ValueError as exc:
613
            self._plugin.log.error('Malformed JSON file: {0} \n {1}'.format(excel_config_json_path, exc))
614
            raise exc
615
        # some os error occured (e.g file not found or malformed path string)
616
        # have to catch two exception classes: in py2 : IOError; py3: OSError
617
        except (IOError, OSError) as exc:
618
            self._plugin.log.error(exc)
619
            # raise only OSError to make error handling in caller easier
620
            raise OSError()
621
622
        # validate json object if schema file path is there; otherwise throw warning
623
        try:
624
            with open(JSON_SCHEMA_FILE_PATH) as file_pointer:
625
                schema_obj = json.load(file_pointer)
626
        # the file is not deserializable as a json object
627
        except ValueError as exc:
628
            self._plugin.log.error('Malformed JSON schema file: {0} \n {1}'.format(JSON_SCHEMA_FILE_PATH, exc))
629
            raise exc
630
        # some os error occured (e.g file not found or malformed path string)
631
        # have to catch two exception classes:  in py2 : IOError; py3: OSError
632
        except (IOError, OSError) as exc:
633
            self._plugin.log.error(exc)
634
            # raise only OSError to make error handling in caller easier
635
            raise OSError()
636
637
        # do the validation
638
        try:
639
            validate(json_obj, schema_obj)
640
        except ValidationError as error:
641
            self._plugin.log.error("Validation failed: {0}".format(error.message))
642
            raise
643
        except SchemaError:
644
            self._plugin.log.error("Invalid schema file: {0}".format(JSON_SCHEMA_FILE_PATH))
645
            raise
646
647
        return json_obj
648
649
    def _get_sheet(self, workbook):
650
651
        # get sheet
652
        worksheet = None
653
        if type(self.excel_config['sheet_config']) == int:
0 ignored issues
show
introduced by
Using type() instead of isinstance() for a typecheck.
Loading history...
654
            worksheet = workbook.worksheets[self.excel_config['sheet_config'] - 1]
655
        elif self.excel_config['sheet_config'] == 'active':
656
            worksheet = workbook.active
657
        elif self.excel_config['sheet_config'].startswith('name'):
658
            worksheet = workbook[self.excel_config['sheet_config'].split(':')[1]]
659
        elif self.excel_config['sheet_config'] == 'first':
660
            worksheet = workbook.worksheets[0]
661
        elif self.excel_config['sheet_config'] == 'last':
662
            worksheet = workbook.worksheets[len(workbook.get_sheet_names()) - 1]
663
        else:
664
            # This cannot happen if json validation was ok
665
            pass
666
        return worksheet
667
668
    def _raise_value_error(self, msg):
669
        self._plugin.log.error(msg)
670
        raise ValueError(msg.encode('UTF-8', 'ignore'))
671
672
    def _transform_coordinates(self, row=None, column=None):
673
        if row is None and column is None:
674
            raise ValueError("_transform_coordinates: row and column cannot both be None.")
675
        target_str = ''
676
        if self.excel_config['orientation'] == 'column_based':
677
            if column is not None:
678
                target_str = get_column_letter(column)
679
            if row is not None:
680
                target_str += str(row)
681
        else:
682
            if row is not None:
683
                target_str = get_column_letter(row)
684
            if column is not None:
685
                target_str += str(column)
686
        return target_str
687