Passed: Push to master (77441b...e86b5a) by Daniel, 01:30, created

DataInputOutput.__init__()   A

Complexity:    Conditions 1
Size:          Total Lines 9, Code Lines 9
Duplication:   Lines 0, Ratio 0 %
Importance:    Changes 0

Metric   Value
cc       1
eloc     9
nop      2
dl       0
loc      9
rs       9.95
c        0
b        0
f        0
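
The figures above are per-function source metrics for DataInputOutput.__init__(). The report does not name the tool that produced them, so the snippet below is only a sketch of how comparable numbers (cyclomatic complexity, raw line counts, maintainability index) could be reproduced locally with radon; the file name is a placeholder.

from radon.complexity import cc_visit   # cyclomatic complexity per function/method
from radon.metrics import mi_visit      # maintainability index
from radon.raw import analyze           # raw counts: loc, lloc, sloc, comments, blank

with open('data_input_output.py', encoding='utf-8') as module_handle:  # placeholder file name
    source = module_handle.read()

for block in cc_visit(source):
    print(block.name, 'complexity:', block.complexity)
raw_counts = analyze(source)
print('loc:', raw_counts.loc, 'lloc:', raw_counts.lloc, 'blank:', raw_counts.blank)
print('maintainability index:', round(mi_visit(source, multi=True), 2))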

"""
Data Input Output class
"""
# package to add support for multi-language (i18n)
import gettext
# package to handle files/folders and related metadata/operations
import os
# package facilitating Data Frames manipulation
import pandas


class DataInputOutput:
    locale = None

    def __init__(self, in_language = 'en_US'):
        file_parts = os.path.normpath(os.path.abspath(__file__)).replace('\\', os.path.altsep) \
            .split(os.path.altsep)
        locale_domain = file_parts[(len(file_parts) - 1)].replace('.py', '')
        locale_folder = os.path.normpath(os.path.join(
                os.path.join(os.path.altsep.join(file_parts[:-2]), 'project_locale'),
                locale_domain))
        self.locale = gettext.translation(locale_domain, localedir = locale_folder,
                                          languages = [in_language], fallback = True)
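
    # The translation domain is this file's own name without '.py'; gettext.translation()
    # then resolves the compiled catalogue as
    # <grandparent folder>/project_locale/<domain>/<in_language>/LC_MESSAGES/<domain>.mo,
    # and with fallback = True a missing catalogue degrades to the untranslated strings.
    # Note: os.path.altsep is '/' on Windows but None on POSIX, so the path handling above
    # assumes a Windows interpreter.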

    @staticmethod
    def fn_add_missing_defaults_to_dict_message(in_dict):
        if 'field delimiter' not in in_dict:
            in_dict['field delimiter'] = os.pathsep
        if 'compression' not in in_dict:
            in_dict['compression'] = 'infer'
        return in_dict
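
    # A message dict arriving without these keys therefore falls back to os.pathsep (';' on
    # Windows, ':' on POSIX) as field delimiter and to pandas' 'infer' compression handling.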

    def fn_build_feedback_for_logger(self, operation_details):
        messages = {}
        if operation_details['operation'] == 'load':
            files_counted = str(operation_details['files counted'])
            messages = {
                'failed' : self.locale.gettext(
                        'Error encountered on loading Pandas Data Frame '
                        + 'from {file_type} file type (see below)')
                    .replace('{file_type}', operation_details['format'].upper()),
                'success': self.locale.gettext(
                        'All {files_counted} files of type {file_type} '
                        + 'successfully added to a Pandas Data Frame')
                    .replace('{files_counted}', files_counted)
                    .replace('{file_type}', operation_details['format'].upper())
            }
        elif operation_details['operation'] == 'save':
            messages = {
                'failed' : self.locale.gettext(
                        'Error encountered on saving Pandas Data Frame '
                        + 'into a {file_type} file type (see below)')
                    .replace('{file_type}', operation_details['format'].upper()),
                'success': self.locale.gettext(
                        'Pandas Data Frame has just been saved to file "{file_name}", '
                        + 'considering {file_type} as file type')
                    .replace('{file_name}', operation_details['name'])
                    .replace('{file_type}', operation_details['format'].upper()),
            }
        return messages

    def fn_file_operation_logger(self, local_logger, in_logger_dict):
        messages = self.fn_build_feedback_for_logger(in_logger_dict)
        if in_logger_dict['error details'] is None:
            local_logger.info(messages['success'])
        else:
            local_logger.error(messages['failed'])
            local_logger.error(in_logger_dict['error details'])

    def fn_load_file_into_data_frame(self, in_logger, timer, in_dict):
        timer.start()
        if self.fn_store_data_frame_to_file_validation(in_logger, in_dict):
            in_dict = self.fn_add_missing_defaults_to_dict_message(in_dict)
            in_dict.update({'operation': 'load'})
            in_dict = self.fn_pack_dict_message(in_dict, in_dict['file list'])
            in_dict = self.fn_internal_load_csv_file_into_data_frame(in_dict)
            in_dict = self.fn_internal_load_excel_file_into_data_frame(in_dict)
            in_dict = self.fn_internal_load_json_file_into_data_frame(in_dict)
            in_dict = self.fn_internal_load_parquet_file_into_data_frame(in_dict)
            in_dict = self.fn_internal_load_pickle_file_into_data_frame(in_dict)
            self.fn_file_operation_logger(in_logger, in_dict)
        timer.stop()
        return in_dict.get('out data frame')
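
    # Each fn_internal_load_* helper below is a no-op unless in_dict['format'] matches its file
    # type, so in fn_load_file_into_data_frame exactly one of the chained calls reads files and
    # the others return the message dict unchanged; any exception is captured in
    # 'error details' rather than raised.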

    @staticmethod
    def fn_internal_load_csv_file_into_data_frame(in_dict):
        if in_dict['format'].lower() == 'csv':
            try:
                in_dict['out data frame'] = pandas.concat(
                        [pandas.read_csv(filepath_or_buffer = crt_file,
                                         delimiter = in_dict['field delimiter'],
                                         cache_dates = True,
                                         index_col = None,
                                         memory_map = True,
                                         low_memory = False,
                                         encoding = 'utf-8',
                                         ) for crt_file in in_dict['files list']],
                        sort = False)
            except Exception as err:
                in_dict['error details'] = err
        return in_dict

    @staticmethod
    def fn_internal_load_excel_file_into_data_frame(in_dict):
        if in_dict['format'].lower() == 'excel':
            try:
                in_dict['out data frame'] = pandas.concat(
                        [pandas.read_excel(io = crt_file,
                                           verbose = True,
                                           ) for crt_file in in_dict['files list']],
                        sort = False)
            except Exception as err:
                in_dict['error details'] = err
        return in_dict

    @staticmethod
    def fn_internal_load_json_file_into_data_frame(in_dict):
        if in_dict['format'].lower() == 'json':
            try:
                in_dict['out data frame'] = pandas.concat(
                        [pandas.read_json(path_or_buf = crt_file,
                                          compression = in_dict['compression'],
                                          ) for crt_file in in_dict['files list']],
                        sort = False)
            except Exception as err:
                in_dict['error details'] = err
        return in_dict

    @staticmethod
    def fn_internal_load_parquet_file_into_data_frame(in_dict):
        if in_dict['format'].lower() == 'parquet':
            try:
                in_dict['out data frame'] = pandas.concat(
                        [pandas.read_parquet(path = crt_file,
                                             ) for crt_file in in_dict['files list']],
                        sort = False)
            except Exception as err:
                in_dict['error details'] = err
        return in_dict

    @staticmethod
    def fn_internal_load_pickle_file_into_data_frame(in_dict):
        if in_dict['format'].lower() == 'pickle':
            try:
                in_dict['out data frame'] = pandas.concat(
                        [pandas.read_pickle(path = crt_file,
                                            compression = in_dict['compression'],
                                            ) for crt_file in in_dict['files list']],
                        sort = False)
            except Exception as err:
                in_dict['error details'] = err
        return in_dict

    @staticmethod
    def fn_internal_store_data_frame_to_csv_file(in_dict):
        if in_dict['format'].lower() == 'csv':
            try:
                in_dict['in data frame'].to_csv(path_or_buf = in_dict['name'],
                                                sep = in_dict['field delimiter'],
                                                header = True,
                                                index = False,
                                                encoding = 'utf-8')
            except Exception as err:
                in_dict['error details'] = err
        return in_dict

    @staticmethod
    def fn_internal_store_data_frame_to_excel_file(in_dict):
        if in_dict['format'].lower() == 'excel':
            try:
                in_dict['in data frame'].to_excel(excel_writer = in_dict['name'],
                                                  engine = 'xlsxwriter',
                                                  freeze_panes = (1, 1),
                                                  encoding = 'utf-8',
                                                  index = False,
                                                  verbose = True)
            except Exception as err:
                in_dict['error details'] = err
        return in_dict

    @staticmethod
    def fn_internal_store_data_frame_to_json_file(in_dict):
        if in_dict['format'].lower() == 'json':
            try:
                in_dict['in data frame'].to_json(path_or_buf = in_dict['name'],
                                                 compression = in_dict['compression'])
            except Exception as err:
                in_dict['error details'] = err
        return in_dict

    @staticmethod
    def fn_internal_store_data_frame_to_parquet_file(in_dict):
        if in_dict['format'].lower() == 'parquet':
            try:
                in_dict['in data frame'].to_parquet(path = in_dict['name'],
                                                    compression = in_dict['compression'],
                                                    use_deprecated_int96_timestamps = True)
            except Exception as err:
                in_dict['error details'] = err
        return in_dict

    @staticmethod
    def fn_internal_store_data_frame_to_pickle_file(in_dict):
        if in_dict['format'].lower() == 'pickle':
            try:
                in_dict['in data frame'].to_pickle(path = in_dict['name'],
                                                   compression = in_dict['compression'])
            except Exception as err:
                in_dict['error details'] = err
        return in_dict

    @staticmethod
    def fn_pack_dict_message(in_dict, in_file_list):
        if in_dict['format'].lower() in ('parquet', 'pickle') \
                and in_dict['compression'].lower() == 'none':
            in_dict['compression'] = None
        return {
            'compression'    : in_dict['compression'],
            'field delimiter': in_dict['field delimiter'],
            'files list'     : in_file_list,
            'files counted'  : len(in_file_list),
            'error details'  : None,
            'format'         : in_dict['format'],
            'name'           : in_dict['name'],
            'in data frame'  : None,
            'operation'      : in_dict['operation'],
            'out data frame' : None,
        }
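
    # This packed dict is the single message threaded through every fn_internal_* helper: the
    # request ('format', 'name', 'compression', 'field delimiter', 'files list'), bookkeeping
    # ('operation', 'files counted') and the slots filled in later ('in data frame',
    # 'out data frame', 'error details').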

    def fn_store_data_frame_to_file(self, in_logger, timer, in_data_frame, in_dict):
        timer.start()
        if self.fn_store_data_frame_to_file_validation(in_logger, in_dict):
            in_dict = self.fn_add_missing_defaults_to_dict_message(in_dict)
            in_dict.update({'operation': 'save'})
            in_dict = self.fn_pack_dict_message(in_dict, [])
            in_dict.update({'in data frame': in_data_frame})
            # special case treatment
            in_dict = self.fn_internal_store_data_frame_to_csv_file(in_dict)
            in_dict = self.fn_internal_store_data_frame_to_excel_file(in_dict)
            in_dict = self.fn_internal_store_data_frame_to_json_file(in_dict)
            in_dict = self.fn_internal_store_data_frame_to_parquet_file(in_dict)
            in_dict = self.fn_internal_store_data_frame_to_pickle_file(in_dict)
            self.fn_file_operation_logger(in_logger, in_dict)
        timer.stop()

    def fn_store_data_frame_to_file_validation(self, local_logger, in_file_details):
        given_format_is_implemented = False
        if 'format' in in_file_details:
            implemented_file_formats = ['csv', 'excel', 'json', 'parquet', 'pickle']
            given_format = in_file_details['format'].lower()
            given_format_is_implemented = True
            if given_format not in implemented_file_formats:
                given_format_is_implemented = False
                local_logger.error(self.locale.gettext(
                        'File "format" attribute has a value of "{format_value}" '
                        + 'which is not among currently implemented values: '
                        + '"{implemented_file_formats}", '
                        + 'therefore desired file operation is not possible')
                                   .replace('{format_value}', given_format)
                                   .replace('{implemented_file_formats}',
                                            '", "'.join(implemented_file_formats)))
        else:
            local_logger.error(self.locale.gettext(
                    'File "format" attribute is mandatory in the file setting, but missing, '
                    + 'therefore desired file operation is not possible'))
        return given_format_is_implemented
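
Below is a minimal usage sketch inferred from the methods above, not taken from the project itself: the caller passes a standard logging logger, any object exposing start() and stop() (a no-op stand-in here), and a message dict whose keys ('format', 'name', 'file list', and optionally 'field delimiter' and 'compression') are the ones the class reads; the file names are hypothetical.

import logging


class NoOpTimer:
    # hypothetical stand-in; DataInputOutput only calls start() and stop() on the timer it gets
    def start(self): pass
    def stop(self): pass


logger = logging.getLogger(__name__)
dio = DataInputOutput(in_language = 'en_US')

# load several CSV files into a single Pandas Data Frame
df = dio.fn_load_file_into_data_frame(logger, NoOpTimer(), {
    'format'         : 'csv',
    'name'           : 'daily exports',            # required by fn_pack_dict_message
    'file list'      : ['day_1.csv', 'day_2.csv'],
    'field delimiter': ',',
})

# store the resulting Data Frame as a Parquet file
dio.fn_store_data_frame_to_file(logger, NoOpTimer(), df, {
    'format'     : 'parquet',
    'name'       : 'daily_exports.parquet',
    'compression': 'snappy',
})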