Completed
Push — master ( 545613...55df2f ) by Daniel
14s (queued 11s)

DataInputOutput.fn_file_operation_logger()   A

Complexity
  Conditions: 2

Size
  Total Lines: 7
  Code Lines: 6

Duplication
  Lines: 0
  Ratio: 0 %

Importance
  Changes: 0
Metric   Value
eloc     6
dl       0
loc      7
rs       10
c        0
b        0
f        0
cc       2
nop      3
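The figures above describe fn_file_operation_logger(): cc 2 matches its single if/else branch and nop 3 its three parameters (self, local_logger, in_logger_dict). As a rough cross-check, similarly named metrics can be computed with the radon package; the snippet below is a sketch under that assumption and is not the analyser that produced this report.

# Sketch only: cross-checking the figures with the radon package (assumed to be
# installed via "pip install radon"); the report's own toolchain is not named here.
from radon.complexity import cc_visit
from radon.raw import analyze

with open('DataInputOutput.py', encoding='utf-8') as source_file:  # path assumed
    source = source_file.read()

print(analyze(source))                       # raw counts: loc, lloc, comments, blank, ...
for block in cc_visit(source):               # one block per function / method
    if block.name == 'fn_file_operation_logger':
        print(block.name, block.complexity)  # cyclomatic complexity, expected 2

The analysed source file follows.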
"""
Data Input Output class
"""
# package to add support for multi-language (i18n)
import gettext
# package to handle files/folders and related metadata/operations
import os
# package facilitating Data Frames manipulation
import pandas


class DataInputOutput:
    locale = None

    def __init__(self, in_language='en_US'):
        current_script = os.path.basename(__file__).replace('.py', '')
        lang_folder = os.path.join(os.path.dirname(__file__), current_script + '_Locale')
        # gettext resolves the catalogue at <lang_folder>/<in_language>/LC_MESSAGES/<current_script>.mo
        self.locale = gettext.translation(current_script, lang_folder, languages=[in_language])

    @staticmethod
    def fn_add_missing_defaults_to_dict_message(in_dict):
        if 'field delimiter' not in in_dict:
            # falls back to os.pathsep (':' on POSIX, ';' on Windows)
            in_dict['field delimiter'] = os.pathsep
        if 'compression' not in in_dict:
            in_dict['compression'] = 'infer'
        return in_dict

    def fn_build_feedback_for_logger(self, operation_details):
        messages = {}
        if operation_details['operation'] == 'load':
            files_counted = str(operation_details['files counted'])
            messages = {
                'failed': self.locale.gettext(
                    'Error encountered on loading Pandas Data Frame '
                    + 'from {file_type} file type (see below)')
                    .replace('{file_type}', operation_details['format'].upper()),
                'success': self.locale.gettext(
                    'All {files_counted} files of type {file_type} '
                    + 'successfully added to a Pandas Data Frame')
                    .replace('{files_counted}', files_counted)
                    .replace('{file_type}', operation_details['format'].upper())
            }
        elif operation_details['operation'] == 'save':
            messages = {
                'failed': self.locale.gettext(
                    'Error encountered on saving Pandas Data Frame '
                    + 'into a {file_type} file type (see below)')
                    .replace('{file_type}', operation_details['format'].upper()),
                'success': self.locale.gettext(
                    'Pandas Data Frame has just been saved to file "{file_name}", '
                    + 'considering {file_type} as file type')
                    .replace('{file_name}', operation_details['name'])
                    .replace('{file_type}', operation_details['format'].upper()),
            }
        return messages

    def fn_file_operation_logger(self, local_logger, in_logger_dict):
        # logs the pre-built success message, or the failure message plus the captured error
        messages = self.fn_build_feedback_for_logger(in_logger_dict)
        if in_logger_dict['error details'] is None:
            local_logger.info(messages['success'])
        else:
            local_logger.error(messages['failed'])
            local_logger.error(in_logger_dict['error details'])

    def fn_load_file_into_data_frame(self, in_logger, timer, in_dict):
        timer.start()
        if self.fn_store_data_frame_to_file_validation(in_logger, in_dict):
            in_dict = self.fn_add_missing_defaults_to_dict_message(in_dict)
            in_dict.update({'operation': 'load'})
            in_dict = self.fn_pack_dict_message(in_dict, in_dict['file list'])
            in_dict = self.fn_internal_load_csv_file_into_data_frame(in_dict)
            in_dict = self.fn_internal_load_excel_file_into_data_frame(in_dict)
            in_dict = self.fn_internal_load_json_file_into_data_frame(in_dict)
            in_dict = self.fn_internal_load_pickle_file_into_data_frame(in_dict)
            self.fn_file_operation_logger(in_logger, in_dict)
        timer.stop()
        # None is returned when validation failed and no data frame was produced
        return in_dict.get('out data frame')

    @staticmethod
    def fn_internal_load_csv_file_into_data_frame(in_dict):
        if in_dict['format'].lower() == 'csv':
            try:
                in_dict['out data frame'] = pandas.concat(
                    [pandas.read_csv(filepath_or_buffer=crt_file,
                                     delimiter=in_dict['field delimiter'],
                                     cache_dates=True,
                                     index_col=None,
                                     memory_map=True,
                                     low_memory=False,
                                     encoding='utf-8',
                                     ) for crt_file in in_dict['files list']])
            except Exception as err:
                in_dict['error details'] = err
        return in_dict

    @staticmethod
    def fn_internal_load_excel_file_into_data_frame(in_dict):
        if in_dict['format'].lower() == 'excel':
            try:
                in_dict['out data frame'] = pandas.concat(
                    [pandas.read_excel(io=crt_file,
                                       verbose=True,
                                       ) for crt_file in in_dict['files list']])
            except Exception as err:
                in_dict['error details'] = err
        return in_dict

    @staticmethod
    def fn_internal_load_json_file_into_data_frame(in_dict):
        if in_dict['format'].lower() == 'json':
            try:
                in_dict['out data frame'] = pandas.concat(
                    [pandas.read_json(path_or_buf=crt_file,
                                      compression=in_dict['compression'],
                                      ) for crt_file in in_dict['files list']])
            except Exception as err:
                in_dict['error details'] = err
        return in_dict

    @staticmethod
    def fn_internal_load_pickle_file_into_data_frame(in_dict):
        if in_dict['format'].lower() == 'pickle':
            try:
                in_dict['out data frame'] = pandas.concat(
                    [pandas.read_pickle(filepath_or_buffer=crt_file,
                                        compression=in_dict['compression'],
                                        ) for crt_file in in_dict['files list']])
            except Exception as err:
                in_dict['error details'] = err
        return in_dict

    @staticmethod
    def fn_internal_store_data_frame_to_csv_file(in_dict):
        if in_dict['format'].lower() == 'csv':
            try:
                in_dict['in data frame'].to_csv(path_or_buf=in_dict['name'],
                                                sep=in_dict['field delimiter'],
                                                header=True,
                                                index=False,
                                                encoding='utf-8')
            except Exception as err:
                in_dict['error details'] = err
        return in_dict

    @staticmethod
    def fn_internal_store_data_frame_to_excel_file(in_dict):
        if in_dict['format'].lower() == 'excel':
            try:
                in_dict['in data frame'].to_excel(excel_writer=in_dict['name'],
                                                  engine='xlsxwriter',
                                                  freeze_panes=(1, 1),
                                                  encoding='utf-8',
                                                  index=False,
                                                  verbose=True)
            except Exception as err:
                in_dict['error details'] = err
        return in_dict

    @staticmethod
    def fn_internal_store_data_frame_to_json_file(in_dict):
        if in_dict['format'].lower() == 'json':
            try:
                in_dict['in data frame'].to_json(path_or_buf=in_dict['name'],
                                                 compression=in_dict['compression'])
            except Exception as err:
                in_dict['error details'] = err
        return in_dict

    @staticmethod
    def fn_internal_store_data_frame_to_pickle_file(in_dict):
        if in_dict['format'].lower() == 'pickle':
            try:
                in_dict['in data frame'].to_pickle(path=in_dict['name'],
                                                   compression=in_dict['compression'])
            except Exception as err:
                in_dict['error details'] = err
        return in_dict

    @staticmethod
    def fn_pack_dict_message(in_dict, in_file_list):
        return {
            'compression': in_dict['compression'],
            'field delimiter': in_dict['field delimiter'],
            'files list': in_file_list,
            'files counted': len(in_file_list),
            'error details': None,
            'format': in_dict['format'],
            'name': in_dict['name'],
            'in data frame': None,
            'operation': in_dict['operation'],
            'out data frame': None,
        }

    def fn_store_data_frame_to_file(self, in_logger, timer, in_data_frame, in_dict):
        timer.start()
        if self.fn_store_data_frame_to_file_validation(in_logger, in_dict):
            in_dict = self.fn_add_missing_defaults_to_dict_message(in_dict)
            in_dict.update({'operation': 'save'})
            in_dict = self.fn_pack_dict_message(in_dict, [])
            in_dict.update({'in data frame': in_data_frame})
            in_dict = self.fn_internal_store_data_frame_to_csv_file(in_dict)
            in_dict = self.fn_internal_store_data_frame_to_excel_file(in_dict)
            in_dict = self.fn_internal_store_data_frame_to_json_file(in_dict)
            in_dict = self.fn_internal_store_data_frame_to_pickle_file(in_dict)
            self.fn_file_operation_logger(in_logger, in_dict)
        timer.stop()

    def fn_store_data_frame_to_file_validation(self, local_logger, in_file_details):
        given_format_is_implemented = False
        if 'format' in in_file_details:
            implemented_file_formats = ['csv', 'excel', 'json', 'pickle']
            given_format = in_file_details['format'].lower()
            given_format_is_implemented = True
            if given_format not in implemented_file_formats:
                given_format_is_implemented = False
                local_logger.error(self.locale.gettext(
                    'File "format" attribute has a value of "{format_value}" '
                    + 'which is not among currently implemented values: '
                    + '"{implemented_file_formats}", '
                    + 'therefore desired file operation is not possible')
                                   .replace('{format_value}', given_format)
                                   .replace('{implemented_file_formats}',
                                            '", "'.join(implemented_file_formats)))
        else:
            local_logger.error(self.locale.gettext(
                'File "format" attribute is mandatory in the file setting, but missing, '
                + 'therefore desired file operation is not possible'))
        return given_format_is_implemented
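
A minimal driving sketch of the class follows. It assumes a standard logging.Logger, a timer object exposing start()/stop() (codetiming.Timer is used purely as an example), an importable module named DataInputOutput, and a compiled en_US gettext catalogue under DataInputOutput_Locale/; the file names and the keys shown are illustrative only.

# Illustrative usage only; the logger/timer wiring below is assumed, not part of the class
import logging

from codetiming import Timer                  # any object with start()/stop() would do

from DataInputOutput import DataInputOutput   # module name assumed to match the file

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
timer = Timer(text='operation took {:.4f} seconds', logger=logger.info)

io_handler = DataInputOutput(in_language='en_US')  # needs the compiled .mo catalogue

# keys read by fn_load_file_into_data_frame (field delimiter/compression are optional)
load_details = {
    'format': 'csv',                          # one of: csv, excel, json, pickle
    'name': 'input files',                    # required by fn_pack_dict_message, even for a load
    'file list': ['input_1.csv', 'input_2.csv'],
    'field delimiter': ',',                   # otherwise defaults to os.pathsep
}
data_frame = io_handler.fn_load_file_into_data_frame(logger, timer, load_details)

if data_frame is not None:
    # keys read by fn_store_data_frame_to_file
    store_details = {'format': 'pickle', 'name': 'combined.pkl'}
    io_handler.fn_store_data_frame_to_file(logger, timer, data_frame, store_details)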