Passed
Push — development/test ( 42d9fd...521135 )
by Daniel
01:10
created

DataInputOutput.__init__()   A

Complexity

Conditions 1

Size

Total Lines 4
Code Lines 4

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 4
dl 0
loc 4
rs 10
c 0
b 0
f 0
cc 1
nop 2
1
"""
2
Data Input Output class
3
"""
4
# package to add support for multi-language (i18n)
5
import gettext
6
# package to handle files/folders and related metadata/operations
7
import os
8
# package facilitating Data Frames manipulation
9
import pandas
10
11
12
class DataInputOutput:
    """
    Load pandas Data Frames from, and store them to, CSV / Excel / Pickle files.

    Every operation is driven by a settings dictionary (keys such as 'format',
    'name', 'field delimiter', 'compression') and reports its outcome through
    a logger object supplied by the caller; user-facing messages go through
    the gettext catalogue held in `locale`.
    """
    # gettext translation object used for every feedback message; set by __init__
    locale = None

    def __init__(self, default_language='en_US'):
        """
        Load the translation catalogue that sits next to this script.

        The gettext domain is this file's name without extension and the
        catalogue folder is '<that name>_Locale' in the same directory.

        :param default_language: language code handed to gettext.translation
        """
        # splitext removes only the trailing extension, whereas a plain
        # .replace('.py', '') would also mangle '.py' found elsewhere in the name
        current_script = os.path.splitext(os.path.basename(__file__))[0]
        lang_folder = os.path.join(os.path.dirname(__file__), current_script + '_Locale')
        self.locale = gettext.translation(current_script, lang_folder, languages=[default_language])

    @staticmethod
    def fn_add_missing_defaults_to_dict_message(in_dict):
        """
        Fill in 'field delimiter' and 'compression' when they are absent.

        The given dictionary is updated in place and also returned.

        :param in_dict: settings dictionary to complete
        :return: the same dictionary, with defaults added where missing
        """
        # NOTE(review): os.pathsep is the PATH separator (':' on POSIX, ';' on
        # Windows), so the default CSV delimiter is platform dependent -
        # confirm this is intended
        in_dict.setdefault('field delimiter', os.pathsep)
        in_dict.setdefault('compression', 'infer')
        return in_dict

    def fn_build_feedback_for_logger(self, operation_details):
        """
        Build the translated 'failed' / 'success' messages for an operation.

        :param operation_details: packed dictionary; 'operation' selects the
            message set ('load' or 'save'), 'format' is always reported and
            'files counted' (load) or 'name' (save) complete the success text
        :return: dictionary with keys 'failed' and 'success', or an empty
            dictionary when the operation is neither 'load' nor 'save'
        """
        operation = operation_details['operation']
        if operation == 'load':
            files_counted = str(operation_details['files counted'])
            file_type = operation_details['format'].upper()
            return {
                'failed': self.locale.gettext(
                    'Error encountered on loading Pandas Data Frame '
                    + 'from {file_type} file type (see below)')
                .replace('{file_type}', file_type),
                'success': self.locale.gettext(
                    'All {files_counted} files of type {file_type} '
                    + 'successfully added to a Pandas Data Frame')
                .replace('{files_counted}', files_counted)
                .replace('{file_type}', file_type),
            }
        if operation == 'save':
            file_type = operation_details['format'].upper()
            # str() so that a path-like 'name' (e.g. pathlib.Path), which pandas
            # accepts for writing, does not break the message after a good save
            file_name = str(operation_details['name'])
            return {
                'failed': self.locale.gettext(
                    'Error encountered on saving Pandas Data Frame '
                    + 'into a {file_type} file type (see below)')
                .replace('{file_type}', file_type),
                'success': self.locale.gettext(
                    'Pandas Data Frame has just been saved to file "{file_name}", '
                    + 'considering {file_type} as file type')
                .replace('{file_name}', file_name)
                .replace('{file_type}', file_type),
            }
        return {}

    def fn_file_operation_logger(self, local_logger, in_logger_dict):
        """
        Report the outcome of a load/save operation through the given logger.

        Success is logged with .info(); a failure is logged with .error()
        twice: first the translated message, then the stored error itself.

        :param local_logger: object exposing .info() and .error()
        :param in_logger_dict: packed dictionary of the finished operation
        """
        messages = self.fn_build_feedback_for_logger(in_logger_dict)
        error_details = in_logger_dict['error details']
        if error_details is None:
            local_logger.info(messages['success'])
            return
        local_logger.error(messages['failed'])
        local_logger.error(error_details)

    def fn_load_file_into_data_frame(self, in_logger, timer, in_dict):
        """
        Load every file from in_dict['file list'] into a single Data Frame.

        The caller's dictionary is not modified: defaults and bookkeeping keys
        are added to a private copy.

        :param in_logger: object exposing .info() and .error()
        :param timer: object exposing .start() and .stop(); stopped even when
            an exception propagates
        :param in_dict: settings with 'format' and 'file list', optionally
            'field delimiter' and 'compression'
        :return: the concatenated Data Frame; None when the format is rejected
            or when reading failed (the failure is logged, not raised)
        """
        # nothing loaded yet; .get() keeps whatever the caller may have put
        # under this key instead of raising KeyError on a rejected format
        out_data_frame = in_dict.get('out data frame')
        timer.start()
        try:
            if self.fn_store_data_frame_to_file_validation(in_logger, in_dict):
                details = self.fn_add_missing_defaults_to_dict_message(dict(in_dict))
                details['operation'] = 'load'
                # 'name' is only meaningful when saving, so do not demand it here
                details.setdefault('name', None)
                details = self.fn_pack_dict_message(details, in_dict['file list'])
                details = self.fn_internal_load_csv_file_into_data_frame(details)
                details = self.fn_internal_load_excel_file_into_data_frame(details)
                details = self.fn_internal_load_pickle_file_into_data_frame(details)
                self.fn_file_operation_logger(in_logger, details)
                out_data_frame = details['out data frame']
        finally:
            timer.stop()
        return out_data_frame

    @staticmethod
    def fn_internal_load_csv_file_into_data_frame(in_dict):
        """
        Read all 'files list' entries as CSV when 'format' is 'csv'.

        :param in_dict: packed dictionary; 'out data frame' receives the result
        :return: the same dictionary; on failure 'error details' holds the
            exception instead of it being raised
        """
        if in_dict['format'].lower() == 'csv':
            try:
                in_dict['out data frame'] = pandas.concat([
                    pandas.read_csv(
                        filepath_or_buffer=crt_file,
                        delimiter=in_dict['field delimiter'],
                        cache_dates=True,
                        index_col=None,
                        memory_map=True,
                        low_memory=False,
                        encoding='utf-8',
                    )
                    for crt_file in in_dict['files list']
                ])
            except Exception as err:
                # deliberately broad: any failure is handed to the logger later
                in_dict['error details'] = err
        return in_dict

    @staticmethod
    def fn_internal_load_excel_file_into_data_frame(in_dict):
        """
        Read all 'files list' entries as Excel when 'format' is 'excel'.

        :param in_dict: packed dictionary; 'out data frame' receives the result
        :return: the same dictionary; on failure 'error details' holds the
            exception instead of it being raised
        """
        if in_dict['format'].lower() == 'excel':
            try:
                in_dict['out data frame'] = pandas.concat([
                    pandas.read_excel(
                        io=crt_file,
                        verbose=True,
                    )
                    for crt_file in in_dict['files list']
                ])
            except Exception as err:
                # deliberately broad: any failure is handed to the logger later
                in_dict['error details'] = err
        return in_dict

    @staticmethod
    def fn_internal_load_pickle_file_into_data_frame(in_dict):
        """
        Read all 'files list' entries as Pickle when 'format' is 'pickle'.

        :param in_dict: packed dictionary; 'out data frame' receives the result
        :return: the same dictionary; on failure 'error details' holds the
            exception instead of it being raised
        """
        if in_dict['format'].lower() == 'pickle':
            try:
                # SECURITY: unpickling can execute arbitrary code - only files
                # coming from a trusted source should ever reach this point
                in_dict['out data frame'] = pandas.concat([
                    pandas.read_pickle(
                        filepath_or_buffer=crt_file,
                        compression=in_dict['compression'],
                    )
                    for crt_file in in_dict['files list']
                ])
            except Exception as err:
                # deliberately broad: any failure is handed to the logger later
                in_dict['error details'] = err
        return in_dict

    @staticmethod
    def fn_internal_store_data_frame_to_csv_file(in_dict):
        """
        Write 'in data frame' to the CSV file 'name' when 'format' is 'csv'.

        :param in_dict: packed dictionary describing the save operation
        :return: the same dictionary; on failure 'error details' holds the
            exception instead of it being raised
        """
        if in_dict['format'].lower() == 'csv':
            try:
                in_dict['in data frame'].to_csv(
                    path_or_buf=in_dict['name'],
                    sep=in_dict['field delimiter'],
                    header=True,
                    index=False,
                    encoding='utf-8',
                )
            except Exception as err:
                # deliberately broad: any failure is handed to the logger later
                in_dict['error details'] = err
        return in_dict

    @staticmethod
    def fn_internal_store_data_frame_to_excel_file(in_dict):
        """
        Write 'in data frame' to the Excel file 'name' when 'format' is 'excel'.

        :param in_dict: packed dictionary describing the save operation
        :return: the same dictionary; on failure 'error details' holds the
            exception instead of it being raised
        """
        if in_dict['format'].lower() == 'excel':
            try:
                # no `verbose` keyword: pandas deprecated it in 1.5 and removed
                # it in 2.0, where passing it turns every save into a TypeError
                in_dict['in data frame'].to_excel(
                    excel_writer=in_dict['name'],
                    engine='xlsxwriter',
                    freeze_panes=(1, 1),
                )
            except Exception as err:
                # deliberately broad: any failure is handed to the logger later
                in_dict['error details'] = err
        return in_dict

    @staticmethod
    def fn_internal_store_data_frame_to_pickle_file(in_dict):
        """
        Write 'in data frame' to the Pickle file 'name' when 'format' is 'pickle'.

        :param in_dict: packed dictionary describing the save operation
        :return: the same dictionary; on failure 'error details' holds the
            exception instead of it being raised
        """
        if in_dict['format'].lower() == 'pickle':
            try:
                in_dict['in data frame'].to_pickle(
                    path=in_dict['name'],
                    compression=in_dict['compression'],
                )
            except Exception as err:
                # deliberately broad: any failure is handed to the logger later
                in_dict['error details'] = err
        return in_dict

    @staticmethod
    def fn_pack_dict_message(in_dict, in_file_list):
        """
        Build the normalized dictionary passed between the internal steps.

        :param in_dict: settings holding 'compression', 'field delimiter',
            'format', 'name' and 'operation' (all mandatory here)
        :param in_file_list: files to read; an empty list for save operations
        :return: new dictionary with the settings, the file list and its size,
            plus empty slots for data frames and error details
        """
        return {
            'compression': in_dict['compression'],
            'field delimiter': in_dict['field delimiter'],
            'files list': in_file_list,
            'files counted': len(in_file_list),
            'error details': None,
            'format': in_dict['format'],
            'name': in_dict['name'],
            'in data frame': None,
            'operation': in_dict['operation'],
            'out data frame': None,
        }

    def fn_store_data_frame_to_file(self, in_logger, timer, in_data_frame, in_dict):
        """
        Save a Data Frame to the file described by in_dict.

        The caller's dictionary is not modified: defaults and bookkeeping keys
        are added to a private copy. Write failures are logged, not raised.

        :param in_logger: object exposing .info() and .error()
        :param timer: object exposing .start() and .stop(); stopped even when
            an exception propagates
        :param in_data_frame: the Data Frame to write
        :param in_dict: settings with 'format' and 'name', optionally
            'field delimiter' and 'compression'
        """
        timer.start()
        try:
            if self.fn_store_data_frame_to_file_validation(in_logger, in_dict):
                details = self.fn_add_missing_defaults_to_dict_message(dict(in_dict))
                details['operation'] = 'save'
                details = self.fn_pack_dict_message(details, [])
                details['in data frame'] = in_data_frame
                details = self.fn_internal_store_data_frame_to_csv_file(details)
                details = self.fn_internal_store_data_frame_to_excel_file(details)
                details = self.fn_internal_store_data_frame_to_pickle_file(details)
                self.fn_file_operation_logger(in_logger, details)
        finally:
            timer.stop()

    def fn_store_data_frame_to_file_validation(self, local_logger, in_file_details):
        """
        Check that the settings name a file format this class implements.

        :param local_logger: object exposing .error(), used to explain a rejection
        :param in_file_details: settings dictionary expected to hold 'format'
        :return: True when 'format' (case-insensitive) is 'csv', 'excel' or
            'pickle'; False, after logging the reason, otherwise
        """
        if 'format' not in in_file_details:
            local_logger.error(self.locale.gettext(
                'File "format" attribute is mandatory in the file setting, but missing, '
                + 'therefore desired file operation is not possible'))
            return False
        implemented_file_formats = ['csv', 'excel', 'pickle']
        given_format = in_file_details['format'].lower()
        if given_format in implemented_file_formats:
            return True
        local_logger.error(self.locale.gettext(
            'File "format" attribute has a value of "{format_value}" '
            + 'which is not among currently implemented values: '
            + '"{implemented_file_formats}", '
            + 'therefore desired file operation is not possible')
                           .replace('{format_value}', given_format)
                           .replace('{implemented_file_formats}',
                                    '", "'.join(implemented_file_formats)))
        return False
203