Passed
Push — development/test ( ccd4a2...9ec173 )
by Daniel
01:05
created

DataInputOutput.fn_save_data_frame_to_pickle()   A

Complexity

Conditions 2

Size

Total Lines 6
Code Lines 6

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 6
dl 0
loc 6
rs 10
c 0
b 0
f 0
cc 2
nop 2
1
"""
2
Data Input Output class
3
"""
4
# package to add support for multi-language (i18n)
5
import gettext
6
# package to handle files/folders and related metadata/operations
7
import os
8
# package facilitating Data Frames manipulation
9
import pandas as pd
10
11
12
class DataInputOutput:
    """Read delimited text files into a pandas Data Frame and store Data Frames to files.

    Every log message goes through the gettext catalogue held in ``lcl`` so it
    can be translated.
    """

    # gettext translation catalogue; set per instance by __init__
    lcl = None

    def __init__(self, default_language='en_US'):
        """Load the translation catalogue located next to this script.

        The catalogue domain is the script name without extension and it is
        searched in the sibling folder ``<script name>_Locale``.

        :param default_language: language code requested from gettext
        :raises OSError: propagated from ``gettext.translation`` when no
            matching catalogue is found
        """
        # splitext removes only the real extension, whereas replace('.py', '')
        # would also strip '.py' found anywhere else in the file name
        current_script = os.path.splitext(os.path.basename(__file__))[0]
        lang_folder = os.path.join(os.path.dirname(__file__), current_script + '_Locale')
        self.lcl = gettext.translation(current_script, lang_folder, languages=[default_language])

    def fn_load_file_list_to_data_frame(self, local_logger, timmer, file_list, csv_delimiter):
        """Read every file from the list as CSV and concatenate them into one Data Frame.

        :param local_logger: object exposing ``info`` used for the summary message
        :param timmer: object exposing ``start`` and ``stop``
        :param file_list: sized collection of files to read (UTF-8 encoded);
            an empty collection makes ``pd.concat`` raise
        :param csv_delimiter: field delimiter shared by all files
        :return: the concatenated Data Frame (original row indexes are kept)
        """
        timmer.start()
        try:
            frames = [
                pd.read_csv(
                    filepath_or_buffer=current_file,
                    delimiter=csv_delimiter,
                    cache_dates=True,
                    index_col=None,
                    memory_map=True,
                    low_memory=False,
                    encoding='utf-8',
                )
                for current_file in file_list
            ]
            out_data_frame = pd.concat(frames)
            message = self.lcl.gettext(
                'All relevant files ({files_counted}) were merged into a Pandas Data Frame')
            local_logger.info(message.replace('{files_counted}', str(len(file_list))))
        finally:
            # stop the timer even when reading fails so it is not left running
            timmer.stop()
        return out_data_frame

    @staticmethod
    def fn_save_data_frame_to_csv(in_data_frame, in_file_details):
        """Write the Data Frame as a UTF-8 CSV file with a header and without the index.

        :param in_data_frame: Data Frame to be written
        :param in_file_details: mapping with mandatory key ``name`` (destination)
            and optional key ``field-delimiter``; the mapping is not modified
        """
        # NOTE(review): the fallback delimiter is os.pathsep, which is ':' on POSIX
        # and ';' on Windows, so the default output differs between platforms
        field_delimiter = in_file_details.get('field-delimiter', os.pathsep)
        in_data_frame.to_csv(path_or_buf=in_file_details['name'],
                             sep=field_delimiter,
                             header=True,
                             index=False,
                             encoding='utf-8')

    @staticmethod
    def fn_save_data_frame_to_pickle(in_data_frame, in_file_details):
        """Write the Data Frame as a pickle file.

        :param in_data_frame: Data Frame to be written
        :param in_file_details: mapping with mandatory key ``name`` (destination)
            and optional key ``compression`` (``gzip`` when absent); the mapping
            is not modified
        """
        compression = in_file_details.get('compression', 'gzip')
        in_data_frame.to_pickle(path=in_file_details['name'],
                                compression=compression)

    def fn_store_data_frame_to_file(self, local_logger, timmer, in_data_frame, in_file_details):
        """Save the Data Frame in the format requested by ``in_file_details['format']``.

        Nothing is written when the format is missing or not implemented; the
        validation step logs the reason in that case.

        :param local_logger: object exposing ``info`` and ``error``
        :param timmer: object exposing ``start`` and ``stop``
        :param in_data_frame: Data Frame to be written
        :param in_file_details: mapping with keys ``name`` and ``format`` plus the
            optional keys understood by the format specific savers
        """
        timmer.start()
        try:
            is_file_saved = False
            given_format = self.fn_store_data_frame_to_file_validation(local_logger,
                                                                       in_file_details)
            if given_format == 'csv':
                self.fn_save_data_frame_to_csv(in_data_frame, in_file_details)
                is_file_saved = True
            elif given_format == 'excel':
                # "verbose" is intentionally not passed: the keyword was removed from
                # DataFrame.to_excel in pandas 2.0 and passing it raises TypeError there
                in_data_frame.to_excel(excel_writer=in_file_details['name'],
                                       engine='xlsxwriter',
                                       freeze_panes=(1, 1))
                is_file_saved = True
            elif given_format == 'pickle':
                self.fn_save_data_frame_to_pickle(in_data_frame, in_file_details)
                is_file_saved = True
            if is_file_saved:
                message = self.lcl.gettext(
                    'Pandas Data Frame has just been saved to file "{file_name}", '
                    'considering {file_type} as file type')
                # str() keeps the message working when the name is a path-like object
                local_logger.info(message
                                  .replace('{file_name}', str(in_file_details['name']))
                                  .replace('{file_type}', in_file_details['format']))
        finally:
            # stop the timer even when saving fails so it is not left running
            timmer.stop()

    def fn_store_data_frame_to_file_validation(self, local_logger, in_file_details):
        """Check that the requested file format is present and implemented.

        :param local_logger: object exposing ``error`` used to report problems
        :param in_file_details: mapping expected to contain the key ``format``
        :return: the lower-cased format name when it is implemented, otherwise ``False``
        """
        if 'format' not in in_file_details:
            local_logger.error(self.lcl.gettext('File "format" attribute is mandatory '
                                                'in the file setting, but missing, '
                                                'therefore file saving is not possible'))
            return False
        implemented_file_formats = ['csv', 'excel', 'pickle']
        given_format = in_file_details['format'].lower()
        if given_format in implemented_file_formats:
            return given_format
        message = self.lcl.gettext(
            'File "format" attribute has a value of "{format_value}" '
            'which is not among currently implemented values: '
            '"{implemented_file_formats}", therefore file saving is not possible')
        local_logger.error(message
                           .replace('{format_value}', given_format)
                           .replace('{implemented_file_formats}',
                                    '", "'.join(implemented_file_formats)))
        return False
97