|
1
|
|
|
"""
main - entry point of the package

This file reads CSV input into a HYPER file and measures the time elapsed (performance)
"""
|
6
|
|
|
# package to facilitate operating system locale detection |
|
7
|
|
|
import locale |
|
8
|
|
|
# package to handle files/folders and related metadata/operations |
|
9
|
|
|
import os |
|
10
|
|
|
# Custom classes specific to this package |
|
11
|
|
|
from tableau_hyper_management.ProjectNeeds import ProjectNeeds |
|
12
|
|
|
from tableau_hyper_management.TableauHyperApiExtraLogic import TableauHyperApiExtraLogic |
|
13
|
|
|
from tableau_hyper_management.TypeDetermination import TypeDetermination |
|
14
|
|
|
# get current script name: file name without folder and without its extension
# (splitext removes only the final suffix, whereas str.replace('.py', '') would
# also delete any '.py' sequence occurring in the middle of the name)
SCRIPT_NAME = os.path.splitext(os.path.basename(__file__))[0]
# language code of the default locale (first element of the (code, encoding) pair);
# getdefaultlocale() expects a SEQUENCE of environment variable names - a bare
# string such as 'LC_ALL' would be iterated character by character on the POSIX
# fallback path, looking up variables named 'L', 'C', '_', 'A' instead of LC_ALL
# NOTE(review): locale.getdefaultlocale() is deprecated since Python 3.11 - plan a replacement
SCRIPT_LANGUAGE = locale.getdefaultlocale(('LC_ALL', 'LC_CTYPE', 'LANG', 'LANGUAGE'))[0]
|
17
|
|
|
|
|
18
|
|
|
# main execution logic: load the input file(s) into a data frame, then store that
# data frame in the requested output format, logging file statistics along the way
if __name__ == '__main__':
    # instantiate Extractor Specific Needs class
    class_pn = ProjectNeeds(SCRIPT_NAME, SCRIPT_LANGUAGE)
    # load application configuration (inputs are defined into a json file)
    class_pn.load_configuration()
    # adding special case data types on top of the configured ones
    class_pn.config['data_types']['empty'] = '^$'
    class_pn.config['data_types']['str'] = ''
    # initiate Logging sequence
    class_pn.initiate_logger_and_timer()
    # reflect title and input parameters given values in the log
    class_pn.class_clam.listing_parameter_values(
        class_pn.class_ln.logger, class_pn.timer, 'Tableau Hyper Converter',
        class_pn.config['input_options'][SCRIPT_NAME], class_pn.parameters)
    relevant_files_list = class_pn.class_fo.fn_build_file_list(
        class_pn.class_ln.logger, class_pn.timer, class_pn.parameters.input_file)
    # log file statistic details
    class_pn.class_fo.fn_store_file_statistics(
        class_pn.class_ln.logger, class_pn.timer, relevant_files_list, 'Input')
    # loading from a specific folder all files matching a given pattern into a data frame
    input_dict = {
        'compression': class_pn.parameters.input_file_compression,
        'field delimiter': class_pn.parameters.csv_field_separator,
        'file list': relevant_files_list,
        'format': class_pn.parameters.input_file_format,
        'name': 'irrelevant',
    }
    working_data_frame = class_pn.class_dio.fn_load_file_into_data_frame(
        class_pn.class_ln.logger, class_pn.timer, input_dict)
    if working_data_frame is not None:
        # normalise the requested output format once, for all comparisons below
        output_format = class_pn.parameters.output_file_format.lower()
        if output_format in ('csv', 'excel', 'pickle'):
            # build a separate dictionary for the output so the input description
            # is not overwritten through an alias of the same object
            output_dict = {
                **input_dict,
                'file list': 'irrelevant',
                'format': class_pn.parameters.output_file_format,
                'name': class_pn.parameters.output_file,
            }
            class_pn.class_dio.fn_store_data_frame_to_file(
                class_pn.class_ln.logger, class_pn.timer, working_data_frame, output_dict)
            # store statistics about output file
            class_pn.class_fo.fn_store_file_statistics(
                class_pn.class_ln.logger, class_pn.timer,
                class_pn.parameters.output_file, 'Generated')
        elif output_format == 'hyper':
            if class_pn.parameters.input_file_format.lower() == 'csv':
                # the timer is started/stopped explicitly around the type detection step
                class_pn.timer.start()
                c_td = TypeDetermination()
                # advanced detection of data type within Data Frame
                detected_csv_structure = c_td.fn_detect_csv_structure(
                    class_pn.class_ln.logger, working_data_frame, class_pn.parameters,
                    class_pn.config['data_types'])
                class_pn.timer.stop()
                # instantiate Tableau Hyper Api Extra Logic class
                class_thael = TableauHyperApiExtraLogic()
                # create HYPER from Data Frame
                class_thael.fn_run_hyper_creation(
                    class_pn.class_ln.logger, class_pn.timer, working_data_frame,
                    detected_csv_structure, class_pn.parameters)
                # store statistics about output file
                class_pn.class_fo.fn_store_file_statistics(
                    class_pn.class_ln.logger, class_pn.timer,
                    class_pn.parameters.output_file, 'Generated')
            else:
                # a space now separates the two sentences of the message
                # (previously they were glued together as "...type.An enhanced...")
                print('For time being only CSV file types are supported as input file type '
                      + 'in combination with Hyper as output file type. '
                      + 'An enhanced version will be deployed in near future!')
    # just final message
    class_pn.class_bn.fn_final_message(
        class_pn.class_ln.logger, class_pn.parameters.output_log_file,
        class_pn.timer.timers.total(SCRIPT_NAME))