Test Failed
Push — master ( 3e515d...19e016 )
by Daniel
01:46
created

TableauHyperApiExtraLogic.fn_convert_and_validate_content()   C

Complexity

Conditions 11

Size

Total Lines 31
Code Lines 29

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 11
eloc 29
nop 3
dl 0
loc 31
rs 5.4
c 0
b 0
f 0

How to fix   Complexity   

Complexity

Complex classes like TableauHyperApiExtraLogic.TableauHyperApiExtraLogic.fn_convert_and_validate_content() often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

1
import csv
2
3
from BasicNeeds import BasicNeeds as cls_bn
4
from TypeDetermination import TypeDetermination
5
from datetime import datetime,time
6
from tableauhyperapi import HyperProcess, Telemetry, \
7
    Connection, CreateMode, \
8
    NOT_NULLABLE, NULLABLE, SqlType, TableDefinition, \
9
    Inserter, \
10
    escape_name, escape_string_literal, \
11
    TableName, \
12
    HyperException, \
13
    Timestamp
14
15
16
class TableauHyperApiExtraLogic:
17
18
    def fn_build_hyper_columns_for_csv(given_file_name, csv_field_separator, detected_csv_structure, verbose):
19
        list_hyper_table_columns_to_return = []
20
        for current_field_structure in detected_csv_structure:
21
            list_hyper_table_columns_to_return.append(current_field_structure['order'])
22
            current_column_type = TableauHyperApiExtraLogic.fn_convert_to_hyper_types(current_field_structure['type'])
23
            cls_bn.fn_optional_print(cls_bn, verbose, 'Column '
24
                                     + str(current_field_structure['order']) + ' having name "'
25
                                     + current_field_structure['name'] + '" and type "'
26
                                     + current_field_structure['type'] + '" will become "'
27
                                     + str(current_column_type) + '"')
28
            list_hyper_table_columns_to_return[current_field_structure['order']] = TableDefinition.Column(
29
                name=current_field_structure['name'],
30
                type=current_column_type,
31
                nullability=NULLABLE
32
            )
33
        return list_hyper_table_columns_to_return
34
35
    def fn_convert_and_validate_content(self, crt_value, crt_type):
36
        if crt_value == '':
37
            return None
38
        else:
39
            if crt_type == 'int':
40
                return int(crt_value)
41
            elif crt_type == 'float-USA':
42
                return float(crt_value)
43
            elif crt_type == 'date-iso8601':
44
                tm = datetime.strptime(crt_value, '%Y-%m-%d')
45
                return datetime(tm.year, tm.month, tm.day)
46
            elif crt_type == 'date-USA':
47
                tm = datetime.strptime(crt_value, '%m/%d/%Y')
48
                return datetime(tm.year, tm.month, tm.day)
49
            elif crt_type == 'time-24':
50
                tm = datetime.strptime(crt_value, '%H:%M:%S')
51
                return time(tm.hour, tm.minute, tm.second)
52
            elif crt_type == 'time-24-us':
53
                tm = datetime.strptime(crt_value, '%H:%M:%S.%f')
54
                return time(tm.hour, tm.minute, tm.second, tm.microsecond)
55
            elif crt_type == 'time-USA':
56
                tm = datetime.strptime(crt_value, '%I:%M:%S')
57
                return time(tm.hour, tm.minute, tm.second)
58
            elif crt_type == 'datetime-iso8601':
59
                tm = datetime.fromisoformat(crt_value)
60
                return Timestamp(tm.year, tm.month, tm.day, tm.hour, tm.minute, tm.second)
61
            elif crt_type == 'datetime-iso8601-us':
62
                tm = datetime.fromisoformat(crt_value)
63
                return Timestamp(tm.year, tm.month, tm.day, tm.hour, tm.minute, tm.second, tm.microsecond)
64
            else:
65
                return crt_value.replace('"', '\\"')
66
67
    def fn_convert_to_hyper_types(given_type):
68
        switcher = {
69
            'empty': SqlType.text(),
70
            'int': SqlType.big_int(),
71
            'float-USA': SqlType.double(),
72
            'date-iso8601': SqlType.date(),
73
            'date-USA': SqlType.date(),
74
            'time-24': SqlType.time(),
75
            'time-24-us': SqlType.time(),
76
            'time-USA': SqlType.time(),
77
            'datetime-iso8601': SqlType.timestamp(),
78
            'str': SqlType.text()
79
        }
80
        identified_type = switcher.get(given_type)
81
        if identified_type is None:
82
            identified_type = SqlType.text()
83
        return identified_type
84
85
    def fn_rebuild_csv_content_for_hyper(given_file_name, csv_field_separator, detected_fields_type, verbose):
86
        csv_content_for_hyper = []
87
        with open(given_file_name, newline='') as csv_file:
88
            csv_object = csv.DictReader(csv_file, delimiter=csv_field_separator)
89
            # parse rows with index
90
            for row_idx, row_content in enumerate(csv_object):
91
                csv_content_for_hyper.append(row_idx)
92
                csv_content_for_hyper[row_idx] = []
93
                print_prefix = 'On the row ' + str((row_idx + 1))
94
                # parse all columns with index
95
                for col_idx, column_name in enumerate(csv_object.fieldnames):
96
                    csv_content_for_hyper[row_idx].append(col_idx)
97
                    csv_content_for_hyper[row_idx][col_idx] = \
98
                        self.fn_convert_and_validate_content(self,
0 ignored issues
show
Comprehensibility Best Practice introduced by
The variable self does not seem to be defined.
Loading history...
99
                                                             row_content[csv_object.fieldnames[col_idx]],
100
                                                             detected_fields_type[col_idx]['type'])
101
                    cls_bn.fn_optional_print(cls_bn, verbose, print_prefix + ' column ' + str(col_idx)
102
                                             + ' having the name [' + csv_object.fieldnames[col_idx] + '] '
103
                                             + ' has the value <' + row_content[csv_object.fieldnames[col_idx]]
104
                                             + '> which was interpreted as <<'
105
                                             + str(csv_content_for_hyper[row_idx][col_idx])
106
                                             + '>>')
107
        return csv_content_for_hyper
108
109
    def fn_run_create_hyper_file_from_csv(input_csv_file,
110
                                          csv_field_separator,
111
                                          output_hyper_file,
112
                                          verbose):
113
        detected_csv_structure = TypeDetermination.fn_detect_csv_structure(TypeDetermination,
114
                                                                           input_csv_file,
115
                                                                           csv_field_separator,
116
                                                                           verbose)
117
        hyper_table_columns = TableauHyperApiExtraLogic.fn_build_hyper_columns_for_csv(input_csv_file,
118
                                                                                       csv_field_separator,
119
                                                                                       detected_csv_structure,
120
                                                                                       verbose)
121
        # Starts the Hyper Process with telemetry enabled/disabled to send data to Tableau or not
122
        # To opt in, simply set telemetry=Telemetry.SEND_USAGE_DATA_TO_TABLEAU.
123
        # To opt out, simply set telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU.
124
        with HyperProcess(telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU) as hyper:
125
            # Creates new Hyper file <output_hyper_file>
126
            # Replaces file with CreateMode.CREATE_AND_REPLACE if it already exists.
127
            with Connection(endpoint=hyper.endpoint,
128
                            database=output_hyper_file,
129
                            create_mode=CreateMode.CREATE_AND_REPLACE) as hyper_connection:
130
                hyper_connection.catalog.create_schema("Extract")
131
                hyper_table = TableDefinition(
132
                    name=TableName("Extract", "Extract"),
133
                    columns=hyper_table_columns
134
                )
135
                hyper_connection.catalog.create_table(table_definition=hyper_table)
136
                print("The connection to the Hyper engine file has been created.")
137
                '''
138
                VERDICT: does not work as DOUBLE or INT are not accepting empty values... :-(
139
                print("I am about to execute command: " 
140
                    + f"COPY {hyper_table.table_name} from {escape_string_literal(input_csv_file)} with "
141
                    f"(format csv, NULL 'NULL', delimiter '{csv_field_separator}', header)")
142
                # Load all rows into "Customers" table from the CSV file.
143
                # `execute_command` executes a SQL statement and returns the impacted row count.
144
                count_in_target_table = hyper_connection.execute_command(
145
                    command=f"COPY {hyper_table.table_name} from {escape_string_literal(input_csv_file)} with "
146
                    f"(format csv, NULL 'NULL', delimiter '{csv_field_separator}', header)")
147
                print(f"The number of rows in table {hyper_table.table_name} is {count_in_target_table}.")
148
                '''
149
                # The rows to insert into the <hyper_table> table.
150
                data_to_insert = TableauHyperApiExtraLogic.fn_rebuild_csv_content_for_hyper(input_csv_file,
151
                                                                                            csv_field_separator,
152
                                                                                            detected_csv_structure,
153
                                                                                            verbose)
154
                # Execute the actual insert
155
                with Inserter(hyper_connection, hyper_table) as hyper_inserter:
156
                    hyper_inserter.add_rows(rows=data_to_insert)
157
                    hyper_inserter.execute()
158
                # Number of rows in the <hyper_table> table.
159
                # `execute_scalar_query` is for executing a query that returns exactly one row with one column.
160
                row_count = hyper_connection.\
161
                    execute_scalar_query(query=f"SELECT COUNT(*) FROM {hyper_table.table_name}")
162
                print(f"The number of rows in table {hyper_table.table_name} is {row_count}.")
163
            print("The connection to the Hyper file has been closed.")
164
        print("The Hyper process has been shut down.")