companies   A
last analyzed

Complexity

Total Complexity 16

Size/Duplication

Total Lines 209
Duplicated Lines 0 %

Importance

Changes 0
Metric Value
eloc 117
dl 0
loc 209
rs 10
c 0
b 0
f 0
wmc 16

10 Methods

Rating   Name   Duplication   Size   Complexity  
A CompaniesUpdater.correct_column_names() 0 3 1
A CompaniesUpdater.drop_unnecessary_columns() 0 7 1
A CompaniesUpdater.rename_columns_manually() 0 10 1
A CompaniesUpdater.replace_nonword_with_underscore() 0 9 2
A CompaniesUpdater.correct_boolean_values() 0 11 3
A CompaniesUpdater.__init__() 0 8 1
A CompaniesUpdater.remove_invalid_companies() 0 7 1
A CompaniesUpdater.delete_data() 0 4 1
A CompaniesUpdater.remove_invalid_and_irrelevant_companies() 0 7 1
A CompaniesUpdater.add_data() 0 66 4
1
from datetime import datetime
2
import re
3
from typing import ClassVar, final
4
5
from sqlalchemy import (
6
    Boolean,
7
    Column,
8
    Date,
9
    ForeignKey,
10
    MetaData,
11
    SmallInteger,
12
    String,
13
    Table,
14
    text,
15
)
16
17
from src.kalauz.new_data_processors.common import (
18
    UICTableUpdater,
19
)
20
from src.kalauz.new_data_processors.common_excel_processors import ExcelProcessorSimple
21
from src.kalauz.new_data_processors.helper_table_updaters.countries import (
22
    CountriesUpdater,
23
)
24
25
26
@final
27
# TODO: add logging
28
class CompaniesUpdater(ExcelProcessorSimple, UICTableUpdater):
29
    TABLE_NAME: ClassVar[str] = "companies"
30
    database_metadata: ClassVar[MetaData] = MetaData()
31
32
    table: ClassVar[Table] = Table(
33
        TABLE_NAME,
34
        database_metadata,
35
        Column(
36
            name="code_uic",
37
            type_=SmallInteger,
38
            nullable=False,
39
            index=True,
40
            primary_key=True,
41
        ),
42
        Column(name="short_name", type_=String(255)),
43
        Column(name="name", type_=String(255), nullable=False),
44
        Column(
45
            "country_code_iso",
46
            String(2),
47
            ForeignKey(CountriesUpdater.table.c.code_iso),
48
            nullable=False,
49
        ),
50
        Column(name="allocation_date", type_=Date),
51
        Column(name="modified_date", type_=Date),
52
        Column(name="begin_of_validity", type_=Date),
53
        Column(name="end_of_validity", type_=Date),
54
        Column(name="freight", type_=Boolean, nullable=False),
55
        Column(name="passenger", type_=Boolean, nullable=False),
56
        Column(name="infrastructure", type_=Boolean, nullable=False),
57
        Column(name="holding", type_=Boolean),
58
        Column(name="integrated", type_=Boolean),
59
        Column(name="other", type_=Boolean, nullable=False),
60
        Column(name="url", type_=String(255)),
61
    )
62
63
    def __init__(self) -> None:
64
        super().__init__()
65
66
        self.DATA_URL = f"{self.DATA_BASE_URL}3023"
67
68
        self._data_to_process = self.get_data(self.DATA_URL)
69
70
        self.logger.info(f"{self.__class__.__name__} initialized!")
71
72
    def correct_column_names(self) -> None:
73
        self.replace_nonword_with_underscore()
74
        self.rename_columns_manually()
75
76
    def replace_nonword_with_underscore(self) -> None:
77
        # TODO: report wrong display of newlines in DataFrame view to pandas developers
78
        self.data.rename(
79
            columns=lambda x: re.sub(
80
                pattern=r"[\n ]",
81
                repl="_",
82
                string=x,
83
            ).lower(),
84
            inplace=True,
85
        )
86
87
    def rename_columns_manually(self) -> None:
88
        self.data.rename(
89
            columns={
90
                "code": "code_uic",
91
                "full_name": "name",
92
                "country": "country_code_iso",
93
                "infra-_structure": "infrastructure",
94
                "url": "url",
95
            },
96
            inplace=True,
97
        )
98
99
    def delete_data(self) -> None:
100
        self.remove_invalid_and_irrelevant_companies()
101
        self.drop_unnecessary_columns()
102
        self.remove_invalid_companies()
103
104
    def remove_invalid_and_irrelevant_companies(self) -> None:
105
        self.data.dropna(
106
            subset=[
107
                "allocation_date",
108
                "infrastructure",
109
            ],
110
            inplace=True,
111
        )
112
113
    def drop_unnecessary_columns(self) -> None:
114
        self.data.drop(
115
            columns=[
116
                "request_date",
117
                "recent",
118
            ],
119
            inplace=True,
120
        )
121
122
    def remove_invalid_companies(self) -> None:
123
        # future: report bug below to JetBrains or pandas developers
124
        # noinspection PyUnusedLocal
125
        today = datetime.today()
126
        self.data.query(
127
            expr="(begin_of_validity.isnull() or begin_of_validity <= @today) and (end_of_validity.isnull() or @today <= end_of_validity)",
128
            inplace=True,
129
        )
130
131
    def correct_boolean_values(self) -> None:
132
        boolean_columns = [
133
            "freight",
134
            "passenger",
135
            "infrastructure",
136
            "holding",
137
            "integrated",
138
            "other",
139
        ]
140
        for column in boolean_columns:
141
            self.data[column] = self.data[column].apply(lambda x: x == "x" or x == "X")
142
143
    def add_data(self) -> None:
144
        with self.database.engine.begin() as connection:
145
            queries = [
146
                """
147
                insert ignore into companies (
148
                    code_uic,
149
                    short_name,
150
                    name,
151
                    country_code_iso,
152
                    allocation_date,
153
                    modified_date,
154
                    begin_of_validity,
155
                    end_of_validity,
156
                    freight,
157
                    passenger,
158
                    infrastructure,
159
                    holding,
160
                    integrated,
161
                    other,
162
                    url
163
                )
164
                values (
165
                    :code_uic,
166
                    :short_name,
167
                    :name,
168
                    :country_code_iso,
169
                    :allocation_date,
170
                    :modified_date,
171
                    :begin_of_validity,
172
                    :end_of_validity,
173
                    :freight,
174
                    :passenger,
175
                    :infrastructure,
176
                    :holding,
177
                    :integrated,
178
                    :other,
179
                    :url
180
                )
181
                """,
182
                """
183
                update companies
184
                set
185
                    short_name = :short_name,
186
                    name = :name,
187
                    country_code_iso = :country_code_iso,
188
                    allocation_date = :allocation_date,
189
                    modified_date = :modified_date,
190
                    begin_of_validity = :begin_of_validity,
191
                    end_of_validity = :end_of_validity,
192
                    freight = :freight,
193
                    passenger = :passenger,
194
                    infrastructure = :infrastructure,
195
                    holding = :holding,
196
                    integrated = :integrated,
197
                    other = :other,
198
                    url = :url
199
                where
200
                    code_uic = :code_uic
201
                """,
202
            ]
203
204
            for index, row in self.data.iterrows():
205
                for query in queries:
206
                    connection.execute(
207
                        text(query),
208
                        row.to_dict(),
209
                    )
210