companies - Code Metrics - gy-mate/kalauz - Measure and Improve Code Quality continuously with Scrutinizer

companies A
last analyzed 2025-10-18 11:55 UTC

↳ Parent: Project

Complexity

Total Complexity

Size/Duplication

Total Lines	209
Duplicated Lines	0 %

Importance

Changes

Metric	Value
eloc	117
dl	0
loc	209
rs	10
c	0
b	0
f	0
wmc	16

10 Methods

Rating	Name	Size	Complexity
A	CompaniesUpdater.correct_column_names()	3	1
A	CompaniesUpdater.drop_unnecessary_columns()	7	1
A	CompaniesUpdater.rename_columns_manually()	10	1
A	CompaniesUpdater.replace_nonword_with_underscore()	9	2
A	CompaniesUpdater.correct_boolean_values()	11	3
A	CompaniesUpdater.__init__()	8	1
A	CompaniesUpdater.remove_invalid_companies()	7	1
A	CompaniesUpdater.delete_data()	4	1
A	CompaniesUpdater.remove_invalid_and_irrelevant_companies()	7	1
A	CompaniesUpdater.add_data()	66	4

from datetime import datetime
import re
from typing import ClassVar, final

from sqlalchemy import (
    Boolean,
    Column,
    Date,
    ForeignKey,
    MetaData,
    SmallInteger,
    String,
    Table,
    text,
)

from src.kalauz.new_data_processors.common import (
    UICTableUpdater,
)
from src.kalauz.new_data_processors.common_excel_processors import ExcelProcessorSimple
from src.kalauz.new_data_processors.helper_table_updaters.countries import (
    CountriesUpdater,
)


@final
# TODO: add logging
class CompaniesUpdater(ExcelProcessorSimple, UICTableUpdater):
    TABLE_NAME: ClassVar[str] = "companies"
    database_metadata: ClassVar[MetaData] = MetaData()

    table: ClassVar[Table] = Table(
        TABLE_NAME,
        database_metadata,
        Column(
            name="code_uic",
            type_=SmallInteger,
            nullable=False,
            index=True,
            primary_key=True,
        ),
        Column(name="short_name", type_=String(255)),
        Column(name="name", type_=String(255), nullable=False),
        Column(
            "country_code_iso",
            String(2),
            ForeignKey(CountriesUpdater.table.c.code_iso),
            nullable=False,
        ),
        Column(name="allocation_date", type_=Date),
        Column(name="modified_date", type_=Date),
        Column(name="begin_of_validity", type_=Date),
        Column(name="end_of_validity", type_=Date),
        Column(name="freight", type_=Boolean, nullable=False),
        Column(name="passenger", type_=Boolean, nullable=False),
        Column(name="infrastructure", type_=Boolean, nullable=False),
        Column(name="holding", type_=Boolean),
        Column(name="integrated", type_=Boolean),
        Column(name="other", type_=Boolean, nullable=False),
        Column(name="url", type_=String(255)),
    )

    def __init__(self) -> None:
        super().__init__()

        self.DATA_URL = f"{self.DATA_BASE_URL}3023"

        self._data_to_process = self.get_data(self.DATA_URL)

        self.logger.info(f"{self.__class__.__name__} initialized!")

    def correct_column_names(self) -> None:
        self.replace_nonword_with_underscore()
        self.rename_columns_manually()

    def replace_nonword_with_underscore(self) -> None:
        # TODO: report wrong display of newlines in DataFrame view to pandas developers
        self.data.rename(
            columns=lambda x: re.sub(
                pattern=r"[\n ]",
                repl="_",
                string=x,
            ).lower(),
            inplace=True,
        )

    def rename_columns_manually(self) -> None:
        self.data.rename(
            columns={
                "code": "code_uic",
                "full_name": "name",
                "country": "country_code_iso",
                "infra-_structure": "infrastructure",
                "url": "url",
            },
            inplace=True,
        )

    def delete_data(self) -> None:
        self.remove_invalid_and_irrelevant_companies()
        self.drop_unnecessary_columns()
        self.remove_invalid_companies()

    def remove_invalid_and_irrelevant_companies(self) -> None:
        self.data.dropna(
            subset=[
                "allocation_date",
                "infrastructure",
            ],
            inplace=True,
        )

    def drop_unnecessary_columns(self) -> None:
        self.data.drop(
            columns=[
                "request_date",
                "recent",
            ],
            inplace=True,
        )

    def remove_invalid_companies(self) -> None:
        # future: report bug below to JetBrains or pandas developers
        # noinspection PyUnusedLocal
        today = datetime.today()
        self.data.query(
            expr="(begin_of_validity.isnull() or begin_of_validity <= @today) and (end_of_validity.isnull() or @today <= end_of_validity)",
            inplace=True,
        )

    def correct_boolean_values(self) -> None:
        boolean_columns = [
            "freight",
            "passenger",
            "infrastructure",
            "holding",
            "integrated",
            "other",
        ]
        for column in boolean_columns:
            self.data[column] = self.data[column].apply(lambda x: x == "x" or x == "X")

    def add_data(self) -> None:
        with self.database.engine.begin() as connection:
            queries = [
                """
                insert ignore into companies (
                    code_uic,
                    short_name,
                    name,
                    country_code_iso,
                    allocation_date,
                    modified_date,
                    begin_of_validity,
                    end_of_validity,
                    freight,
                    passenger,
                    infrastructure,
                    holding,
                    integrated,
                    other,
                    url
                )
                values (
                    :code_uic,
                    :short_name,
                    :name,
                    :country_code_iso,
                    :allocation_date,
                    :modified_date,
                    :begin_of_validity,
                    :end_of_validity,
                    :freight,
                    :passenger,
                    :infrastructure,
                    :holding,
                    :integrated,
                    :other,
                    :url
                )
                """,
                """
                update companies
                set
                    short_name = :short_name,
                    name = :name,
                    country_code_iso = :country_code_iso,
                    allocation_date = :allocation_date,
                    modified_date = :modified_date,
                    begin_of_validity = :begin_of_validity,
                    end_of_validity = :end_of_validity,
                    freight = :freight,
                    passenger = :passenger,
                    infrastructure = :infrastructure,
                    holding = :holding,
                    integrated = :integrated,
                    other = :other,
                    url = :url
                where
                    code_uic = :code_uic
                """,
            ]

            for index, row in self.data.iterrows():
                for query in queries:
                    connection.execute(
                        text(query),
                        row.to_dict(),
                    )


1			from datetime import datetime
2			import re
3			from typing import ClassVar, final
4
5			from sqlalchemy import (
6			Boolean,
7			Column,
8			Date,
9			ForeignKey,
10			MetaData,
11			SmallInteger,
12			String,
13			Table,
14			text,
15			)
16
17			from src.kalauz.new_data_processors.common import (
18			UICTableUpdater,
19			)
20			from src.kalauz.new_data_processors.common_excel_processors import ExcelProcessorSimple
21			from src.kalauz.new_data_processors.helper_table_updaters.countries import (
22			CountriesUpdater,
23			)
24
25
26			@final
27			# TODO: add logging
28			class CompaniesUpdater(ExcelProcessorSimple, UICTableUpdater):
29			TABLE_NAME: ClassVar[str] = "companies"
30			database_metadata: ClassVar[MetaData] = MetaData()
31
32			table: ClassVar[Table] = Table(
33			TABLE_NAME,
34			database_metadata,
35			Column(
36			name="code_uic",
37			type_=SmallInteger,
38			nullable=False,
39			index=True,
40			primary_key=True,
41			),
42			Column(name="short_name", type_=String(255)),
43			Column(name="name", type_=String(255), nullable=False),
44			Column(
45			"country_code_iso",
46			String(2),
47			ForeignKey(CountriesUpdater.table.c.code_iso),
48			nullable=False,
49			),
50			Column(name="allocation_date", type_=Date),
51			Column(name="modified_date", type_=Date),
52			Column(name="begin_of_validity", type_=Date),
53			Column(name="end_of_validity", type_=Date),
54			Column(name="freight", type_=Boolean, nullable=False),
55			Column(name="passenger", type_=Boolean, nullable=False),
56			Column(name="infrastructure", type_=Boolean, nullable=False),
57			Column(name="holding", type_=Boolean),
58			Column(name="integrated", type_=Boolean),
59			Column(name="other", type_=Boolean, nullable=False),
60			Column(name="url", type_=String(255)),
61			)
62
63			def __init__(self) -> None:
64			super().__init__()
65
66			self.DATA_URL = f"{self.DATA_BASE_URL}3023"
67
68			self._data_to_process = self.get_data(self.DATA_URL)
69
70			self.logger.info(f"{self.__class__.__name__} initialized!")
71
72			def correct_column_names(self) -> None:
73			self.replace_nonword_with_underscore()
74			self.rename_columns_manually()
75
76			def replace_nonword_with_underscore(self) -> None:
77			# TODO: report wrong display of newlines in DataFrame view to pandas developers
78			self.data.rename(
79			columns=lambda x: re.sub(
80			pattern=r"[\n ]",
81			repl="_",
82			string=x,
83			).lower(),
84			inplace=True,
85			)
86
87			def rename_columns_manually(self) -> None:
88			self.data.rename(
89			columns={
90			"code": "code_uic",
91			"full_name": "name",
92			"country": "country_code_iso",
93			"infra-_structure": "infrastructure",
94			"url": "url",
95			},
96			inplace=True,
97			)
98
99			def delete_data(self) -> None:
100			self.remove_invalid_and_irrelevant_companies()
101			self.drop_unnecessary_columns()
102			self.remove_invalid_companies()
103
104			def remove_invalid_and_irrelevant_companies(self) -> None:
105			self.data.dropna(
106			subset=[
107			"allocation_date",
108			"infrastructure",
109			],
110			inplace=True,
111			)
112
113			def drop_unnecessary_columns(self) -> None:
114			self.data.drop(
115			columns=[
116			"request_date",
117			"recent",
118			],
119			inplace=True,
120			)
121
122			def remove_invalid_companies(self) -> None:
123			# future: report bug below to JetBrains or pandas developers
124			# noinspection PyUnusedLocal
125			today = datetime.today()
126			self.data.query(
127			expr="(begin_of_validity.isnull() or begin_of_validity <= @today) and (end_of_validity.isnull() or @today <= end_of_validity)",
128			inplace=True,
129			)
130
131			def correct_boolean_values(self) -> None:
132			boolean_columns = [
133			"freight",
134			"passenger",
135			"infrastructure",
136			"holding",
137			"integrated",
138			"other",
139			]
140			for column in boolean_columns:
141			self.data[column] = self.data[column].apply(lambda x: x == "x" or x == "X")
142
143			def add_data(self) -> None:
144			with self.database.engine.begin() as connection:
145			queries = [
146			"""
147			insert ignore into companies (
148			code_uic,
149			short_name,
150			name,
151			country_code_iso,
152			allocation_date,
153			modified_date,
154			begin_of_validity,
155			end_of_validity,
156			freight,
157			passenger,
158			infrastructure,
159			holding,
160			integrated,
161			other,
162			url
163			)
164			values (
165			:code_uic,
166			:short_name,
167			:name,
168			:country_code_iso,
169			:allocation_date,
170			:modified_date,
171			:begin_of_validity,
172			:end_of_validity,
173			:freight,
174			:passenger,
175			:infrastructure,
176			:holding,
177			:integrated,
178			:other,
179			:url
180			)
181			""",
182			"""
183			update companies
184			set
185			short_name = :short_name,
186			name = :name,
187			country_code_iso = :country_code_iso,
188			allocation_date = :allocation_date,
189			modified_date = :modified_date,
190			begin_of_validity = :begin_of_validity,
191			end_of_validity = :end_of_validity,
192			freight = :freight,
193			passenger = :passenger,
194			infrastructure = :infrastructure,
195			holding = :holding,
196			integrated = :integrated,
197			other = :other,
198			url = :url
199			where
200			code_uic = :code_uic
201			""",
202			]
203
204			for index, row in self.data.iterrows():
205			for query in queries:
206			connection.execute(
207			text(query),
208			row.to_dict(),
209			)
210

gy-mate / kalauz

companies A last analyzed 2025-10-18 11:55 UTC

Complexity

Size/Duplication

Importance

10 Methods

Duplication Side-by-Side

Filter issues like

companies A
last analyzed 2025-10-18 11:55 UTC