Completed
Push — master ( 4a89ff...ac1f79 )
by Oliver
02:00
created

person._Politician_default.scrape_wiki_for_ward()   A

Complexity

Conditions 1

Size

Total Lines 10
Code Lines 9

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 9
dl 0
loc 10
rs 9.95
c 0
b 0
f 0
cc 1
nop 1
1
#!/usr/bin/env python
2
# -*- coding: utf-8 -*-
3
4
"""
5
A set of dataclasses concerning roles of persons and their particulars.
6
"""
7
import datetime
8
import os
9
import sys
10
from dataclasses import dataclass, field
11
from typing import List, Optional, Set, Tuple
12
13
from gender_guesser import detector as sex  # type: ignore
14
15
PACKAGE_PARENT = ".."
16
SCRIPT_DIR = os.path.dirname(
17
    os.path.realpath(os.path.join(os.getcwd(), os.path.expanduser(__file__)))
18
)  # isort:skip # noqa # pylint: disable=wrong-import-position
19
sys.path.append(
20
    os.path.normpath(os.path.join(SCRIPT_DIR, PACKAGE_PARENT))
21
)  # isort: skip # noqa # pylint: disable=wrong-import-position
22
23
from src.resources.constants import GERMAN_PARTIES  # type: ignore  # noqa
24
from src.resources.constants import PEER_PREPOSITIONS  # type: ignore # noqa
25
from src.resources.constants import PEERTITLES  # type: ignore # noqa
26
from src.resources.helpers import (  # type: ignore # noqa; type: ignore # noqa; type: ignore  # noqa; type: ignore # noqa
27
    AttrDisplay,
28
    NotInRange,
29
    Party,
30
    TooManyFirstNames,
31
)
32
33
34
@dataclass
class _Name_default:
    """Optional parts of a name; every field defaults to ``None``."""

    # Second and third given name.
    middle_name_1: Optional[str] = None
    middle_name_2: Optional[str] = None
    maiden_name: Optional[str] = None
    # NOTE(review): presumably a surname connected to a divorce -- confirm.
    divorcée: Optional[str] = None
40
41
42
@dataclass
class _Name_base:
    """Mandatory parts of a name: both fields are required and positional."""

    first_name: str
    last_name: str
46
47
48
@dataclass
class Name(_Name_default, _Name_base, AttrDisplay):

    """
    The most basic part to describe a person.
    To add more middle names, dataclass _Name_default has to be given further
    middle_name attributes. Since this project currently focusses on German
    politicians, the limit of three given names is preserved.
    """

    def __post_init__(self):
        """
        Distribute given names passed in one string over the name attributes.

        The first name keeps the first token; a second and third token go to
        ``middle_name_1`` and ``middle_name_2``. Leading, trailing and
        repeated whitespace is ignored, so it never produces empty names.

        Raises:
            TooManyFirstNames: if more than three given names are found.
        """
        # split() without a separator drops empty tokens, unlike split(" ").
        first_names = self.first_name.split()
        if len(first_names) > 3:
            raise TooManyFirstNames(
                f"There are more than three first names! {first_names}"
            )
        if not first_names:
            # Blank first name: nothing to distribute, leave it untouched.
            return
        self.first_name = first_names[0]
        if len(first_names) >= 2:
            self.middle_name_1 = first_names[1]
        if len(first_names) == 3:
            self.middle_name_2 = first_names[2]
75
76
77
@dataclass
class _Peertitle_default:
    """Optional peer title and the preposition(s) that belong to it."""

    peer_title: Optional[str] = field(default=None)
    peer_preposition: Optional[str] = field(default=None)

    def nobility_title(self) -> None:
        """
        Split ``peer_title`` into title and preposition, in place.

        Does nothing when no peer title was given.
        """
        if self.peer_title is not None:
            self.peer_title, self.peer_preposition = self.title_fix(
                self.peer_title
            )

    def title_fix(self, title) -> Tuple[str, str]:
        """
        Separate the words of *title* into titles and prepositions.

        A word whose lower-cased form is in PEER_PREPOSITIONS is collected
        (lower-cased) as preposition; otherwise a word found verbatim in
        PEERTITLES is collected as title. All other words are dropped.

        Returns:
            ``(peer_title, peer_preposition)``, each a space-joined string
            that is empty when nothing matched.
        """
        found_titles = []
        found_prepositions = []
        # split() tolerates repeated or surrounding whitespace.
        for word in title.split():
            lowered = word.lower()
            if lowered in PEER_PREPOSITIONS:
                found_prepositions.append(lowered)
            elif word in PEERTITLES:
                found_titles.append(word)

        return " ".join(found_titles), " ".join(found_prepositions)
103
104
105
@dataclass
class Noble(_Peertitle_default, Name, AttrDisplay):
    """A name combined with an optional peer title."""

    def __post_init__(self):
        """Initialize names and titles."""
        # Given names are distributed first, then the peer title is split.
        Name.__post_init__(self)
        self.nobility_title()
111
112
113
@dataclass
class _Academic_title_default:
    """Optional academic title plus helpers that tidy up its spelling."""

    academic_title: Optional[str] = None

    def degree_title(self) -> None:
        """Normalise ``academic_title`` in place; a missing title stays None."""
        if self.academic_title is None:
            return
        self.academic_title = self.title_repair(self.academic_title)

    def title_repair(self, title) -> str:
        """
        Return *title* with consistent spacing and punctuation.

        If a dot is directly followed by "D" or "A" anywhere in the title,
        a space is inserted after every dot. A title ending in "Dr" gets its
        closing dot. Runs of spaces are collapsed and surrounding whitespace
        is removed.
        """
        if ".D" in title or ".A" in title:
            title = title.replace(".", ". ")
        if title.endswith("Dr"):
            title += "."
        # Dropping the empty chunks collapses every run of spaces to one.
        single_spaced = " ".join(chunk for chunk in title.split(" ") if chunk)

        return single_spaced.strip()
134
135
136
@dataclass
class Academic(_Academic_title_default, Name, AttrDisplay):
    """A name combined with an optional academic title."""

    def __post_init__(self):
        """Distribute the given names, then normalise the academic title."""
        Name.__post_init__(self)
        self.degree_title()
141
142
143
@dataclass
class _Person_default:
    """Personal particulars; each one defaults to the string "unknown"."""

    gender: str = "unknown"
    born: str = "unknown"
    date_of_birth: str = "unknown"
    age: str = "unknown"
    deceased: str = "unknown"
    profession: str = "unknown"
151
152
153
@dataclass
class Person(
    _Peertitle_default,
    _Academic_title_default,
    _Person_default,
    Name,
    AttrDisplay,  # noqa
):
    """A named person with titles, gender and birth/death particulars."""

    def __post_init__(self):
        """
        Normalise names and academic title, then derive gender, year of
        birth and age.

        NOTE(review): the peer title is not normalised here
        (nobility_title() is never called) -- confirm this is intended.
        """
        Name.__post_init__(self)
        # Same effect as Academic.__post_init__ without splitting the
        # given names a second time.
        self.degree_title()
        self.get_sex()
        self.get_year_of_birth()
        self.get_age()

    def get_sex(self) -> None:
        """
        Guess ``gender`` from the first given name.

        ``gender`` is set to "female" or "male" when the detector's answer
        contains that word; otherwise it is left unchanged.
        """
        # For double names only the part before the hyphen is looked up.
        first_name = self.first_name.split("-")[0]
        # NOTE(review): a new Detector is built on every call.
        gender = sex.Detector().get_gender(first_name)
        # "female" must be tested first because it contains "male".
        if "female" in gender:
            self.gender = "female"
        elif "male" in gender:
            self.gender = "male"

    def get_year_of_birth(self) -> None:
        """Set ``born`` to the part of ``date_of_birth`` after the last dot."""
        if self.date_of_birth != "unknown":
            self.born = self.date_of_birth.split(".")[-1]

    def get_age(self) -> None:
        """
        Derive ``age`` or ``deceased`` from ``born``.

        A value of up to four characters is taken as the year of birth and
        ``age`` becomes the difference to the current year. A longer value
        is split: the first four characters stay in ``born``, everything
        from the sixth character on goes to ``deceased``; no age is
        computed in that case.

        Raises:
            ValueError: if the year of birth is not an integer.
        """
        if self.born == "unknown":
            return
        # Strip once up front so that the length check and both slices work
        # on the same text; padding must not turn a plain year into a
        # "born/deceased" pair.
        born = str(self.born).strip()
        if len(born) > 4:
            self.deceased = born[5:]
            self.born = born[:4]
        else:
            today = datetime.date.today()
            self.age = str(today.year - int(born))
193
194
195
@dataclass
class _Politician_default:
    """
    Optional particulars of a politician and helpers to look up the ward.

    ``renamed_wards`` reads ``self.last_name``, which this class does not
    define itself; it has to come from the class this one is combined with.
    """

    electoral_ward: str = field(default="ew")
    ward_no: Optional[int] = field(default=None)
    voter_count: Optional[int] = field(default=None)
    minister: Optional[str] = field(default=None)
    offices: List[str] = field(default_factory=list)
    # NOTE(review): Politician appends Party objects to this list.
    parties: List[str] = field(default_factory=list)

    def renamed_wards(self):
        """Replace ``electoral_ward`` by its new name if it was renamed."""
        wards = {
            "Kreis Aachen I": "Aachen III",
            "Hochsauerlandkreis II – Soest III": "Hochsauerlandkreis II",
            # The replacement for this ward depends on the person.
            "Kreis Aachen II": (
                "Aachen IV"
                if self.last_name in ["Wirtz", "Weidenhaupt"]
                else "Kreis Aachen I"
            ),
        }
        self.electoral_ward = wards.get(
            self.electoral_ward, self.electoral_ward
        )

    def scrape_wiki_for_ward(self):
        """
        Fetch the German Wikipedia page of the ward and read its infobox.

        Sets ``ward_no`` and ``voter_count`` when the page provides them.
        Network errors raised by ``requests`` (including a timeout)
        propagate to the caller.
        """
        import requests
        from bs4 import BeautifulSoup  # type: ignore

        URL_base = "https://de.wikipedia.org/wiki/Landtagswahlkreis_{}"
        URL = URL_base.format(self.electoral_ward)
        # Without a timeout requests may wait forever on a stalled server.
        req = requests.get(URL, timeout=10)
        bsObj = BeautifulSoup(req.text, "lxml")
        table = bsObj.find(class_="infobox float-right toptextcells")
        self.scrape_wiki_table_for_ward(table)

    def scrape_wiki_table_for_ward(self, table):
        """
        Read ward number and voter count from an infobox *table*.

        *table* needs a ``find_all("td")`` method whose items offer ``text``
        and ``find_next()``. ``None`` (no infobox on the page) is accepted
        and leaves both attributes untouched.

        Raises:
            ValueError: if a value that was found is not a number.
        """
        if table is None:
            return
        for td in table.find_all("td"):
            if "Wahlkreisnummer" in td.text:
                value = td.find_next()
                if value is None:
                    continue
                # Only the first space-separated token is the number.
                self.ward_no = int(value.text.strip().split(" ")[0])
            elif "Wahlberechtigte" in td.text:
                value = td.find_next()
                if value is None:
                    continue
                self.voter_count = self._parse_voter_count(value.text)

    @staticmethod
    def _parse_voter_count(raw: str) -> int:
        """Turn text like "102.345[1]" or "102 345" into an integer."""
        text = raw.strip()
        # Drop trailing footnote markers of any length ("[1]", "[12]", ...).
        while text.endswith("]") and "[" in text:
            text = text[: text.rfind("[")].rstrip()
        # Remove thousands separators: dot, space, no-break space.
        for separator in (".", " ", "\xa0"):
            text = text.replace(separator, "")
        return int(text)
241
242
243
@dataclass
class Politician(
    _Peertitle_default,
    _Academic_title_default,
    _Person_default,
    _Politician_default,
    _Name_default,
    Party,
    _Name_base,
    AttrDisplay,
):
    """A person with party memberships, offices and an electoral ward."""

    def __post_init__(self):
        """
        Run the initialisers of the related classes, resolve the ward and
        register the initial party and a ministerial office.
        """
        # These are called through their classes because Politician does
        # not inherit from Name, Academic, Noble or Person.
        initialisers = (
            Name.__post_init__,
            Academic.__post_init__,
            Noble.__post_init__,
            Party.__post_init__,
            Person.get_sex,
            Person.get_age,
        )
        for initialise in initialisers:
            initialise(self)
        self.change_ward()
        if self.party_name in GERMAN_PARTIES:
            initial_party = Party(
                self.party_name, self.party_entry, self.party_exit
            )
            self.parties.append(initial_party)
        if self.minister and self.minister not in self.offices:
            self.offices.append(self.minister)

    def add_Party(
        self, party_name, party_entry="unknown", party_exit="unknown"
    ):  # noqa
        """
        Add a party membership and make it the current party.

        Names outside GERMAN_PARTIES are ignored. If the party is already
        listed, only its missing entry/exit dates are filled in.
        """
        if party_name not in GERMAN_PARTIES:
            return
        if self.party_is_in_parties(party_name, party_entry, party_exit):
            return
        self.parties.append(Party(party_name, party_entry, party_exit))
        self.party_name = party_name
        self.party_entry = party_entry
        self.party_exit = party_exit

    def align_party_entries(
        self, party, party_name, party_entry, party_exit
    ) -> Party:  # noqa
        """
        Fill "unknown" entry/exit dates of *party* with the given values.

        Dates that are already known are never overwritten. *party_name* is
        not used. Returns the same (modified) *party* object.
        """
        if party_entry != "unknown" and party.party_entry == "unknown":
            party.party_entry = party_entry
        if party_exit != "unknown" and party.party_exit == "unknown":
            party.party_exit = party_exit
        return party

    def party_is_in_parties(self, party_name, party_entry, party_exit):
        """
        Report whether a party called *party_name* is already listed.

        On the first match the entry's dates are completed, the entry is
        moved to the end of ``parties`` and ``party_entry``/``party_exit``
        of this instance are set from it.
        """
        # Iterate over a copy because the list is modified on a match.
        for known in list(self.parties):
            if known.party_name != party_name:
                continue
            refreshed = self.align_party_entries(
                known, party_name, party_entry, party_exit
            )
            self.parties.remove(known)
            self.parties.append(refreshed)
            self.party_entry = refreshed.party_entry
            self.party_exit = refreshed.party_exit
            return True
        return False

    def change_ward(self, ward=None):
        """
        Optionally set a new ward, then resolve its details.

        "ew" and "Landesliste" are both stored as "ew" without any lookup;
        every other ward is renamed if necessary and looked up online.
        """
        if ward:
            self.electoral_ward = ward
        if self.electoral_ward in ["ew", "Landesliste"]:
            self.electoral_ward = "ew"
            return
        self.renamed_wards()
        self.scrape_wiki_for_ward()
312
313
314
@dataclass
class _MdL_default:
    """Optional particulars of a member of a state parliament."""

    parl_pres: bool = False
    parl_vicePres: bool = False
    # Date strings such as "11.3.2015".
    parliament_entry: str = "unknown"
    parliament_exit: str = "unknown"
    # Identifiers for speeches.
    speeches: List[str] = field(default_factory=list)
    # Identifiers for reactions.
    reactions: List[str] = field(default_factory=list)
    # Years like "2010", "2011", ...
    membership: Set[str] = field(default_factory=set)
329
330
331
@dataclass
class _MdL_base:
    """Mandatory particulars of a member of a state parliament."""

    legislature: int
    # This would be "NRW", "BY", ...
    state: str
335
336
337
@dataclass
class MdL(_MdL_default, Politician, _MdL_base, AttrDisplay):
    """A politician who is a member of a state parliament."""

    def __post_init__(self):
        """
        Validate the legislature, record it and initialise the politician.

        Raises:
            NotInRange: if the legislature is not one of 14, 15, 16 or 17.
        """
        legislature_no = int(self.legislature)
        if not 14 <= legislature_no <= 17:
            raise NotInRange("Number for legislature not in range")
        # The value is stored as given, not the converted integer.
        self.membership.add(self.legislature)
        Politician.__post_init__(self)
345
346
347
if __name__ == "__main__":
    # Small demonstration of every class in this module. Creating a
    # politician with an electoral ward triggers a Wikipedia lookup.

    name = Name("Hans Hermann", "Werner")
    print(name)

    noble = Noble("Dagmara", "Bodelschwingh", peer_title="Gräfin von")
    print(noble)

    academic = Academic("Horst Heiner", "Wiekeiner", academic_title="Dr.")  # noqa
    print(academic)

    person_1 = Person("Sven", "Rübennase", academic_title="MBA", born="1990")  # noqa
    print(person_1)

    politician = Politician(
        "Bärbel",
        "Gutherz",
        "SPD",
        academic_title="Dr.",
        born="1980",
        electoral_ward="Köln I",
    )
    print(politician)

    mdl = MdL(
        14,
        "NRW",
        "Tom",
        "Schwadronius",
        "SPD",
        party_entry="1990",  # type: ignore
        peer_title="Junker von",
        born="1950",
    )
    print(mdl)

    # Switch party and ward afterwards and show the updated record.
    mdl.add_Party("Grüne", party_entry="30.11.1999")
    mdl.change_ward("Düsseldorf II")
    print(mdl)
386