Passed
Push — master ( 7db8f9...61eb58 )
by Fabio
01:02
created

codicefiscale.codicefiscale   A

Complexity

Total Complexity 42

Size/Duplication

Total Lines 475
Duplicated Lines 0 %

Importance

Changes 0
Metric Value
wmc 42
eloc 271
dl 0
loc 475
rs 9.0399
c 0
b 0
f 0

17 Functions

Rating   Name   Duplication   Size   Complexity  
B encode_birthdate() 0 41 8
B decode() 0 60 5
A encode() 0 28 1
A is_valid() 0 15 2
A decode_raw() 0 34 2
A _get_vowels() 0 2 1
A _get_omocodes() 0 9 1
A encode_birthplace() 0 36 3
A _get_consonants() 0 2 1
A _get_omocode() 0 8 2
A is_omocode() 0 14 1
A encode_surname() 0 15 1
A _get_consonants_and_vowels() 0 2 1
A encode_name() 0 19 2
A _get_data() 0 2 1
B _get_indexed_data() 0 32 6
A encode_cin() 0 26 4

How to fix   Complexity   

Complexity

Complex classes like codicefiscale.codicefiscale often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

1
# -*- coding: utf-8 -*-
2
3
from datetime import datetime
4
from dateutil import parser as date_parser
5
from itertools import combinations
6
from slugify import slugify
7
8
import fsutil
9
import re
10
import string
11
12
# Python 3 exposes ``maketrans`` on ``str`` objects; on Python 2 the attribute
# is missing and the ``string`` module function is used instead.
maketrans = getattr("", "maketrans", None)
if maketrans is None:
    maketrans = string.maketrans
17
18
19
# Letters looked up (on slugified, lowercase text) when encoding names.
_CONSONANTS = list("bcdfghjklmnpqrstvwxyz")
_VOWELS = list("aeiou")

# Month letters, indexed by (month number - 1).
_MONTHS = list("ABCDEHLMPRST")

# Check-character table: char -> (first value, second value).
# The first value of a digit is the digit itself and the first value of a
# letter is its alphabet index; digits "0"-"9" share the second values of the
# letters "A"-"J", which is why ``zip`` can truncate the same sequence.
_CIN = {
    char: (first, second)
    for chars in (string.digits, string.ascii_uppercase)
    for first, (char, second) in enumerate(
        zip(
            chars,
            (
                1, 0, 5, 7, 9, 13, 15, 17, 19, 21,
                2, 4, 18, 20, 11, 3, 6, 8, 12, 14,
                16, 10, 22, 25, 24, 23,
            ),
        )
    )
}
# Check letter, indexed by (sum of the values) % 26.
_CIN_REMAINDERS = list(string.ascii_uppercase)

# Omocodia: digit -> replacement letter.
_OMOCODIA = dict(zip(string.digits, "LMNPQRSTUV"))
_OMOCODIA_DIGITS = "".join(_OMOCODIA)
_OMOCODIA_LETTERS = "".join(_OMOCODIA.values())
_OMOCODIA_ENCODE_TRANS = maketrans(_OMOCODIA_DIGITS, _OMOCODIA_LETTERS)
_OMOCODIA_DECODE_TRANS = maketrans(_OMOCODIA_LETTERS, _OMOCODIA_DIGITS)

# 0-based positions of the characters that can be substituted, rightmost first.
_OMOCODIA_SUBS_INDEXES = [14, 13, 12, 10, 9, 7, 6]

# Every subset of the positions above, starting with the empty one
# (no substitution at all).
_OMOCODIA_SUBS_INDEXES_COMBINATIONS = [[]]
for combo_size in range(1, len(_OMOCODIA_SUBS_INDEXES) + 1):
    _OMOCODIA_SUBS_INDEXES_COMBINATIONS.extend(
        map(list, combinations(_OMOCODIA_SUBS_INDEXES, combo_size))
    )
83
84
85
def _get_data(filename):
    """
    Read a JSON file from the "data" folder and return its parsed content.

    The path is resolved by ``fsutil.join_path`` against this module's
    ``__file__``.

    :param filename: The name of the file inside the "data" folder
    :type filename: string

    :returns: The parsed JSON content
    """
    relative_path = "data/{}".format(filename)
    filepath = fsutil.join_path(__file__, relative_path)
    return fsutil.read_file_json(filepath)
87
88
89
def _get_indexed_data():
    """
    Load municipalities and countries and index them for fast lookups.

    :returns: A dict with three indexes: "municipalities" (name slug and
        "<name slug>-<province>" -> municipality), "countries" (slugified
        name -> country) and "codes" (code -> municipality or country).
    :rtype: dict
    """
    municipalities = _get_data("municipalities.json")
    countries = _get_data("countries.json")

    by_municipality = {}
    by_country = {}
    by_code = {}

    for entry in municipalities:
        # inactive municipalities are not indexed at all
        if not entry["active"]:
            continue
        entry_code = entry["code"]
        province_suffix = "-" + entry["province"].lower()
        for slug in entry["name_slugs"]:
            # each slug is reachable both alone and qualified by its province
            by_municipality[slug] = entry
            by_municipality[slug + province_suffix] = entry
        assert (
            entry_code not in by_code
        ), "Found more than one municipality with the same code, expected a one-to-one relation."
        by_code[entry_code] = entry

    for entry in countries:
        entry_code = entry["code"]
        # a country name can hold several alternatives separated by "/"
        for label in entry["name"].strip().split("/"):
            by_country[slugify(label)] = entry
        by_code[entry_code] = entry

    return {
        "municipalities": by_municipality,
        "countries": by_country,
        "codes": by_code,
    }
121
122
123
# The lookup tables are built once, when the module is imported.
_DATA = _get_indexed_data()

# Groups, in order: surname, name, birthdate (with nested year, month and day
# groups), birthplace and the final check letter.
CODICEFISCALE_RE = re.compile(
    "".join(
        (
            r"^",
            r"([a-z]{3})",
            r"([a-z]{3})",
            r"(([a-z\d]{2})([abcdehlmprst]{1})([a-z\d]{2}))",
            r"([a-z]{1}[a-z\d]{3})",
            r"([a-z]{1})$",
        )
    ),
    flags=re.IGNORECASE,
)
134
135
136
def _get_consonants(s):
    """Return, in their original order, the characters of ``s`` listed in ``_CONSONANTS``."""
    return list(filter(lambda char: char in _CONSONANTS, s))
138
139
140
def _get_vowels(s):
    """Return, in their original order, the characters of ``s`` listed in ``_VOWELS``."""
    return list(filter(lambda char: char in _VOWELS, s))
142
143
144
def _get_consonants_and_vowels(consonants, vowels):
145
    return "".join(list(consonants[:3] + vowels[:3] + (["X"] * 3))[:3]).upper()
146
147
148
def _get_omocode(code, subs, trans):
    """
    Translate the characters of ``code`` at the given positions and recompute
    the check letter.

    :param code: The code (only its first 15 characters are used)
    :type code: string
    :param subs: The 0-based positions of the characters to translate
    :type subs: list
    :param trans: The translation table applied to each selected character

    :returns: The 15 translated characters followed by their check letter
    :rtype: string
    """
    chars = list(code[0:15])
    for index in subs:
        chars[index] = chars[index].translate(trans)
    body = "".join(chars)
    return body + encode_cin(body)
156
157
158
def _get_omocodes(code):
    """
    List every omocode variant of the given code.

    :param code: The code
    :type code: string

    :returns: One code per entry of ``_OMOCODIA_SUBS_INDEXES_COMBINATIONS``;
        the first entry has no substitution applied.
    :rtype: list
    """
    # start from the form in which every substitutable position is decoded
    plain_code = _get_omocode(
        code, subs=_OMOCODIA_SUBS_INDEXES, trans=_OMOCODIA_DECODE_TRANS
    )

    def to_omocode(indexes):
        return _get_omocode(plain_code, subs=indexes, trans=_OMOCODIA_ENCODE_TRANS)

    return list(map(to_omocode, _OMOCODIA_SUBS_INDEXES_COMBINATIONS))
167
168
169
def encode_surname(surname):
    """
    Build the surname section of the italian fiscal code.

    :param surname: The surname
    :type surname: string

    :returns: The three-character surname code
    :rtype: string
    """
    slug = slugify(surname)
    return _get_consonants_and_vowels(_get_consonants(slug), _get_vowels(slug))
184
185
186
def encode_name(name):
    """
    Build the name section of the italian fiscal code.

    :param name: The name
    :type name: string

    :returns: The three-character name code
    :rtype: string
    """
    slug = slugify(name)
    consonants = _get_consonants(slug)

    # with more than three consonants the second one is skipped
    if len(consonants) > 3:
        consonants = consonants[:1] + consonants[2:]

    return _get_consonants_and_vowels(consonants, _get_vowels(slug))
205
206
207
def encode_birthdate(birthdate, sex):
    """
    Encodes birthdate to the code used in italian fiscal code.

    :param birthdate: The birthdate
    :type birthdate: date, datetime or string
    :param sex: The sex, 'M' or 'F' (case-insensitive)
    :type sex: string

    :returns: The code used in italian fiscal code: two year digits, the
        month letter and two day digits (day + 40 for 'F')
    :rtype: string

    :raises ValueError: if an argument is missing, if sex is not 'M' or 'F',
        or if the birthdate string cannot be parsed
    """
    # local import: only ``datetime`` is imported at module level
    from datetime import date

    if not birthdate:
        raise ValueError("[codicefiscale] 'birthdate' argument cant be None")

    if not sex:
        raise ValueError("[codicefiscale] 'sex' argument cant be None")

    sex = sex.upper()

    if sex not in ("M", "F"):
        raise ValueError("[codicefiscale] 'sex' argument must be 'M' or 'F'")

    # ``datetime`` is a subclass of ``date``, so this accepts both
    if isinstance(birthdate, date):
        date_obj = birthdate
    else:
        date_slug = slugify(birthdate)
        # a four-character leading part is read as the year, otherwise as the day
        first_part = date_slug.split("-")[0]
        date_kwargs = (
            {"yearfirst": True} if len(first_part) == 4 else {"dayfirst": True}
        )
        try:
            date_obj = date_parser.parse(date_slug, **date_kwargs)
        except (ValueError, OverflowError) as e:
            # no ``raise ... from``: the module keeps a Python 2 fallback
            raise ValueError("[codicefiscale] {}".format(e))

    # modulo keeps the year code two digits long for any year
    year_code = "{:02d}".format(date_obj.year % 100)
    month_code = _MONTHS[date_obj.month - 1]
    day_code = str(date_obj.day + (40 if sex == "F" else 0)).zfill(2)
    return year_code + month_code + day_code
248
249
250
def encode_birthplace(birthplace):
    """
    Encodes birthplace to the code used in italian fiscal code.

    The birthplace is looked up as a municipality name, then as a country
    name, then as a code. If the whole string is not found, the lookup is
    repeated with the part that precedes the first "," or "(".

    :param birthplace: The birthplace
    :type birthplace: string

    :returns: The code used in italian fiscal code
    :rtype: string

    :raises ValueError: if the birthplace is empty or cannot be mapped to a code
    """
    if not birthplace:
        raise ValueError("[codicefiscale] 'birthplace' argument cant be None")

    def find_birthplace_code(birthplace):
        birthplace_slug = slugify(birthplace)
        lookups = (
            (_DATA["municipalities"], birthplace_slug),
            (_DATA["countries"], birthplace_slug),
            (_DATA["codes"], birthplace_slug.upper()),
        )
        # the first index that knows the key wins
        for table, key in lookups:
            if key in table:
                return table[key].get("code", "")
        return ""

    birthplace_code = find_birthplace_code(birthplace) or find_birthplace_code(
        re.split(r",|\(", birthplace)[0]
    )

    if not birthplace_code:
        raise ValueError(
            "[codicefiscale] 'birthplace' argument not mapped to code: ('{}' -> '')".format(
                birthplace
            )
        )

    return birthplace_code
286
287
288
def encode_cin(code):
    """
    Encodes cin to the code used in italian fiscal code.

    :param code: The code (15 characters, or 16 with the last one ignored);
        letters are accepted in any case
    :type code: string

    :returns: The check letter computed on the first 15 characters
    :rtype: string

    :raises ValueError: if the code is empty, has a wrong length or contains
        a character that is not a letter or a digit of the lookup table
    """
    if not code:
        raise ValueError("[codicefiscale] 'code' argument cant be None")

    code_len = len(code)
    if code_len not in (15, 16):
        raise ValueError(
            "[codicefiscale] 'code' length must be 15 or 16, not: {}".format(code_len)
        )

    cin_tot = 0
    for i, char in enumerate(code[0:15]):
        values = _CIN.get(char.upper())
        if values is None:
            raise ValueError(
                "[codicefiscale] 'code' contains an invalid character: '{}'".format(
                    char
                )
            )
        # positions are counted from 1: odd positions take the second value
        # of the pair, even positions the first one
        cin_tot += values[(i + 1) % 2]

    return _CIN_REMAINDERS[cin_tot % 26]
314
315
316
def encode(surname, name, sex, birthdate, birthplace):
    """
    Build the italian fiscal code from the personal data.

    :param surname: The surname
    :type surname: string
    :param name: The name
    :type name: string
    :param sex: The sex, 'M' or 'F'
    :type sex: string
    :param birthdate: The birthdate
    :type birthdate: datetime or string
    :param birthplace: The birthplace
    :type birthplace: string

    :returns: The italian fiscal code
    :rtype: string
    """
    sections = [
        encode_surname(surname),
        encode_name(name),
        encode_birthdate(birthdate, sex),
        encode_birthplace(birthplace),
    ]
    code = "".join(sections)
    code += encode_cin(code)

    # decoding raises ValueError if the assembled code is not valid
    return decode(code)["code"]
344
345
346
def decode_raw(code):
    """
    Split the code into its raw sections, without interpreting them.

    :param code: The code
    :type code: string

    :returns: The normalized code and its raw sections.
    :rtype: dict

    :raises ValueError: if the code does not match the expected syntax
    """
    # normalize: slugify, drop the separators and upper-case
    code = slugify(code).replace("-", "").upper()

    match = CODICEFISCALE_RE.match(code)
    if match is None:
        raise ValueError("[codicefiscale] invalid syntax: {}".format(code))

    (
        surname,
        name,
        birthdate,
        birthdate_year,
        birthdate_month,
        birthdate_day,
        birthplace,
        cin,
    ) = match.groups()

    return {
        "code": code,
        "surname": surname,
        "name": name,
        "birthdate": birthdate,
        "birthdate_year": birthdate_year,
        "birthdate_month": birthdate_month,
        "birthdate_day": birthdate_day,
        "birthplace": birthplace,
        "cin": cin,
    }
380
381
382
def decode(code):
    """
    Decodes the italian fiscal code.

    :param code: The code
    :type code: string

    :returns: The data associated to the code and some additional info:
        "code", "omocodes", "sex", "birthdate", "birthplace" (None when the
        birthplace code is unknown) and "raw".
    :rtype: dict

    :raises ValueError: if the syntax, the birthdate or the check letter
        of the code is not valid
    """
    raw = decode_raw(code)

    code = raw["code"]

    # omocode letters are turned back into the digits they replace
    year_digits = raw["birthdate_year"].translate(_OMOCODIA_DECODE_TRANS)
    day_digits = raw["birthdate_day"].translate(_OMOCODIA_DECODE_TRANS)
    if not (year_digits.isdigit() and day_digits.isdigit()):
        # letters that are not omocode replacements cannot be a year or a day
        raise ValueError("[codicefiscale] invalid date: {}".format(raw["birthdate"]))

    birthdate_month = _MONTHS.index(raw["birthdate_month"]) + 1
    birthdate_day = int(day_digits)

    # days above 40 mark a female: the real day is the value minus 40
    if birthdate_day > 40:
        birthdate_day -= 40
        sex = "F"
    else:
        sex = "M"

    # the code only holds two year digits: assume the current century and
    # fall back to the previous one when the result would be in the future
    current_year = datetime.now().year
    birthdate_year_int = int("{}{}".format(str(current_year)[0:-2], year_digits))
    if birthdate_year_int > current_year:
        birthdate_year_int -= 100

    birthdate_str = "{}/{}/{}".format(
        birthdate_year_int, birthdate_month, birthdate_day
    )
    try:
        birthdate = datetime.strptime(birthdate_str, "%Y/%m/%d")
    except ValueError:
        raise ValueError("[codicefiscale] invalid date: {}".format(birthdate_str))

    # the first birthplace character is always a letter, only the rest is decoded
    birthplace = _DATA["codes"].get(
        raw["birthplace"][0] + raw["birthplace"][1:].translate(_OMOCODIA_DECODE_TRANS)
    )

    cin = raw["cin"]
    cin_check = encode_cin(code)
    if cin != cin_check:
        raise ValueError(
            "[codicefiscale] wrong CIN (Control Internal Number): expected '{}', found '{}'".format(
                cin_check, cin
            )
        )

    return {
        "code": code,
        "omocodes": _get_omocodes(code),
        "sex": sex,
        "birthdate": birthdate,
        "birthplace": birthplace,
        "raw": raw,
    }
442
443
444
def is_omocode(code):
    """
    Determines whether the specified code is omocode or not.

    :param code: The code
    :type code: string

    :returns: True if the specified code is omocode, False otherwise.
    :rtype: boolean

    :raises ValueError: if the code is not valid
    """
    data = decode(code)
    # Compare the normalized code, so lowercase input is recognized too.
    # The first entry of "omocodes" is the form without any substitution,
    # so it is left out of the comparison.
    return data["code"] in data["omocodes"][1:]
458
459
460
def is_valid(code):
    """
    Tell whether the specified code can be decoded.

    :param code: The code
    :type code: string

    :returns: True if the specified code is valid, False otherwise.
    :rtype: boolean
    """
    try:
        decode(code)
    except ValueError:
        return False
    return True
475