Passed
Push — master ( 4d8322...510333 )
by Fabio
01:08
created

codicefiscale.codicefiscale.decode()   C

Complexity

Conditions 10

Size

Total Lines 72
Code Lines 49

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 49
dl 0
loc 72
rs 5.869
c 0
b 0
f 0
cc 10
nop 1

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include Extract Method.

Complexity

Complex units like codicefiscale.codicefiscale.decode() often do a lot of different things. To break such a unit down, we need to identify a cohesive component within it. A common approach to find such a component is to look for fields/methods (or, inside a single function, local variables and statements) that share the same prefixes, or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

1
# -*- coding: utf-8 -*-
2
3
from datetime import datetime
4
from dateutil import parser as date_parser
5
from itertools import combinations
6
from slugify import slugify
7
8
import fsutil
9
import re
10
import string
11
12
13
# Letter classes used to pick characters out of slugified names.
_CONSONANTS = list("bcdfghjklmnpqrstvwxyz")
_VOWELS = list("aeiou")

# Month letters; the letter at index ``month - 1`` encodes that month.
_MONTHS = list("ABCDEHLMPRST")

# Check-character weights. For each character the pair is
# (value at even 1-based positions, value at odd 1-based positions).
_CIN = {
    "0": (0, 1),
    "1": (1, 0),
    "2": (2, 5),
    "3": (3, 7),
    "4": (4, 9),
    "5": (5, 13),
    "6": (6, 15),
    "7": (7, 17),
    "8": (8, 19),
    "9": (9, 21),
    "A": (0, 1),
    "B": (1, 0),
    "C": (2, 5),
    "D": (3, 7),
    "E": (4, 9),
    "F": (5, 13),
    "G": (6, 15),
    "H": (7, 17),
    "I": (8, 19),
    "J": (9, 21),
    "K": (10, 2),
    "L": (11, 4),
    "M": (12, 18),
    "N": (13, 20),
    "O": (14, 11),
    "P": (15, 3),
    "Q": (16, 6),
    "R": (17, 8),
    "S": (18, 12),
    "T": (19, 14),
    "U": (20, 16),
    "V": (21, 10),
    "W": (22, 22),
    "X": (23, 25),
    "Y": (24, 24),
    "Z": (25, 23),
}
# The check character is the letter at index ``total % 26``.
_CIN_REMAINDERS = list(string.ascii_uppercase)

# Omocodia: digit -> replacement letter.
_OMOCODIA = {
    "0": "L",
    "1": "M",
    "2": "N",
    "3": "P",
    "4": "Q",
    "5": "R",
    "6": "S",
    "7": "T",
    "8": "U",
    "9": "V",
}
_OMOCODIA_DIGITS = "".join(_OMOCODIA.keys())
_OMOCODIA_LETTERS = "".join(_OMOCODIA.values())
_OMOCODIA_ENCODE_TRANS = str.maketrans(_OMOCODIA_DIGITS, _OMOCODIA_LETTERS)
_OMOCODIA_DECODE_TRANS = str.maketrans(_OMOCODIA_LETTERS, _OMOCODIA_DIGITS)

# Positions (0-based) of the characters that may be substituted, last first.
_OMOCODIA_SUBS_INDEXES = [6, 7, 9, 10, 12, 13, 14][::-1]

# Every subset of the substitutable positions, starting with the empty one
# (no substitution) and growing in size.
_OMOCODIA_SUBS_INDEXES_COMBINATIONS = [[]]
for combo_size in range(1, len(_OMOCODIA_SUBS_INDEXES) + 1):
    for combo in combinations(_OMOCODIA_SUBS_INDEXES, combo_size):
        _OMOCODIA_SUBS_INDEXES_COMBINATIONS.append(list(combo))
77
78
79
def _get_data(filename):
    """
    Read a JSON file from the ``data`` location resolved against this module.

    :param filename: The file name inside ``data/``
    :type filename: string

    :returns: The parsed JSON content
    """
    data_path = fsutil.join_path(__file__, f"data/{filename}")
    return fsutil.read_file_json(data_path)
81
82
83
def _get_indexed_data():
    """
    Load municipalities and countries and index them for fast lookup.

    :returns: A dict with three indexes: ``municipalities`` (by name slug and
        by ``<name slug>-<province>``), ``countries`` (by name slug) and
        ``codes`` (code -> list of municipality/country records).
    :rtype: dict
    """
    municipalities = _get_data("municipalities.json")
    countries = _get_data("countries.json")

    by_municipality = {}
    by_country = {}
    by_code = {}

    for municipality in municipalities:
        code = municipality["code"]
        province = municipality["province"].lower()
        for slug in municipality["name_slugs"]:
            by_municipality[slug] = municipality
            # Also index with the province suffix, e.g. "<name>-<province>".
            by_municipality[slug + "-" + province] = municipality
        by_code.setdefault(code, []).append(municipality)

    for country in countries:
        code = country["code"]
        for slug in country["name_slugs"]:
            by_country[slug] = country
        by_code.setdefault(code, []).append(country)

    return {
        "municipalities": by_municipality,
        "countries": by_country,
        "codes": by_code,
    }
113
114
115
# Lookup indexes ("municipalities", "countries", "codes") built once at
# import time from the bundled JSON data files.
_DATA = _get_indexed_data()

# Syntax of a fiscal code, matched case-insensitively. The capture groups are
# consumed positionally by decode_raw(): surname, name, birthdate (with nested
# year, month and day groups), birthplace and cin.
CODICEFISCALE_RE = re.compile(
    r"^"
    r"([a-z]{3})"  # surname
    r"([a-z]{3})"  # name
    r"(([a-z\d]{2})([abcdehlmprst]{1})([a-z\d]{2}))"  # birthdate: year, month letter, day
    r"([a-z]{1}[a-z\d]{3})"  # birthplace
    r"([a-z]{1})$",  # cin
    re.IGNORECASE,
)
126
127
128
def _get_consonants(s):
    """Return, in order, the characters of ``s`` that are listed in ``_CONSONANTS``."""
    return list(filter(lambda char: char in _CONSONANTS, s))
130
131
132
def _get_vowels(s):
    """Return, in order, the characters of ``s`` that are listed in ``_VOWELS``."""
    return list(filter(lambda char: char in _VOWELS, s))
134
135
136
def _get_consonants_and_vowels(consonants, vowels):
    """
    Build the 3-letter uppercase code from consonants, then vowels, then "X" padding.

    :param consonants: The consonants, in order
    :type consonants: list
    :param vowels: The vowels, in order
    :type vowels: list

    :returns: The first three available characters, uppercased
    :rtype: string
    """
    candidates = consonants[:3] + vowels[:3]
    # Pad so that there are always at least three characters to take.
    candidates += ["X"] * 3
    return "".join(candidates[:3]).upper()
138
139
140
def _get_omocode(code, subs, trans):
    """
    Translate the characters of ``code`` at the ``subs`` positions and re-append the CIN.

    :param code: The code (only its first 15 characters are used)
    :type code: string
    :param subs: The 0-based positions to translate
    :type subs: list
    :param trans: The translation table applied to each selected character

    :returns: The 15 translated characters followed by their check character
    :rtype: string
    """
    chars = list(code[:15])
    for position in subs:
        chars[position] = chars[position].translate(trans)
    body = "".join(chars)
    return body + encode_cin(body)
148
149
150
def _get_omocodes(code):
    """
    Return every omocode variant of ``code``.

    The code is first brought back to its plain form (letters at the
    substitutable positions decoded to digits); each combination of positions
    is then re-encoded. The first item uses the empty combination, i.e. it is
    the plain form itself.

    :param code: The code
    :type code: string

    :returns: The list of variants, one per positions combination
    :rtype: list
    """
    plain_code = _get_omocode(
        code, subs=_OMOCODIA_SUBS_INDEXES, trans=_OMOCODIA_DECODE_TRANS
    )
    return [
        _get_omocode(plain_code, subs=positions, trans=_OMOCODIA_ENCODE_TRANS)
        for positions in _OMOCODIA_SUBS_INDEXES_COMBINATIONS
    ]
159
160
161
def encode_surname(surname):
    """
    Encode surname to the code used in italian fiscal code.

    The surname is slugified, then its consonants followed by its vowels
    (padded with "X") supply the three characters of the code.

    :param surname: The surname
    :type surname: string

    :returns: The code used in italian fiscal code
    :rtype: string
    """
    slug = slugify(surname)
    return _get_consonants_and_vowels(_get_consonants(slug), _get_vowels(slug))
176
177
178
def encode_name(name):
    """
    Encode name to the code used in italian fiscal code.

    Works like the surname encoding, except that when the name has more than
    three consonants the second one is skipped.

    :param name: The name
    :type name: string

    :returns: The code used in italian fiscal code
    :rtype: string
    """
    slug = slugify(name)
    consonants = _get_consonants(slug)
    if len(consonants) >= 4:
        # With four or more consonants the second one is dropped.
        consonants.pop(1)
    return _get_consonants_and_vowels(consonants, _get_vowels(slug))
197
198
199
def encode_birthdate(birthdate, sex):
    """
    Encodes birthdate to the code used in italian fiscal code.

    The result is made of the last two digits of the year, the month letter
    and the two-digit day (increased by 40 for females).

    :param birthdate: The birthdate
    :type birthdate: date, datetime or string
    :param sex: The sex, 'M' or 'F'
    :type sex: string

    :returns: The code used in italian fiscal code
    :rtype: string

    :raises ValueError: if an argument is empty, if ``sex`` is not 'M'/'F'
        or if a string birthdate cannot be parsed
    """
    # Local import: the module itself only imports the ``datetime`` class.
    from datetime import date

    if not birthdate:
        raise ValueError("[codicefiscale] 'birthdate' argument cant be None")

    if not sex:
        raise ValueError("[codicefiscale] 'sex' argument cant be None")

    sex = sex.upper()

    if sex not in ("M", "F"):
        raise ValueError("[codicefiscale] 'sex' argument must be 'M' or 'F'")

    if isinstance(birthdate, date):
        # ``datetime`` is a subclass of ``date``: both are used as they are.
        # Plain ``date`` objects previously fell into the string-parsing path.
        date_obj = birthdate
    else:
        date_slug = slugify(birthdate)
        # A 4-character first chunk is taken as the year (e.g. "1980-01-31"),
        # otherwise the day is assumed to come first (e.g. "31-01-1980").
        first_part = date_slug.split("-")[0]
        date_kwargs = {"yearfirst": True} if len(first_part) == 4 else {"dayfirst": True}
        try:
            date_obj = date_parser.parse(date_slug, **date_kwargs)
        except ValueError as e:
            raise ValueError(f"[codicefiscale] {e}") from e

    # ``% 100`` always yields two digits, even for years shorter than 4 digits.
    year_code = f"{date_obj.year % 100:02d}"
    month_code = _MONTHS[date_obj.month - 1]
    day_code = str(date_obj.day + (40 if sex == "F" else 0)).zfill(2)
    return year_code + month_code + day_code
240
241
242
def encode_birthplace(birthplace):
    """
    Encodes birthplace to the code used in italian fiscal code.

    The birthplace is looked up as given; if that fails, the lookup is
    retried with the text preceding the first "," or "(".

    :param birthplace: The birthplace (a name or directly a known code)
    :type birthplace: string

    :returns: The code used in italian fiscal code
    :rtype: string

    :raises ValueError: if the birthplace is empty or cannot be mapped to a code
    """
    if not birthplace:
        raise ValueError("[codicefiscale] 'birthplace' argument cant be None")

    def lookup_code(place):
        slug = slugify(place)
        # Municipalities take precedence over countries.
        record = _DATA["municipalities"].get(slug, _DATA["countries"].get(slug))
        if record:
            return record.get("code", "")
        # Not a known name: accept it if it already is a known code.
        candidate = slug.upper()
        return candidate if candidate in _DATA["codes"] else ""

    birthplace_code = lookup_code(birthplace)
    if not birthplace_code:
        birthplace_code = lookup_code(re.split(r",|\(", birthplace)[0])

    if birthplace_code == "":
        raise ValueError(
            f"[codicefiscale] 'birthplace' argument not mapped to code: ('{birthplace}' -> '')"
        )

    return birthplace_code
280
281
282
def encode_cin(code):
    """
    Encodes cin to the code used in italian fiscal code.

    Only the first 15 characters are used, so a full 16-character code can
    be passed to recompute its check character. Letters are handled
    case-insensitively.

    :param code: The code (15 or 16 characters)
    :type code: string

    :returns: The code used in italian fiscal code
    :rtype: string

    :raises ValueError: if the code is empty, has a wrong length or contains
        a character other than an ASCII letter or digit
    """
    if not code:
        raise ValueError("[codicefiscale] 'code' argument cant be None")

    code_len = len(code)
    if code_len not in (15, 16):
        raise ValueError(
            f"[codicefiscale] 'code' length must be 15 or 16, not: {code_len}"
        )

    cin_tot = 0
    for i, char in enumerate(code[:15]):
        try:
            # Upper-case per character so that positions never shift.
            weights = _CIN[char.upper()]
        except KeyError:
            # Report bad characters like every other validation error of the
            # module instead of leaking a bare KeyError.
            raise ValueError(
                f"[codicefiscale] 'code' contains an invalid character: '{char}'"
            ) from None
        # 1-based odd positions use the second weight, even ones the first.
        cin_tot += weights[(i + 1) % 2]

    return _CIN_REMAINDERS[cin_tot % 26]
308
309
310
def encode(surname, name, sex, birthdate, birthplace):
    """
    Encodes the italian fiscal code.

    :param surname: The surname
    :type surname: string
    :param name: The name
    :type name: string
    :param sex: The sex, 'M' or 'F'
    :type sex: string
    :param birthdate: The birthdate
    :type birthdate: datetime or string
    :param birthplace: The birthplace
    :type birthplace: string

    :returns: The italian fiscal code
    :rtype: string
    """
    parts = [
        encode_surname(surname),
        encode_name(name),
        encode_birthdate(birthdate, sex),
        encode_birthplace(birthplace),
    ]
    code = "".join(parts)
    code += encode_cin(code)

    # Round-trip through decode(), which raises ValueError on an invalid code.
    return decode(code)["code"]
338
339
340
def decode_raw(code):
    """
    Decodes the raw data associated to the code.

    The code is slugified, stripped of "-" and uppercased before being split
    into its parts; no part is interpreted or validated beyond its syntax.

    :param code: The code
    :type code: string

    :returns: The raw data associated to the code.
    :rtype: dict

    :raises ValueError: if the code does not match the expected syntax
    """
    code = slugify(code).replace("-", "").upper()

    match = CODICEFISCALE_RE.match(code)
    if match is None:
        raise ValueError(f"[codicefiscale] invalid syntax: {code}")

    (
        surname,
        name,
        birthdate,
        birthdate_year,
        birthdate_month,
        birthdate_day,
        birthplace,
        cin,
    ) = match.groups()

    return {
        "code": code,
        "surname": surname,
        "name": name,
        "birthdate": birthdate,
        "birthdate_year": birthdate_year,
        "birthdate_month": birthdate_month,
        "birthdate_day": birthdate_day,
        "birthplace": birthplace,
        "cin": cin,
    }
374
375
376
def decode(code):
    """
    Decodes the italian fiscal code.

    :param code: The code
    :type code: string

    :returns: The data associated to the code and some additional info:
        ``code`` (normalized), ``omocodes``, ``sex``, ``birthdate``,
        ``birthplace`` (``None`` when no known place matches) and ``raw``.
    :rtype: dict

    :raises ValueError: if the syntax, the birthdate or the CIN is invalid
    """
    raw = decode_raw(code)
    code = raw["code"]

    sex, birthdate = _decode_sex_and_birthdate(raw)
    birthplace = _decode_birthplace(raw, birthdate)

    cin = raw["cin"]
    cin_check = encode_cin(code)
    if cin != cin_check:
        raise ValueError(
            f"[codicefiscale] wrong CIN (Control Internal Number): expected '{cin_check}', found '{cin}'"
        )

    return {
        "code": code,
        "omocodes": _get_omocodes(code),
        "sex": sex,
        "birthdate": birthdate,
        "birthplace": birthplace,
        "raw": raw,
    }


def _decode_sex_and_birthdate(raw):
    """Return ``(sex, birthdate)`` decoded from the raw birthdate parts."""
    year_digits = raw["birthdate_year"].translate(_OMOCODIA_DECODE_TRANS)
    day_digits = raw["birthdate_day"].translate(_OMOCODIA_DECODE_TRANS)
    month = _MONTHS.index(raw["birthdate_month"]) + 1

    try:
        year = int(year_digits)
        day = int(day_digits)
    except ValueError:
        # Letters that are not omocodia substitutions survive the translation.
        raise ValueError(
            f"[codicefiscale] invalid date: {raw['birthdate']}"
        ) from None

    # Females have 40 added to the day of birth.
    if day > 40:
        sex = "F"
        day -= 40
    else:
        sex = "M"

    # Only two year digits are stored: assume the current century unless that
    # would put the birth in the future, in which case use the previous one.
    current_year = datetime.now().year
    year += current_year - current_year % 100
    if year > current_year:
        year -= 100

    try:
        birthdate = datetime(year, month, day)
    except ValueError:
        raise ValueError(
            f"[codicefiscale] invalid date: {year}/{month}/{day}"
        ) from None

    return sex, birthdate


def _parse_record_date(value, default):
    """Parse a ``YYYY-MM-DD`` string, returning ``default`` when missing or malformed."""
    try:
        return datetime.strptime(value, "%Y-%m-%d")
    except (TypeError, ValueError):
        return default


def _decode_birthplace(raw, birthdate):
    """Return the first record for the birthplace code valid at ``birthdate``, else ``None``."""
    place = raw["birthplace"]
    # The leading letter is never substituted; only the digits part is decoded.
    place_code = place[0] + place[1:].translate(_OMOCODIA_DECODE_TRANS)

    # An unknown code yields no candidates instead of iterating over ``None``.
    for candidate in _DATA["codes"].get(place_code, []):
        date_created = _parse_record_date(
            candidate.get("date_created"), datetime.min
        )
        date_deleted = _parse_record_date(
            candidate.get("date_deleted"), datetime.max
        )
        if date_created <= birthdate <= date_deleted:
            return candidate
    return None
448
449
450
def is_omocode(code):
    """
    Determines whether the specified code is omocode or not.

    :param code: The code
    :type code: string

    :returns: True if the specified code is omocode, False otherwise.
    :rtype: boolean

    :raises ValueError: if the code is not valid
    """
    data = decode(code)
    # The first omocode is the plain form (no substitution), so it is skipped.
    # Compare the normalized code: the variants are uppercase, while the
    # input may be lowercase or contain separators.
    return data["code"] in data["omocodes"][1:]
464
465
466
def is_valid(code):
    """
    Determines whether the specified code is valid.

    A code is considered valid when decoding it does not raise ValueError.

    :param code: The code
    :type code: string

    :returns: True if the specified code is valid, False otherwise.
    :rtype: boolean
    """
    try:
        decode(code)
    except ValueError:
        return False
    else:
        return True
481