Passed
Push — master ( c73456...7c6ed3 )
by Fabio
01:00
created

codicefiscale.codicefiscale._get_indexed_data()   B

Complexity

Conditions 7

Size

Total Lines 29
Code Lines 26

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 7
eloc 26
nop 0
dl 0
loc 29
rs 7.856
c 0
b 0
f 0
1
# -*- coding: utf-8 -*-
2
3
from datetime import datetime
4
from dateutil import parser as date_parser
5
from itertools import combinations
6
from slugify import slugify
7
8
import fsutil
9
import re
10
import string
11
12
# Resolve the translation-table factory once: Python 3 exposes it on ``str``
# instances, while Python 2 only provides it as ``string.maketrans``.
maketrans = getattr("", "maketrans", None)
if maketrans is None:
    # fallback for Python 2
    maketrans = string.maketrans
17
18
19
# Letters used to build the surname/name parts of the code.
_VOWELS = list("aeiou")
_CONSONANTS = [char for char in string.ascii_lowercase if char not in "aeiou"]

# Month letters, indexed by (month number - 1).
_MONTHS = list("ABCDEHLMPRST")

# Second element of each _CIN value, listed in alphabet order (A..Z).
# The digits "0".."9" share the values of the letters "A".."J".
_CIN_ODD_VALUES = (
    1, 0, 5, 7, 9, 13, 15, 17, 19, 21, 2, 4, 18,
    20, 11, 3, 6, 8, 12, 14, 16, 10, 22, 25, 24, 23,
)

# Maps every allowed character to a (first, second) pair of control values;
# encode_cin picks one element of the pair depending on the character position.
# Digits are inserted first, then the upper-case letters.
_CIN = {
    char: (index, _CIN_ODD_VALUES[index])
    for chars in (string.digits, string.ascii_uppercase)
    for index, char in enumerate(chars)
}

# Control letter for each possible remainder of the control sum modulo 26.
_CIN_REMAINDERS = list(string.ascii_uppercase)
61
62
# Digit -> letter substitutions used to build omocode variants of a code.
_OMOCODIA = dict(zip("0123456789", "LMNPQRSTUV"))
_OMOCODIA_DIGITS = "".join(_OMOCODIA.keys())
_OMOCODIA_LETTERS = "".join(_OMOCODIA.values())

# Translation tables for both directions (digits -> letters and back).
_OMOCODIA_ENCODE_TRANS = maketrans(_OMOCODIA_DIGITS, _OMOCODIA_LETTERS)
_OMOCODIA_DECODE_TRANS = maketrans(_OMOCODIA_LETTERS, _OMOCODIA_DIGITS)

# Character positions that may be substituted, highest index first.
_OMOCODIA_SUBS_INDEXES = [14, 13, 12, 10, 9, 7, 6]

# Every subset of the positions above; the leading empty list stands for
# "no substitution at all".
_OMOCODIA_SUBS_INDEXES_COMBINATIONS = [[]]
for combo_size in range(1, len(_OMOCODIA_SUBS_INDEXES) + 1):
    _OMOCODIA_SUBS_INDEXES_COMBINATIONS.extend(
        map(list, combinations(_OMOCODIA_SUBS_INDEXES, combo_size))
    )
83
84
85
def _get_data(filename):
    """
    Read a JSON file located under "data/", resolved against this module's path.

    :param filename: The name of the JSON file
    :type filename: string

    :returns: The parsed JSON content
    """
    relative_path = "data/{}".format(filename)
    data_path = fsutil.join_path(__file__, relative_path)
    return fsutil.read_file_json(data_path)
87
88
89
def _get_indexed_data():
    """
    Build the lookup tables used to resolve birthplaces.

    :returns: A dict with three indexes: "municipalities" (slugified name, and
        slugified name + "-" + lower-case province, to municipality),
        "countries" (slugified name to country) and "codes" (code to
        municipality or country)
    :rtype: dict
    """
    municipalities = _get_data("municipalities.json")
    countries = _get_data("countries.json")

    by_municipality = {}
    by_country = {}
    by_code = {}
    deleted_suffix = "(soppresso)"

    for municipality in municipalities:
        code = municipality["code"]
        full_name = municipality["name"]
        province = municipality["province"].lower()

        # a single entry can list several names separated by "/"
        for name in full_name.replace(deleted_suffix, "").strip().split("/"):
            slug = slugify(name)
            by_municipality[slug] = municipality
            by_municipality["-".join((slug, province))] = municipality

        # an entry flagged as deleted never replaces one already indexed
        # under the same code
        if not (code in by_code and deleted_suffix in full_name):
            by_code[code] = municipality

    for country in countries:
        code = country["code"]
        for name in country["name"].strip().split("/"):
            by_country[slugify(name)] = country
        by_code[code] = country

    return {
        "municipalities": by_municipality,
        "countries": by_country,
        "codes": by_code,
    }


# Indexed once at import time and shared by the encode/decode functions.
_DATA = _get_indexed_data()
121
122
# Case-insensitive syntax of a 16-character fiscal code. The capture groups
# are, in order: surname, name, whole birthdate, birthdate year, birthdate
# month, birthdate day, birthplace, cin.
CODICEFISCALE_RE = re.compile(
    r"^"
    r"([a-z]{3})"  # surname
    r"([a-z]{3})"  # name
    r"(([a-z\d]{2})([abcdehlmprst]{1})([a-z\d]{2}))"  # birthdate: year, month, day
    r"([a-z]{1}[a-z\d]{3})"  # birthplace
    r"([a-z]{1})$",  # cin
    flags=re.IGNORECASE,
)
131
132
133
def _get_consonants(s):
    """
    Return the characters of ``s`` listed in ``_CONSONANTS``, in their original order.

    :param s: The string to scan
    :type s: string

    :returns: The matching characters
    :rtype: list
    """
    return list(filter(lambda char: char in _CONSONANTS, s))
135
136
137
def _get_vowels(s):
    """
    Return the characters of ``s`` listed in ``_VOWELS``, in their original order.

    :param s: The string to scan
    :type s: string

    :returns: The matching characters
    :rtype: list
    """
    return list(filter(lambda char: char in _VOWELS, s))
139
140
141
def _get_consonants_and_vowels(consonants, vowels):
    """
    Build a 3-letter upper-case code from consonants first, then vowels.

    :param consonants: The available consonants
    :type consonants: list
    :param vowels: The available vowels
    :type vowels: list

    :returns: The first three letters, padded with "X" when fewer are available
    :rtype: string
    """
    padding = ["X"] * 3
    candidates = consonants[:3] + vowels[:3] + padding
    return "".join(candidates[:3]).upper()
143
144
145
def _get_omocode(code, subs, trans):
    """
    Translate the characters at the given positions and recompute the cin.

    :param code: The code (only its first 15 characters are used)
    :type code: string
    :param subs: The indexes of the characters to translate
    :type subs: list
    :param trans: The translation table applied to each selected character

    :returns: The 15 translated characters followed by their cin
    :rtype: string
    """
    chars = list(code[:15])
    for index in subs:
        chars[index] = chars[index].translate(trans)
    body = "".join(chars)
    return body + encode_cin(body)
153
154
155
def _get_omocodes(code):
    """
    Compute every omocode variant of the given code.

    :param code: The code
    :type code: string

    :returns: One code per substitution combination; the first combination is
        empty, so the first item is the fully decoded (digits only) code
    :rtype: list
    """
    # bring every substitutable position back to its digit form first
    root = _get_omocode(
        code, subs=_OMOCODIA_SUBS_INDEXES, trans=_OMOCODIA_DECODE_TRANS
    )
    variants = []
    for subs in _OMOCODIA_SUBS_INDEXES_COMBINATIONS:
        variants.append(_get_omocode(root, subs=subs, trans=_OMOCODIA_ENCODE_TRANS))
    return variants
164
165
166
def encode_surname(surname):
    """
    Encode surname to the code used in italian fiscal code.

    :param surname: The surname
    :type surname: string

    :returns: The 3-letter code: consonants first, then vowels, padded with "X"
    :rtype: string
    """
    slug = slugify(surname)
    return _get_consonants_and_vowels(_get_consonants(slug), _get_vowels(slug))
181
182
183
def encode_name(name):
    """
    Encode name to the code used in italian fiscal code.

    :param name: The name
    :type name: string

    :returns: The 3-letter code: consonants first, then vowels, padded with "X"
    :rtype: string
    """
    slug = slugify(name)
    consonants = _get_consonants(slug)

    # with more than three consonants the second one is skipped
    if len(consonants) > 3:
        consonants.pop(1)

    vowels = _get_vowels(slug)
    return _get_consonants_and_vowels(consonants, vowels)
202
203
204
def encode_birthdate(birthdate, sex):
    """
    Encodes birthdate to the code used in italian fiscal code.

    :param birthdate: The birthdate
    :type birthdate: datetime, date or string
    :param sex: The sex, 'M' or 'F'
    :type sex: string

    :returns: The code used in italian fiscal code: two-digit year, month
        letter and two-digit day (increased by 40 when sex is 'F')
    :rtype: string

    :raises ValueError: if an argument is missing, if sex is not 'M' or 'F',
        or if the birthdate string cannot be parsed
    """
    # local import: the file-level imports only bring in ``datetime``
    from datetime import date

    if not birthdate:
        raise ValueError("[codicefiscale] 'birthdate' argument cant be None")

    if not sex:
        raise ValueError("[codicefiscale] 'sex' argument cant be None")

    sex = sex.upper()

    if sex not in ["M", "F"]:
        raise ValueError("[codicefiscale] 'sex' argument must be 'M' or 'F'")

    if isinstance(birthdate, date):
        # ``datetime`` is a subclass of ``date``: both expose year/month/day
        date_obj = birthdate
    else:
        date_slug = slugify(birthdate)
        date_parts = date_slug.split("-")[:3]
        # a 4-character first part is read as the year, otherwise as the day
        date_kwargs = (
            {"yearfirst": True} if len(date_parts[0]) == 4 else {"dayfirst": True}
        )
        try:
            date_obj = date_parser.parse(date_slug, **date_kwargs)
        except ValueError as e:
            raise ValueError("[codicefiscale] {}".format(e))

    # last two digits of the year, zero-padded (also correct for years < 1000)
    year_code = "{:02d}".format(date_obj.year % 100)
    month_code = _MONTHS[date_obj.month - 1]
    day_code = str(date_obj.day + (40 if sex == "F" else 0)).zfill(2)
    return year_code + month_code + day_code
245
246
247
def encode_birthplace(birthplace):
    """
    Encodes birthplace to the code used in italian fiscal code.

    :param birthplace: The birthplace (a municipality name, a country name
        or directly a code)
    :type birthplace: string

    :returns: The code used in italian fiscal code
    :rtype: string

    :raises ValueError: if birthplace is empty or cannot be mapped to a code
    """
    if not birthplace:
        raise ValueError("[codicefiscale] 'birthplace' argument cant be None")

    def find_birthplace_code(birthplace):
        # lookup order: municipality name, country name, then raw code
        birthplace_slug = slugify(birthplace)
        birthplace_code = birthplace_slug.upper()
        birthplace_data = _DATA["municipalities"].get(
            birthplace_slug,
            _DATA["countries"].get(
                birthplace_slug, _DATA["codes"].get(birthplace_code, {})
            ),
        )
        return birthplace_data.get("code", "")

    # retry with the text preceding the first "," or "(" when the full
    # string is not found
    birthplace_code = find_birthplace_code(birthplace) or find_birthplace_code(
        re.split(r",|\(", birthplace)[0]
    )

    if birthplace_code == "":
        raise ValueError(
            "[codicefiscale] 'birthplace' argument not mapped to code: "
            "('{}' -> '')".format(birthplace)
        )

    return birthplace_code
283
284
285
def encode_cin(code):
    """
    Encodes cin to the code used in italian fiscal code.

    :param code: The code (15 or 16 characters; only the first 15 are used)
    :type code: string

    :returns: The control letter computed from the first 15 characters
    :rtype: string

    :raises ValueError: if code is empty or its length is not 15 or 16
    """
    if not code:
        raise ValueError("[codicefiscale] 'code' argument cant be None")

    code_len = len(code)
    if not (code_len == 15 or code_len == 16):
        raise ValueError(
            "[codicefiscale] 'code' length must be 15 or 16, not: {}".format(code_len)
        )

    # characters at 1st, 3rd, 5th... position use the second value of their
    # _CIN pair, the others use the first one
    cin_tot = sum(
        _CIN[char][(position + 1) % 2] for position, char in enumerate(code[:15])
    )
    return _CIN_REMAINDERS[cin_tot % 26]
311
312
313
def encode(surname, name, sex, birthdate, birthplace):
    """
    Encodes the italian fiscal code.

    :param surname: The surname
    :type surname: string
    :param name: The name
    :type name: string
    :param sex: The sex, 'M' or 'F'
    :type sex: string
    :param birthdate: The birthdate
    :type birthdate: datetime or string
    :param birthplace: The birthplace
    :type birthplace: string

    :returns: The italian fiscal code
    :rtype: string
    """
    parts = [
        encode_surname(surname),
        encode_name(name),
        encode_birthdate(birthdate, sex),
        encode_birthplace(birthplace),
    ]
    partial_code = "".join(parts)
    full_code = partial_code + encode_cin(partial_code)

    # decoding raises ValueError if the resulting code is not valid
    return decode(full_code)["code"]
341
342
343
def decode_raw(code):
    """
    Decodes the raw data associated to the code.

    :param code: The code
    :type code: string

    :returns: The normalized code and the substrings of each of its parts
    :rtype: dict

    :raises ValueError: if the code does not match the fiscal code syntax
    """
    # normalize: slugify, drop the dashes, upper-case
    code = slugify(code).replace("-", "").upper()

    match = CODICEFISCALE_RE.match(code)
    if match is None:
        raise ValueError("[codicefiscale] invalid syntax: {}".format(code))

    # one name per capture group, in pattern order
    field_names = (
        "surname",
        "name",
        "birthdate",
        "birthdate_year",
        "birthdate_month",
        "birthdate_day",
        "birthplace",
        "cin",
    )
    data = {"code": code}
    data.update(zip(field_names, match.groups()))
    return data
377
378
379
def decode(code):
    """
    Decodes the italian fiscal code.

    :param code: The code
    :type code: string

    :returns: The data associated to the code and some additional info.
    :rtype: dict

    :raises ValueError: if the syntax, the birthdate or the cin is not valid
    """
    raw = decode_raw(code)
    code = raw["code"]

    # year and day may contain omocodia letters: turn them back into digits
    year_suffix = raw["birthdate_year"].translate(_OMOCODIA_DECODE_TRANS)
    month = _MONTHS.index(raw["birthdate_month"]) + 1
    day = int(raw["birthdate_day"].translate(_OMOCODIA_DECODE_TRANS))

    # a day greater than 40 marks a female: the real day is (day - 40)
    sex = "F" if day > 40 else "M"
    if sex == "F":
        day -= 40

    # the code stores two year digits only: assume the current century and
    # step back 100 years when that would give a year in the future
    current_year = datetime.now().year
    century_prefix = str(current_year)[0:-2]
    year = int("{}{}".format(century_prefix, year_suffix))
    if year > current_year:
        year -= 100

    birthdate_str = "{}/{}/{}".format(year, month, day)
    try:
        birthdate = datetime.strptime(birthdate_str, "%Y/%m/%d")
    except ValueError:
        raise ValueError("[codicefiscale] invalid date: {}".format(birthdate_str))

    # the first birthplace character is kept as it is, the remaining ones
    # may contain omocodia letters
    place_code = raw["birthplace"]
    birthplace = _DATA["codes"].get(
        place_code[0] + place_code[1:].translate(_OMOCODIA_DECODE_TRANS)
    )

    found_cin = raw["cin"]
    expected_cin = encode_cin(code)
    if found_cin != expected_cin:
        raise ValueError(
            "[codicefiscale] wrong CIN (Control Internal Number): expected '{}', found '{}'".format(
                expected_cin, found_cin
            )
        )

    return {
        "code": code,
        "omocodes": _get_omocodes(code),
        "sex": sex,
        "birthdate": birthdate,
        "birthplace": birthplace,
        "raw": raw,
    }
439
440
441
def is_omocode(code):
    """
    Determines whether the specified code is omocode or not.

    :param code: The code
    :type code: string

    :returns: True if the specified code is omocode, False otherwise.
    :rtype: boolean

    :raises ValueError: if the code is not valid (raised by decode)
    """
    data = decode(code)
    # The first item is the code with no substitution applied, so only the
    # remaining items are omocodes. Compare the normalized code returned by
    # decode (not the raw input), so lower-case or spaced input is recognized.
    return data["code"] in data["omocodes"][1:]
455
456
457
def is_valid(code):
    """
    Determines whether the specified code is valid.

    :param code: The code
    :type code: string

    :returns: True if the specified code is valid, False otherwise.
    :rtype: boolean
    """
    try:
        decode(code)
    except ValueError:
        # decode reports every validation failure as ValueError
        return False
    return True
472