Passed
Push — master ( 510333...9fd88a )
by Fabio
01:07
created

codicefiscale.codicefiscale.encode()   A

Complexity

Conditions 1

Size

Total Lines 28
Code Lines 9

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 9
dl 0
loc 28
rs 9.95
c 0
b 0
f 0
cc 1
nop 5
1
# -*- coding: utf-8 -*-
2
3
from datetime import datetime
4
from dateutil import parser as date_parser
5
from itertools import combinations
6
from slugify import slugify
7
8
import fsutil
9
import re
10
import string
11
12
13
_CONSONANTS = list("bcdfghjklmnpqrstvwxyz")
14
_VOWELS = list("aeiou")
15
_MONTHS = list("ABCDEHLMPRST")
16
_CIN = {
17
    "0": (0, 1),
18
    "1": (1, 0),
19
    "2": (2, 5),
20
    "3": (3, 7),
21
    "4": (4, 9),
22
    "5": (5, 13),
23
    "6": (6, 15),
24
    "7": (7, 17),
25
    "8": (8, 19),
26
    "9": (9, 21),
27
    "A": (0, 1),
28
    "B": (1, 0),
29
    "C": (2, 5),
30
    "D": (3, 7),
31
    "E": (4, 9),
32
    "F": (5, 13),
33
    "G": (6, 15),
34
    "H": (7, 17),
35
    "I": (8, 19),
36
    "J": (9, 21),
37
    "K": (10, 2),
38
    "L": (11, 4),
39
    "M": (12, 18),
40
    "N": (13, 20),
41
    "O": (14, 11),
42
    "P": (15, 3),
43
    "Q": (16, 6),
44
    "R": (17, 8),
45
    "S": (18, 12),
46
    "T": (19, 14),
47
    "U": (20, 16),
48
    "V": (21, 10),
49
    "W": (22, 22),
50
    "X": (23, 25),
51
    "Y": (24, 24),
52
    "Z": (25, 23),
53
}
54
_CIN_REMAINDERS = list(string.ascii_uppercase)
55
56
_OMOCODIA = {
57
    "0": "L",
58
    "1": "M",
59
    "2": "N",
60
    "3": "P",
61
    "4": "Q",
62
    "5": "R",
63
    "6": "S",
64
    "7": "T",
65
    "8": "U",
66
    "9": "V",
67
}
68
_OMOCODIA_DIGITS = "".join([digit for digit in _OMOCODIA])
69
_OMOCODIA_LETTERS = "".join([_OMOCODIA[digit] for digit in _OMOCODIA])
70
_OMOCODIA_ENCODE_TRANS = "".maketrans(_OMOCODIA_DIGITS, _OMOCODIA_LETTERS)
71
_OMOCODIA_DECODE_TRANS = "".maketrans(_OMOCODIA_LETTERS, _OMOCODIA_DIGITS)
72
_OMOCODIA_SUBS_INDEXES = list(reversed([6, 7, 9, 10, 12, 13, 14]))
73
_OMOCODIA_SUBS_INDEXES_COMBINATIONS = [[]]
74
for combo_size in range(1, len(_OMOCODIA_SUBS_INDEXES) + 1):
75
    for combo in combinations(_OMOCODIA_SUBS_INDEXES, combo_size):
76
        _OMOCODIA_SUBS_INDEXES_COMBINATIONS.append(list(combo))
77
78
79
def _get_data(filename):
    """
    Read a JSON data file bundled with the package.

    :param filename: The file name inside the ``data`` directory
    :type filename: string

    :returns: The parsed JSON content
    """
    # The path is resolved by fsutil from this module's __file__.
    data_path = fsutil.join_path(__file__, f"data/{filename}")
    return fsutil.read_file_json(data_path)
81
82
83
def _get_indexed_data():
    """
    Build the lookup tables used to encode and decode birthplaces.

    :returns: A dict with three indexes: "municipalities" (name slug and
        "slug-province" -> municipality), "countries" (name slug -> country)
        and "codes" (code -> list of municipalities/countries sharing it)
    :rtype: dict
    """
    municipalities = _get_data("municipalities.json")
    countries = _get_data("countries.json")

    municipalities_index = {}
    countries_index = {}
    codes_index = {}

    for item in municipalities:
        item_code = item["code"]
        province_slug = item["province"].lower()
        for slug in item["name_slugs"]:
            # Reachable both by plain name and by "name-province".
            municipalities_index[slug] = item
            municipalities_index["-".join((slug, province_slug))] = item
        codes_index.setdefault(item_code, []).append(item)

    for item in countries:
        item_code = item["code"]
        for slug in item["name_slugs"]:
            countries_index[slug] = item
        codes_index.setdefault(item_code, []).append(item)

    return {
        "municipalities": municipalities_index,
        "countries": countries_index,
        "codes": codes_index,
    }
111
112
113
# Birthplace lookup tables ("municipalities", "countries", "codes"),
# loaded from the bundled JSON files once, when the module is imported.
_DATA = _get_indexed_data()
114
115
# Syntax of a 16-character fiscal code (case-insensitive). The capture groups
# are consumed, in order, by ``decode_raw``.
CODICEFISCALE_RE = re.compile(
    "".join(
        (
            r"^",
            r"([a-z]{3})",  # surname
            r"([a-z]{3})",  # name
            # birthdate: year, month letter, day
            r"(([a-z\d]{2})([abcdehlmprst]{1})([a-z\d]{2}))",
            r"([a-z]{1}[a-z\d]{3})",  # birthplace
            r"([a-z]{1})$",  # cin
        )
    ),
    flags=re.IGNORECASE,
)
124
125
126
def _get_consonants(s):
    """
    Collect, in order, the characters of ``s`` listed in ``_CONSONANTS``.

    :param s: The text to scan
    :type s: string

    :returns: The matching characters
    :rtype: list
    """
    return list(filter(lambda char: char in _CONSONANTS, s))
128
129
130
def _get_vowels(s):
    """
    Collect, in order, the characters of ``s`` listed in ``_VOWELS``.

    :param s: The text to scan
    :type s: string

    :returns: The matching characters
    :rtype: list
    """
    return list(filter(lambda char: char in _VOWELS, s))
132
133
134
def _get_consonants_and_vowels(consonants, vowels):
135
    return "".join(list(consonants[:3] + vowels[:3] + (["X"] * 3))[:3]).upper()
136
137
138
def _get_omocode(code, subs, trans):
    """
    Translate the characters of ``code`` at the given indexes and append a
    freshly computed control character.

    :param code: The code (only its first 15 characters are used)
    :type code: string
    :param subs: The 0-based indexes of the characters to translate
    :type subs: list
    :param trans: The translation table applied to each selected character
    :type trans: dict

    :returns: The 16-character code
    :rtype: string
    """
    chars = list(code[:15])
    for index in subs:
        chars[index] = chars[index].translate(trans)
    body = "".join(chars)
    # The control character depends on the translated characters.
    return body + encode_cin(body)
146
147
148
def _get_omocodes(code):
    """
    List every omocode variant of ``code``.

    :param code: The code
    :type code: string

    :returns: The variants; the first item is the code with no substitution
    :rtype: list
    """
    # Undo any substitution first, so all variants derive from the same root.
    root = _get_omocode(
        code, subs=_OMOCODIA_SUBS_INDEXES, trans=_OMOCODIA_DECODE_TRANS
    )
    variants = []
    for indexes in _OMOCODIA_SUBS_INDEXES_COMBINATIONS:
        variants.append(
            _get_omocode(root, subs=indexes, trans=_OMOCODIA_ENCODE_TRANS)
        )
    return variants
157
158
159
def _get_date(s):
160
    try:
161
        value = datetime.strptime(s, "%Y-%m-%d")
162
    except ValueError:
163
        value = None
164
    return value
165
166
167
def encode_surname(surname):
    """
    Build the 3-letter surname part of the italian fiscal code.

    :param surname: The surname
    :type surname: string

    :returns: The code used in italian fiscal code
    :rtype: string
    """
    slug = slugify(surname)
    return _get_consonants_and_vowels(_get_consonants(slug), _get_vowels(slug))
182
183
184
def encode_name(name):
    """
    Build the 3-letter name part of the italian fiscal code.

    :param name: The name
    :type name: string

    :returns: The code used in italian fiscal code
    :rtype: string
    """
    slug = slugify(name)
    consonants = _get_consonants(slug)

    # With four or more consonants the second one is skipped.
    if len(consonants) >= 4:
        consonants.pop(1)

    return _get_consonants_and_vowels(consonants, _get_vowels(slug))
203
204
205
def encode_birthdate(birthdate, sex):
    """
    Encodes birthdate to the code used in italian fiscal code.

    :param birthdate: The birthdate
    :type birthdate: datetime, date or string
    :param sex: The sex, 'M' or 'F'
    :type sex: string

    :returns: The code used in italian fiscal code
    :rtype: string
    :raises ValueError: if an argument is missing, ``sex`` is not 'M'/'F' or
        the birthdate string cannot be parsed
    """
    # Local import: the module only imports ``datetime`` at file level.
    from datetime import date

    if not birthdate:
        raise ValueError("[codicefiscale] 'birthdate' argument cant be None")

    if not sex:
        raise ValueError("[codicefiscale] 'sex' argument cant be None")

    sex = sex.upper()

    if sex not in ("M", "F"):
        raise ValueError("[codicefiscale] 'sex' argument must be 'M' or 'F'")

    # ``datetime`` is a subclass of ``date``, so both are accepted as-is.
    if isinstance(birthdate, date):
        date_obj = birthdate
    else:
        date_slug = slugify(birthdate)
        date_parts = date_slug.split("-")[:3]
        # A leading 4-character part is read as the year, otherwise the
        # string is parsed day-first.
        date_kwargs = (
            {"yearfirst": True} if len(date_parts[0]) == 4 else {"dayfirst": True}
        )
        try:
            date_obj = date_parser.parse(date_slug, **date_kwargs)
        except ValueError as e:
            raise ValueError(f"[codicefiscale] {e}") from e

    # Last two digits of the year, zero-padded (also correct for years < 1000).
    year_code = f"{date_obj.year % 100:02d}"
    month_code = _MONTHS[date_obj.month - 1]
    # Females are encoded by adding 40 to the day of birth.
    day_value = date_obj.day + (40 if sex == "F" else 0)
    day_code = f"{day_value:02d}"
    return year_code + month_code + day_code
246
247
248
def encode_birthplace(birthplace):
    """
    Resolve a birthplace (name or code) to its fiscal code birthplace code.

    :param birthplace: The birthplace
    :type birthplace: string

    :returns: The code used in italian fiscal code
    :rtype: string
    """
    if not birthplace:
        raise ValueError("[codicefiscale] 'birthplace' argument cant be None")

    def lookup(value):
        slug = slugify(value)
        municipalities = _DATA["municipalities"]
        # Municipalities take precedence over countries.
        if slug in municipalities:
            record = municipalities[slug]
        else:
            record = _DATA["countries"].get(slug)
        if record:
            return record.get("code", "")
        # Not a known name: accept the value if it is already a known code.
        candidate = slug.upper()
        return candidate if candidate in _DATA["codes"] else ""

    code = lookup(birthplace)
    if not code:
        # Retry with only the text before the first "," or "(".
        code = lookup(re.split(r",|\(", birthplace)[0])

    if code == "":
        raise ValueError(
            f"[codicefiscale] 'birthplace' argument not mapped to code: ('{birthplace}' -> '')"
        )

    return code
285
286
287
def encode_cin(code):
    """
    Encodes cin to the code used in italian fiscal code.

    The control character is computed on the first 15 characters only, so a
    full 16-character code can be passed to re-compute its expected CIN.
    The input is treated case-insensitively.

    :param code: The code (15 or 16 characters)
    :type code: string

    :returns: The code used in italian fiscal code
    :rtype: string
    :raises ValueError: if ``code`` is empty or its length is not 15 or 16
    :raises KeyError: if ``code`` contains a character other than A-Z / 0-9
    """
    if not code:
        raise ValueError("[codicefiscale] 'code' argument cant be None")

    code_len = len(code)
    if code_len not in [15, 16]:
        raise ValueError(
            f"[codicefiscale] 'code' length must be 15 or 16, not: {code_len}"
        )

    cin_tot = 0
    # The lookup table only has uppercase keys.
    for i, char in enumerate(code[0:15].upper()):
        # 0-based even index (1st, 3rd, ... character) -> second tuple item,
        # otherwise the first one.
        cin_tot += _CIN[char][1 if i % 2 == 0 else 0]

    return _CIN_REMAINDERS[cin_tot % 26]
313
314
315
def encode(surname, name, sex, birthdate, birthplace):
    """
    Build the italian fiscal code from personal data.

    :param surname: The surname
    :type surname: string
    :param name: The name
    :type name: string
    :param sex: The sex, 'M' or 'F'
    :type sex: string
    :param birthdate: The birthdate
    :type birthdate: datetime or string
    :param birthplace: The birthplace
    :type birthplace: string

    :returns: The italian fiscal code
    :rtype: string
    """
    partial_code = "".join(
        [
            encode_surname(surname),
            encode_name(name),
            encode_birthdate(birthdate, sex),
            encode_birthplace(birthplace),
        ]
    )
    full_code = partial_code + encode_cin(partial_code)

    # Decoding validates the result: ValueError is raised if it is not valid.
    return decode(full_code)["code"]
343
344
345
def decode_raw(code):
    """
    Split a fiscal code into its raw (still encoded) parts.

    :param code: The code
    :type code: string

    :returns: The raw data associated to the code.
    :rtype: dict
    """
    # Normalize: slugify, drop the dashes, uppercase.
    code = slugify(code).replace("-", "").upper()

    match = CODICEFISCALE_RE.match(code)
    if match is None:
        raise ValueError(f"[codicefiscale] invalid syntax: {code}")

    # Field names, in the same order as the capture groups of the regex.
    fields = (
        "surname",
        "name",
        "birthdate",
        "birthdate_year",
        "birthdate_month",
        "birthdate_day",
        "birthplace",
        "cin",
    )
    data = {"code": code}
    data.update(zip(fields, match.groups()))
    return data
379
380
381
def decode(code):
    """
    Decodes the italian fiscal code.

    :param code: The code
    :type code: string

    :returns: The data associated to the code and some additional info:
        "code", "omocodes", "sex", "birthdate", "birthplace" (None when no
        known birthplace matches the code and the birthdate) and "raw".
    :rtype: dict
    :raises ValueError: if the syntax, the birthdate or the CIN is not valid
    """
    raw = decode_raw(code)

    code = raw["code"]

    # Year and day may contain omocodia letters: map them back to digits.
    birthdate_year = raw["birthdate_year"].translate(_OMOCODIA_DECODE_TRANS)
    birthdate_month = _MONTHS.index(raw["birthdate_month"]) + 1
    birthdate_day = int(raw["birthdate_day"].translate(_OMOCODIA_DECODE_TRANS))

    # Days above 40 encode a female (day of birth + 40).
    if birthdate_day > 40:
        birthdate_day -= 40
        sex = "F"
    else:
        sex = "M"

    # Only two year digits are stored: assume the current century and fall
    # back to the previous one when that would give a year in the future.
    current_year = datetime.now().year
    current_year_century_prefix = str(current_year)[0:-2]
    birthdate_year_int = int(f"{current_year_century_prefix}{birthdate_year}")
    if birthdate_year_int > current_year:
        birthdate_year_int -= 100
    birthdate_year = str(birthdate_year_int)
    birthdate_str = f"{birthdate_year}/{birthdate_month}/{birthdate_day}"
    try:
        birthdate = datetime.strptime(birthdate_str, "%Y/%m/%d")
    except ValueError as e:
        raise ValueError(f"[codicefiscale] invalid date: {birthdate_str}") from e

    # The first birthplace character is always a letter; only the following
    # three can contain omocodia substitutions.
    birthplace_code = raw["birthplace"][0] + raw["birthplace"][1:].translate(
        _OMOCODIA_DECODE_TRANS
    )
    birthplace = None
    # An unknown code yields no options (and a None birthplace) instead of
    # failing with a TypeError while iterating over None.
    birthplaces_options = _DATA["codes"].get(birthplace_code, [])
    for birthplace_option in birthplaces_options:
        # Pick the first entry that existed at the time of birth.
        date_created = _get_date(birthplace_option["date_created"]) or datetime.min
        date_deleted = _get_date(birthplace_option["date_deleted"]) or datetime.max
        if date_created <= birthdate <= date_deleted:
            birthplace = birthplace_option.copy()
            break

    cin = raw["cin"]
    cin_check = encode_cin(code)
    if cin != cin_check:
        raise ValueError(
            f"[codicefiscale] wrong CIN (Control Internal Number): expected '{cin_check}', found '{cin}'"
        )

    data = {
        "code": code,
        "omocodes": _get_omocodes(code),
        "sex": sex,
        "birthdate": birthdate,
        "birthplace": birthplace,
        "raw": raw,
    }

    return data
448
449
450
def is_omocode(code):
    """
    Determines whether the specified code is omocode or not.

    The comparison uses the normalized code returned by ``decode``, so the
    result does not depend on the letter case or on separators in the input.

    :param code: The code
    :type code: string

    :returns: True if the specified code is omocode, False otherwise.
    :rtype: boolean
    :raises ValueError: if the code is not valid
    """
    data = decode(code)
    # The first entry is the code without substitutions: it is not an omocode.
    omocodes = data["omocodes"][1:]
    return data["code"] in omocodes
464
465
466
def is_valid(code):
    """
    Tell whether the specified code can be decoded successfully.

    :param code: The code
    :type code: string

    :returns: True if the specified code is valid, False otherwise.
    :rtype: boolean
    """
    try:
        decode(code)
    except ValueError:
        return False
    else:
        return True
481