Passed
Push — master ( cea790...6c49ab )
by Fabio
01:06
created

codicefiscale.codicefiscale.encode_birthplace()   A

Complexity

Conditions 3

Size

Total Lines 26
Code Lines 12

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 12
dl 0
loc 26
rs 9.8
c 0
b 0
f 0
cc 3
nop 2
1
import re
2
import string
3
from datetime import datetime
4
from itertools import combinations
5
6
import fsutil
7
from dateutil import parser as date_parser
8
from slugify import slugify
9
10
# Lowercase letters treated as consonants / vowels when encoding names.
_CONSONANTS = [*"bcdfghjklmnpqrstvwxyz"]
_VOWELS = [*"aeiou"]
# Month letters in calendar order: index 0 is January (used as _MONTHS[month - 1]).
_MONTHS = [*"ABCDEHLMPRST"]
13
# Value contributed by a character sitting at an odd (1-based) position of the
# code, listed by the character's even-position value (0-25).
_CIN_ODD_VALUES = (
    1, 0, 5, 7, 9, 13, 15, 17, 19, 21, 2, 4, 18,
    20, 11, 3, 6, 8, 12, 14, 16, 10, 22, 25, 24, 23,
)
# character -> (value at even position, value at odd position).
# Digit N carries the same pair as the N-th letter of the alphabet.
_CIN = {
    **{
        digit: (value, _CIN_ODD_VALUES[value])
        for value, digit in enumerate(string.digits)
    },
    **{
        letter: (value, _CIN_ODD_VALUES[value])
        for value, letter in enumerate(string.ascii_uppercase)
    },
}
# Control character for each possible remainder (0-25) of the checksum.
_CIN_REMAINDERS = [*string.ascii_uppercase]
52
53
# Omocodia substitution table: each digit and the letter that may replace it.
_OMOCODIA = dict(zip("0123456789", "LMNPQRSTUV"))
_OMOCODIA_DIGITS = "".join(_OMOCODIA.keys())
_OMOCODIA_LETTERS = "".join(_OMOCODIA.values())
# Translation tables for str.translate: digits -> letters and letters -> digits.
_OMOCODIA_ENCODE_TRANS = str.maketrans(_OMOCODIA_DIGITS, _OMOCODIA_LETTERS)
_OMOCODIA_DECODE_TRANS = str.maketrans(_OMOCODIA_LETTERS, _OMOCODIA_DIGITS)
# Positions of the code that hold digits, listed from the rightmost one.
_OMOCODIA_SUBS_INDEXES = [14, 13, 12, 10, 9, 7, 6]
# Every subset of those positions, smallest subsets first; the leading empty
# list stands for "no substitution".
_OMOCODIA_SUBS_INDEXES_COMBINATIONS = [[]]
for combo_size in range(1, len(_OMOCODIA_SUBS_INDEXES) + 1):
    for combo in combinations(_OMOCODIA_SUBS_INDEXES, combo_size):
        _OMOCODIA_SUBS_INDEXES_COMBINATIONS += [list(combo)]
74
75
76
def _get_data(filename):
    """
    Read a JSON file from the ``data`` location resolved against this module.

    :param filename: The file name inside ``data/``
    :type filename: string

    :returns: The parsed JSON content
    """
    data_path = fsutil.join_path(__file__, f"data/{filename}")
    return fsutil.read_file_json(data_path)
78
79
80
def _get_indexed_data():
    """
    Build the lookup tables used to resolve birthplaces.

    Reads ``municipalities.json`` and ``countries.json`` and groups their
    records under three indexes:

    - ``"municipalities"``: keyed by every name slug and by
      ``<name slug>-<province>`` (province lowercased);
    - ``"countries"``: keyed by every name slug;
    - ``"codes"``: keyed by the record code, shared by municipalities
      and countries.

    Each index value is a list, since several records can share one key.

    :returns: The indexed data
    :rtype: dict
    """
    municipalities = _get_data("municipalities.json")
    countries = _get_data("countries.json")
    by_municipality_name = {}
    by_country_name = {}
    by_code = {}

    for municipality in municipalities:
        code = municipality["code"]
        province = municipality["province"].lower()
        for slug in municipality["name_slugs"]:
            # register the record under both the bare name and name-province
            for key in (slug, f"{slug}-{province}"):
                by_municipality_name.setdefault(key, []).append(municipality)
        by_code.setdefault(code, []).append(municipality)

    for country in countries:
        code = country["code"]
        for slug in country["name_slugs"]:
            by_country_name.setdefault(slug, []).append(country)
        by_code.setdefault(code, []).append(country)

    return {
        "municipalities": by_municipality_name,
        "countries": by_country_name,
        "codes": by_code,
    }
112
113
114
# Birthplace lookup tables ("municipalities", "countries", "codes"), built
# once when the module is imported (this reads the bundled JSON data files).
_DATA = _get_indexed_data()
115
116
# Syntax of a 16-character fiscal code, split into named groups:
# surname (3 letters), name (3 letters), birthdate (year, month letter, day),
# birthplace (1 letter + 3 alphanumerics) and the final control letter (cin).
# Matching is case-insensitive.
CODICEFISCALE_RE = re.compile(
    (
        r"^"
        r"(?P<surname>[a-z]{3})"
        r"(?P<name>[a-z]{3})"
        r"(?P<birthdate>"
        r"(?P<birthdate_year>[a-z\d]{2})"
        r"(?P<birthdate_month>[abcdehlmprst]{1})"
        r"(?P<birthdate_day>[a-z\d]{2})"
        r")"
        r"(?P<birthplace>[a-z]{1}[a-z\d]{3})"
        r"(?P<cin>[a-z]{1})$"
    ),
    flags=re.IGNORECASE,
)
125
126
127
def _get_consonants(s):
    """
    Return, in their original order, the characters of ``s`` that are
    listed in ``_CONSONANTS`` (lowercase letters only).
    """
    return [letter for letter in s if letter in _CONSONANTS]
129
130
131
def _get_vowels(s):
    """
    Return, in their original order, the characters of ``s`` that are
    listed in ``_VOWELS`` (lowercase letters only).
    """
    return [letter for letter in s if letter in _VOWELS]
133
134
135
def _get_consonants_and_vowels(consonants, vowels):
    """
    Build the 3-letter code for a name part.

    Consonants come first, then vowels, then ``X`` as padding; the first
    three characters of that sequence are returned in uppercase.

    :param consonants: The consonants, in order of appearance
    :type consonants: list
    :param vowels: The vowels, in order of appearance
    :type vowels: list

    :returns: The 3-letter uppercase code
    :rtype: string
    """
    pool = consonants[:3] + vowels[:3] + ["X", "X", "X"]
    code = "".join(pool[:3])
    return code.upper()
137
138
139
def _get_date(date, separator="-"):
    """
    Convert a date value to a datetime object.

    :param date: The date; a falsy value yields None, a datetime is
        returned unchanged, anything else is slugified and parsed
    :type date: datetime or string
    :param separator: Not used by the implementation (the slugified value is
        always split on "-"); kept so existing callers keep working
    :type separator: string

    :returns: The parsed date, or None when no date is given
    :rtype: datetime or None

    :raises ValueError: if the value cannot be parsed as a date
    """
    if not date:
        return None
    if isinstance(date, datetime):
        return date
    date_slug = slugify(date)
    first_part = date_slug.split("-", 1)[0]
    # a leading 4-character part is read as the year, otherwise day comes first
    if len(first_part) == 4:
        date_parser_options = {"yearfirst": True}
    else:
        date_parser_options = {"dayfirst": True}
    try:
        return date_parser.parse(date_slug, **date_parser_options)
    except (ValueError, OverflowError) as e:
        # dateutil may also raise OverflowError for out-of-range numbers:
        # report it like any other unparsable date and keep the cause chained
        raise ValueError(f"[codicefiscale] {e}") from e
160
161
162
def _get_birthplace(birthplace, birthdate=None):
    """
    Find the birthplace record matching a name or a code.

    The slugified value is looked up among municipality names, then country
    names, then (uppercased) among codes. When a birthdate is given, the
    first candidate whose ``date_created``/``date_deleted`` interval contains
    it is chosen; without a birthdate the first candidate is returned.

    :param birthplace: The birthplace name or code
    :type birthplace: string
    :param birthdate: The birthdate used to pick among candidates
    :type birthdate: datetime or string

    :returns: A copy of the matching record, or None if nothing matches
    :rtype: dict or None
    """
    birthplace_slug = slugify(birthplace)
    birthplace_code = birthplace_slug.upper()
    birthplaces_options = (
        _DATA["municipalities"].get(birthplace_slug)
        or _DATA["countries"].get(birthplace_slug)
        or _DATA["codes"].get(birthplace_code)
    )
    if not birthplaces_options:
        return None

    birthdate_date = _get_date(birthdate)
    if not birthdate_date:
        return birthplaces_options[0].copy()

    # The interval bounds below are compared as naive datetimes, so the
    # birthdate must be naive too: comparing an aware datetime with a naive
    # one raises TypeError.
    birthdate_date = birthdate_date.replace(tzinfo=None)

    for birthplace_option in birthplaces_options:
        # a missing bound means "since always" / "still existing"
        date_created = _get_date(birthplace_option["date_created"]) or datetime.min
        date_created = date_created.replace(tzinfo=None)
        date_deleted = _get_date(birthplace_option["date_deleted"]) or datetime.max
        date_deleted = date_deleted.replace(tzinfo=None)
        if date_created <= birthdate_date <= date_deleted:
            return birthplace_option.copy()

    return None
191
192
193
def _get_omocode(code, subs, trans):
    """
    Apply a translation table to selected positions of a code and
    recompute its control character.

    :param code: The code (only its first 15 characters are used)
    :type code: string
    :param subs: The indexes of the characters to translate
    :type subs: list
    :param trans: The translation table passed to ``str.translate``
    :type trans: dict

    :returns: The translated 15 characters followed by their control character
    :rtype: string
    """
    chars = list(code[:15])
    for position in subs:
        chars[position] = chars[position].translate(trans)
    base = "".join(chars)
    return base + encode_cin(base)
201
202
203
def _get_omocodes(code):
    """
    List every omocode variant of a code.

    The code is first brought back to its all-digits form, then each
    combination of substitutable positions is encoded in turn. The first
    item corresponds to the empty combination, i.e. the code with no
    substitution.

    :param code: The code
    :type code: string

    :returns: The variants, one per entry of the combinations table
    :rtype: list
    """
    root = _get_omocode(
        code, subs=_OMOCODIA_SUBS_INDEXES, trans=_OMOCODIA_DECODE_TRANS
    )
    variants = []
    for indexes in _OMOCODIA_SUBS_INDEXES_COMBINATIONS:
        variants.append(
            _get_omocode(root, subs=indexes, trans=_OMOCODIA_ENCODE_TRANS)
        )
    return variants
212
213
214
def encode_surname(surname):
    """
    Encode the surname to the 3-letter code used in the italian fiscal code.

    The slugified surname contributes its consonants first, then its
    vowels, padded with ``X`` when fewer than three letters are available.

    :param surname: The surname
    :type surname: string

    :returns: The code used in italian fiscal code
    :rtype: string
    """
    slug = slugify(surname)
    return _get_consonants_and_vowels(_get_consonants(slug), _get_vowels(slug))
229
230
231
def encode_name(name):
    """
    Encode the name to the 3-letter code used in the italian fiscal code.

    Same scheme as the surname, except that when the name has more than
    three consonants the second one is skipped.

    :param name: The name
    :type name: string

    :returns: The code used in italian fiscal code
    :rtype: string
    """
    slug = slugify(name)
    consonants = _get_consonants(slug)
    if len(consonants) >= 4:
        # with four or more consonants the second one is not used
        consonants.pop(1)
    return _get_consonants_and_vowels(consonants, _get_vowels(slug))
250
251
252
def encode_birthdate(birthdate, sex):
    """
    Encodes birthdate to the code used in italian fiscal code.

    The result is made of the last two digits of the year, the month
    letter and the two-digit day; 40 is added to the day for females.

    :param birthdate: The birthdate
    :type birthdate: datetime or string
    :param sex: The sex, 'M' or 'F' (case-insensitive)
    :type sex: string

    :returns: The code used in italian fiscal code
    :rtype: string

    :raises ValueError: if birthdate or sex is missing, the birthdate
        cannot be parsed, or sex is not 'M' or 'F'
    """
    if not birthdate:
        raise ValueError("[codicefiscale] 'birthdate' argument cant be None")
    date = _get_date(birthdate)

    if not sex:
        raise ValueError("[codicefiscale] 'sex' argument cant be None")
    sex = sex.upper()
    if sex not in ("M", "F"):
        raise ValueError("[codicefiscale] 'sex' argument must be 'M' or 'F'")

    # modulo + zero padding always yields two digits, even for years < 1000
    # (slicing str(year) would give a shorter code there)
    year_code = f"{date.year % 100:02d}"
    month_code = _MONTHS[date.month - 1]
    day_offset = 40 if sex == "F" else 0
    day_code = f"{date.day + day_offset:02d}"
    return year_code + month_code + day_code
279
280
281
def encode_birthplace(birthplace, birthdate=None):
    """
    Encodes birthplace to the code used in italian fiscal code.

    The full value is looked up first; if nothing matches, the lookup is
    retried with only the text before the first ``,`` or ``(``.

    :param birthplace: The birthplace
    :type birthplace: string
    :param birthdate: The birthdate, forwarded to the birthplace lookup
    :type birthdate: datetime or string

    :returns: The code used in italian fiscal code
    :rtype: string

    :raises ValueError: if birthplace is missing or cannot be mapped to a code
    """
    if not birthplace:
        raise ValueError("[codicefiscale] 'birthplace' argument cant be None")

    short_name = re.split(r",|\(", birthplace)[0]
    match = _get_birthplace(birthplace, birthdate)
    if not match:
        match = _get_birthplace(short_name, birthdate)

    if not match:
        raise ValueError(
            f"[codicefiscale] 'birthplace' argument not mapped to code: ('{birthplace}' -> '')"
        )

    return match["code"]
307
308
309
def encode_cin(code):
    """
    Encodes cin to the code used in italian fiscal code.

    Only the first 15 characters are used. Each character contributes its
    odd- or even-position value (positions counted from 1); the control
    character is picked from the remainder of the total divided by 26.

    :param code: The code, 15 or 16 characters long (case-insensitive)
    :type code: string

    :returns: The code used in italian fiscal code
    :rtype: string

    :raises ValueError: if code is missing, has a wrong length or contains
        a character that is not a letter A-Z or a digit
    """
    if not code:
        raise ValueError("[codicefiscale] 'code' argument cant be None")

    code_len = len(code)
    if code_len not in (15, 16):
        raise ValueError(
            f"[codicefiscale] 'code' length must be 15 or 16, not: {code_len}"
        )

    cin_tot = 0
    for i, char in enumerate(code[0:15]):
        try:
            # the lookup table only has uppercase keys
            even_odd_values = _CIN[char.upper()]
        except KeyError:
            # report bad characters like every other validation error of
            # this module instead of leaking a KeyError
            raise ValueError(
                f"[codicefiscale] invalid character in 'code': '{char}'"
            ) from None
        # index 1 (odd value) for the 1st, 3rd, ... character, index 0 otherwise
        cin_tot += even_odd_values[(i + 1) % 2]

    return _CIN_REMAINDERS[cin_tot % 26]
335
336
337
def encode(surname, name, sex, birthdate, birthplace):
    """
    Encodes the italian fiscal code.

    The parts are encoded in order (surname, name, birthdate, birthplace),
    the control character is appended and the result is decoded once to
    make sure it is valid.

    :param surname: The surname
    :type surname: string
    :param name: The name
    :type name: string
    :param sex: The sex, 'M' or 'F'
    :type sex: string
    :param birthdate: The birthdate
    :type birthdate: datetime or string
    :param birthplace: The birthplace
    :type birthplace: string

    :returns: The italian fiscal code
    :rtype: string

    :raises ValueError: if any part cannot be encoded or the resulting
        code is not valid
    """
    parts = [
        encode_surname(surname),
        encode_name(name),
        encode_birthdate(birthdate, sex),
        encode_birthplace(birthplace, birthdate),
    ]
    code = "".join(parts)
    code += encode_cin(code)

    # decoding raises ValueError if the code is not valid
    return decode(code)["code"]
365
366
367
def decode_raw(code):
    """
    Decodes the raw data associated to the code.

    The code is slugified, stripped of dashes and uppercased before being
    matched against ``CODICEFISCALE_RE``; no further validation is done.

    :param code: The code
    :type code: string

    :returns: The normalized code (``"code"``) and the text of every
        named group of the pattern.
    :rtype: dict

    :raises ValueError: if the code does not match the expected syntax
    """
    code = slugify(code).replace("-", "").upper()

    match = CODICEFISCALE_RE.match(code)
    if match is None:
        raise ValueError(f"[codicefiscale] invalid syntax: {code}")

    data = {"code": code}
    groups = (
        "surname",
        "name",
        "birthdate",
        "birthdate_year",
        "birthdate_month",
        "birthdate_day",
        "birthplace",
        "cin",
    )
    for group in groups:
        data[group] = match[group]
    return data
398
399
400
def decode(code):
    """
    Decodes the italian fiscal code.

    :param code: The code
    :type code: string

    :returns: The data associated to the code and some additional info:
        ``code``, ``omocodes``, ``sex``, ``birthdate``, ``birthplace``, ``raw``.
    :rtype: dict

    :raises ValueError: if the syntax, the date, the birthplace code or the
        control character is not valid
    """
    raw = decode_raw(code)
    code = raw["code"]

    # omocodia letters are mapped back to digits before reading the numbers
    year_digits = raw["birthdate_year"].translate(_OMOCODIA_DECODE_TRANS)
    month = _MONTHS.index(raw["birthdate_month"]) + 1
    day = int(raw["birthdate_day"].translate(_OMOCODIA_DECODE_TRANS))

    # days above 40 mark a female: the real day is the value minus 40
    sex = "F" if day > 40 else "M"
    if sex == "F":
        day -= 40

    # the code stores two year digits only: assume the current century and
    # fall back to the previous one when that would give a future year
    this_year = datetime.now().year
    century_prefix = str(this_year)[:-2]
    year = int(f"{century_prefix}{year_digits}")
    if year > this_year:
        year -= 100
    birthdate_str = f"{year}/{month}/{day}"
    birthdate = _get_date(birthdate_str, separator="/")
    if not birthdate:
        raise ValueError(f"[codicefiscale] invalid date: {birthdate_str}")

    # the leading letter of the birthplace code is never an omocodia substitute
    raw_birthplace = raw["birthplace"]
    birthplace_code = raw_birthplace[0] + raw_birthplace[1:].translate(
        _OMOCODIA_DECODE_TRANS
    )
    birthplace = _get_birthplace(birthplace_code, birthdate)
    if not birthplace:
        raise ValueError(f"[codicefiscale] wrong birthplace code: '{birthplace_code}'")

    cin = raw["cin"]
    cin_check = encode_cin(code)
    if cin != cin_check:
        raise ValueError(
            f"[codicefiscale] wrong CIN (Control Internal Number): expected '{cin_check}', found '{cin}'"
        )

    return {
        "code": code,
        "omocodes": _get_omocodes(code),
        "sex": sex,
        "birthdate": birthdate,
        "birthplace": birthplace,
        "raw": raw,
    }
461
462
463
def is_omocode(code):
    """
    Determines whether the specified code is omocode or not.

    :param code: The code
    :type code: string

    :returns: True if the specified code is omocode, False otherwise.
    :rtype: boolean

    :raises ValueError: if the code is not valid
    """
    data = decode(code)
    # The first omocode is the plain code (no substitution), so it is skipped.
    # The normalized code is compared, not the raw input: the omocodes are
    # uppercase, so a lowercase or spaced input would never match otherwise.
    return data["code"] in data["omocodes"][1:]
477
478
479
def is_valid(code):
    """
    Determines whether the specified code is valid.

    A code is valid when decoding it does not raise ValueError.

    :param code: The code
    :type code: string

    :returns: True if the specified code is valid, False otherwise.
    :rtype: boolean
    """
    try:
        decode(code)
    except ValueError:
        return False
    return True
494