1
|
|
|
import re |
2
|
|
|
import string |
3
|
|
|
from datetime import datetime |
4
|
|
|
from itertools import combinations |
5
|
|
|
|
6
|
|
|
import fsutil |
7
|
|
|
from dateutil import parser as date_parser |
8
|
|
|
from slugify import slugify |
9
|
|
|
|
10
|
|
|
_CONSONANTS = list("bcdfghjklmnpqrstvwxyz") |
11
|
|
|
_VOWELS = list("aeiou") |
12
|
|
|
_MONTHS = list("ABCDEHLMPRST") |
13
|
|
|
_CIN = { |
14
|
|
|
"0": (0, 1), |
15
|
|
|
"1": (1, 0), |
16
|
|
|
"2": (2, 5), |
17
|
|
|
"3": (3, 7), |
18
|
|
|
"4": (4, 9), |
19
|
|
|
"5": (5, 13), |
20
|
|
|
"6": (6, 15), |
21
|
|
|
"7": (7, 17), |
22
|
|
|
"8": (8, 19), |
23
|
|
|
"9": (9, 21), |
24
|
|
|
"A": (0, 1), |
25
|
|
|
"B": (1, 0), |
26
|
|
|
"C": (2, 5), |
27
|
|
|
"D": (3, 7), |
28
|
|
|
"E": (4, 9), |
29
|
|
|
"F": (5, 13), |
30
|
|
|
"G": (6, 15), |
31
|
|
|
"H": (7, 17), |
32
|
|
|
"I": (8, 19), |
33
|
|
|
"J": (9, 21), |
34
|
|
|
"K": (10, 2), |
35
|
|
|
"L": (11, 4), |
36
|
|
|
"M": (12, 18), |
37
|
|
|
"N": (13, 20), |
38
|
|
|
"O": (14, 11), |
39
|
|
|
"P": (15, 3), |
40
|
|
|
"Q": (16, 6), |
41
|
|
|
"R": (17, 8), |
42
|
|
|
"S": (18, 12), |
43
|
|
|
"T": (19, 14), |
44
|
|
|
"U": (20, 16), |
45
|
|
|
"V": (21, 10), |
46
|
|
|
"W": (22, 22), |
47
|
|
|
"X": (23, 25), |
48
|
|
|
"Y": (24, 24), |
49
|
|
|
"Z": (25, 23), |
50
|
|
|
} |
51
|
|
|
_CIN_REMAINDERS = list(string.ascii_uppercase) |
52
|
|
|
|
53
|
|
|
_OMOCODIA = { |
54
|
|
|
"0": "L", |
55
|
|
|
"1": "M", |
56
|
|
|
"2": "N", |
57
|
|
|
"3": "P", |
58
|
|
|
"4": "Q", |
59
|
|
|
"5": "R", |
60
|
|
|
"6": "S", |
61
|
|
|
"7": "T", |
62
|
|
|
"8": "U", |
63
|
|
|
"9": "V", |
64
|
|
|
} |
65
|
|
|
_OMOCODIA_DIGITS = "".join([digit for digit in _OMOCODIA]) |
66
|
|
|
_OMOCODIA_LETTERS = "".join([_OMOCODIA[digit] for digit in _OMOCODIA]) |
67
|
|
|
_OMOCODIA_ENCODE_TRANS = "".maketrans(_OMOCODIA_DIGITS, _OMOCODIA_LETTERS) |
68
|
|
|
_OMOCODIA_DECODE_TRANS = "".maketrans(_OMOCODIA_LETTERS, _OMOCODIA_DIGITS) |
69
|
|
|
_OMOCODIA_SUBS_INDEXES = list(reversed([6, 7, 9, 10, 12, 13, 14])) |
70
|
|
|
_OMOCODIA_SUBS_INDEXES_COMBINATIONS = [[]] |
71
|
|
|
for combo_size in range(1, len(_OMOCODIA_SUBS_INDEXES) + 1): |
72
|
|
|
for combo in combinations(_OMOCODIA_SUBS_INDEXES, combo_size): |
73
|
|
|
_OMOCODIA_SUBS_INDEXES_COMBINATIONS.append(list(combo)) |
74
|
|
|
|
75
|
|
|
|
76
|
|
|
def _get_data(filename):
    """
    Read a JSON file from the ``data/`` location derived from this module.

    The path is built by ``fsutil.join_path`` from ``__file__`` and
    ``data/<filename>``; the parsed content is returned as-is.
    """
    data_path = fsutil.join_path(__file__, f"data/{filename}")
    return fsutil.read_file_json(data_path)
78
|
|
|
|
79
|
|
|
|
80
|
|
|
def _get_indexed_data():
    """
    Load the municipality and country records and index them for lookup.

    :returns: A dict with three indexes, each mapping a key to the list of
        records sharing it: "municipalities" (keyed by name slug and by
        "<name slug>-<lowercased province>"), "countries" (keyed by name
        slug) and "codes" (keyed by record code, municipalities and
        countries together).
    :rtype: dict
    """
    municipalities = _get_data("municipalities.json")
    countries = _get_data("countries.json")
    by_municipality, by_country, by_code = {}, {}, {}

    for municipality in municipalities:
        code = municipality["code"]
        province = municipality["province"].lower()
        for slug in municipality["name_slugs"]:
            # Index by the bare name first, then by the name qualified with its province.
            by_municipality.setdefault(slug, []).append(municipality)
            by_municipality.setdefault(f"{slug}-{province}", []).append(municipality)
        by_code.setdefault(code, []).append(municipality)

    for country in countries:
        code = country["code"]
        for slug in country["name_slugs"]:
            by_country.setdefault(slug, []).append(country)
        by_code.setdefault(code, []).append(country)

    return {
        "municipalities": by_municipality,
        "countries": by_country,
        "codes": by_code,
    }
112
|
|
|
|
113
|
|
|
|
114
|
|
|
# Lookup indexes, built once when the module is imported.
_DATA = _get_indexed_data()

# Case-insensitive pattern for a complete 16-character code; each named group
# is one field of the code.
CODICEFISCALE_RE = re.compile(
    "".join(
        (
            r"^",
            # 3 letters each for surname and name
            r"(?P<surname>[a-z]{3})",
            r"(?P<name>[a-z]{3})",
            # 2 characters for the year, 1 month letter, 2 characters for the day
            r"(?P<birthdate>(?P<birthdate_year>[a-z\d]{2})(?P<birthdate_month>[abcdehlmprst]{1})(?P<birthdate_day>[a-z\d]{2}))",
            # 1 letter followed by 3 letters or digits
            r"(?P<birthplace>[a-z]{1}[a-z\d]{3})",
            # final CIN letter
            r"(?P<cin>[a-z]{1})$",
        )
    ),
    flags=re.IGNORECASE,
)
125
|
|
|
|
126
|
|
|
|
127
|
|
|
def _get_consonants(s):
    """Return, in order, the characters of ``s`` found in ``_CONSONANTS`` as a list."""
    return list(filter(lambda char: char in _CONSONANTS, s))
129
|
|
|
|
130
|
|
|
|
131
|
|
|
def _get_vowels(s):
    """Return, in order, the characters of ``s`` found in ``_VOWELS`` as a list."""
    return list(filter(lambda char: char in _VOWELS, s))
133
|
|
|
|
134
|
|
|
|
135
|
|
|
def _get_consonants_and_vowels(consonants, vowels): |
136
|
|
|
return "".join(list(consonants[:3] + vowels[:3] + (["X"] * 3))[:3]).upper() |
137
|
|
|
|
138
|
|
|
|
139
|
|
|
def _get_date(date, separator="-"): |
140
|
|
|
if not date: |
141
|
|
|
return None |
142
|
|
|
if isinstance(date, datetime): |
143
|
|
|
return date |
144
|
|
|
date_slug = slugify(date) |
145
|
|
|
date_parts = date_slug.split("-")[:3] |
146
|
|
|
date_parser_options = ( |
147
|
|
|
{ |
148
|
|
|
"yearfirst": True, |
149
|
|
|
} |
150
|
|
|
if len(date_parts[0]) == 4 |
151
|
|
|
else { |
152
|
|
|
"dayfirst": True, |
153
|
|
|
} |
154
|
|
|
) |
155
|
|
|
try: |
156
|
|
|
date_obj = date_parser.parse(date_slug, **date_parser_options) |
157
|
|
|
return date_obj |
158
|
|
|
except ValueError as e: |
159
|
|
|
raise ValueError(f"[codicefiscale] {e}") |
160
|
|
|
|
161
|
|
|
|
162
|
|
|
def _get_birthplace(birthplace, birthdate=None):
    """
    Find the birthplace record matching a name or a code.

    :param birthplace: A municipality name, a country name or a code
    :type birthplace: string
    :param birthdate: Optional birthdate used to choose, among the records
        sharing the same key, the one whose creation/deletion dates contain it
    :type birthdate: datetime or string

    :returns: A copy of the matching record; None when the key is unknown or
        no record is valid at the given birthdate
    :rtype: dict or None
    """
    birthplace_slug = slugify(birthplace)
    birthplace_code = birthplace_slug.upper()
    # Lookup precedence: municipality name, then country name, then code.
    lookups = (
        (_DATA["municipalities"], birthplace_slug),
        (_DATA["countries"], birthplace_slug),
        (_DATA["codes"], birthplace_code),
    )
    birthplaces_options = next(
        (table[key] for table, key in lookups if key in table), None
    )
    if not birthplaces_options:
        return None

    birthdate_date = _get_date(birthdate)
    if not birthdate_date:
        # Without a birthdate there is nothing to disambiguate on.
        return birthplaces_options[0].copy()

    # The record dates are compared as naive datetimes, so the birthdate must
    # be naive as well: ordering a naive against an aware datetime raises
    # TypeError.
    birthdate_date = birthdate_date.replace(tzinfo=None)

    for birthplace_option in birthplaces_options:
        # A missing creation/deletion date means "no bound" on that side.
        date_created = _get_date(birthplace_option["date_created"]) or datetime.min
        date_deleted = _get_date(birthplace_option["date_deleted"]) or datetime.max
        date_created = date_created.replace(tzinfo=None)
        date_deleted = date_deleted.replace(tzinfo=None)
        if date_created <= birthdate_date <= date_deleted:
            return birthplace_option.copy()

    return None
191
|
|
|
|
192
|
|
|
|
193
|
|
|
def _get_omocode(code, subs, trans):
    """
    Translate the characters at the given positions and recompute the CIN.

    Only the first 15 characters of ``code`` are used; each position listed
    in ``subs`` is passed through the ``trans`` table, then the CIN computed
    by ``encode_cin`` is appended.
    """
    chars = list(code[:15])
    for position in subs:
        chars[position] = chars[position].translate(trans)
    body = "".join(chars)
    return body + encode_cin(body)
201
|
|
|
|
202
|
|
|
|
203
|
|
|
def _get_omocodes(code):
    """
    Return every omocode variant of ``code`` as a list.

    The code is first brought back to its root form by decoding all the
    substitutable positions; one variant is then produced per entry of
    ``_OMOCODIA_SUBS_INDEXES_COMBINATIONS``, in that order.
    """
    code_root = _get_omocode(
        code, subs=_OMOCODIA_SUBS_INDEXES, trans=_OMOCODIA_DECODE_TRANS
    )

    def substitute(indexes):
        return _get_omocode(code_root, subs=indexes, trans=_OMOCODIA_ENCODE_TRANS)

    return [substitute(indexes) for indexes in _OMOCODIA_SUBS_INDEXES_COMBINATIONS]
212
|
|
|
|
213
|
|
|
|
214
|
|
|
def encode_surname(surname):
    """
    Encode the surname into the 3-letter part of the italian fiscal code.

    :param surname: The surname
    :type surname: string

    :returns: The surname code: consonants first, then vowels, padded with "X"
    :rtype: string
    """
    slug = slugify(surname)
    return _get_consonants_and_vowels(_get_consonants(slug), _get_vowels(slug))
229
|
|
|
|
230
|
|
|
|
231
|
|
|
def encode_name(name):
    """
    Encode the name into the 3-letter part of the italian fiscal code.

    :param name: The name
    :type name: string

    :returns: The name code: consonants first, then vowels, padded with "X"
    :rtype: string
    """
    slug = slugify(name)
    consonants = _get_consonants(slug)
    if len(consonants) > 3:
        # With more than three consonants the second one is skipped.
        consonants.pop(1)
    return _get_consonants_and_vowels(consonants, _get_vowels(slug))
250
|
|
|
|
251
|
|
|
|
252
|
|
|
def encode_birthdate(birthdate, sex):
    """
    Encode birthdate and sex into the 5-character part of the italian fiscal code.

    :param birthdate: The birthdate
    :type birthdate: datetime or string
    :param sex: The sex, 'M' or 'F' (case-insensitive)
    :type sex: string

    :returns: Two year digits, the month letter and two day digits
        (the day is increased by 40 when sex is 'F')
    :rtype: string

    :raises ValueError: if an argument is missing, the date cannot be parsed
        or sex is not 'M' or 'F'
    """
    if not birthdate:
        raise ValueError("[codicefiscale] 'birthdate' argument cant be None")
    date = _get_date(birthdate)

    if not sex:
        raise ValueError("[codicefiscale] 'sex' argument cant be None")
    sex = sex.upper()
    if sex not in ("M", "F"):
        raise ValueError("[codicefiscale] 'sex' argument must be 'M' or 'F'")

    # Always two digits: slicing the string form of the year returned fewer
    # than two characters for years below 1000.
    year_code = f"{date.year % 100:02d}"
    month_code = _MONTHS[date.month - 1]
    day_offset = 40 if sex == "F" else 0
    day_code = f"{date.day + day_offset:02d}"
    return year_code + month_code + day_code
279
|
|
|
|
280
|
|
|
|
281
|
|
|
def encode_birthplace(birthplace, birthdate=None):
    """
    Encode the birthplace into the code used in the italian fiscal code.

    :param birthplace: The birthplace
    :type birthplace: string
    :param birthdate: Optional birthdate, forwarded to the birthplace lookup
    :type birthdate: datetime or string

    :returns: The code of the matching birthplace record
    :rtype: string

    :raises ValueError: if the birthplace is missing or cannot be mapped
    """
    if not birthplace:
        raise ValueError("[codicefiscale] 'birthplace' argument cant be None")

    # Fallback key: the text before the first comma or opening parenthesis.
    birthplace_without_province = re.split(r",|\(", birthplace)[0]

    birthplace_data = _get_birthplace(birthplace, birthdate)
    if not birthplace_data:
        birthplace_data = _get_birthplace(birthplace_without_province, birthdate)
    if not birthplace_data:
        raise ValueError(
            f"[codicefiscale] 'birthplace' argument not mapped to code: ('{birthplace}' -> '')"
        )

    return birthplace_data["code"]
307
|
|
|
|
308
|
|
|
|
309
|
|
|
def encode_cin(code):
    """
    Compute the CIN letter of an italian fiscal code.

    :param code: The code, 15 characters (without CIN) or 16 (with CIN);
        only the first 15 are used, letters may be in any case
    :type code: string

    :returns: The CIN letter
    :rtype: string

    :raises ValueError: if the code is empty or its length is not 15 or 16
    :raises KeyError: if a character is neither a digit nor an ASCII letter
    """
    if not code:
        raise ValueError("[codicefiscale] 'code' argument cant be None")

    code_len = len(code)
    if code_len not in (15, 16):
        raise ValueError(
            f"[codicefiscale] 'code' length must be 15 or 16, not: {code_len}"
        )

    # The lookup table only has uppercase keys; upper-case each character so
    # lowercase codes are accepted here like in the rest of the module.
    # Pair index 1 is used for odd (1-based) positions, index 0 for even ones.
    cin_tot = sum(
        _CIN[char.upper()][(position + 1) % 2]
        for position, char in enumerate(code[:15])
    )
    return _CIN_REMAINDERS[cin_tot % 26]
335
|
|
|
|
336
|
|
|
|
337
|
|
|
def encode(surname, name, sex, birthdate, birthplace):
    """
    Encode the italian fiscal code.

    :param surname: The surname
    :type surname: string
    :param name: The name
    :type name: string
    :param sex: The sex, 'M' or 'F'
    :type sex: string
    :param birthdate: The birthdate
    :type birthdate: datetime or string
    :param birthplace: The birthplace
    :type birthplace: string

    :returns: The italian fiscal code
    :rtype: string

    :raises ValueError: if an argument is invalid or the resulting code
        does not decode successfully
    """
    parts = [
        encode_surname(surname),
        encode_name(name),
        encode_birthdate(birthdate, sex),
        encode_birthplace(birthplace, birthdate),
    ]
    code = "".join(parts)
    code += encode_cin(code)

    # Round-trip through decode: it raises ValueError if the code is not valid.
    return decode(code)["code"]
365
|
|
|
|
366
|
|
|
|
367
|
|
|
def decode_raw(code):
    """
    Split a code into its raw fields without interpreting them.

    :param code: The code
    :type code: string

    :returns: The normalized code plus the substring of each field
        (surname, name, birthdate and its parts, birthplace, cin)
    :rtype: dict

    :raises ValueError: if the code does not match the expected syntax
    """
    # Normalize: slugify, drop the dashes, upper-case.
    code = slugify(code).replace("-", "").upper()

    match = CODICEFISCALE_RE.match(code)
    if match is None:
        raise ValueError(f"[codicefiscale] invalid syntax: {code}")

    fields = (
        "surname",
        "name",
        "birthdate",
        "birthdate_year",
        "birthdate_month",
        "birthdate_day",
        "birthplace",
        "cin",
    )
    data = {"code": code}
    for field in fields:
        data[field] = match[field]
    return data
398
|
|
|
|
399
|
|
|
|
400
|
|
|
def decode(code):
    """
    Decode and validate the italian fiscal code.

    :param code: The code
    :type code: string

    :returns: A dict with the normalized code ("code"), all its omocode
        variants ("omocodes"), the sex ("sex"), the birthdate ("birthdate"),
        the birthplace record ("birthplace") and the raw fields ("raw").
    :rtype: dict

    :raises ValueError: on invalid syntax, invalid date, unknown birthplace
        code or wrong CIN
    """
    raw = decode_raw(code)
    code = raw["code"]

    # Turn omocodia letters back into digits before reading the numbers.
    year_digits = raw["birthdate_year"].translate(_OMOCODIA_DECODE_TRANS)
    month = _MONTHS.index(raw["birthdate_month"]) + 1
    day = int(raw["birthdate_day"].translate(_OMOCODIA_DECODE_TRANS))

    # A day above 40 marks a female holder; the real day is 40 less.
    sex = "M"
    if day > 40:
        day -= 40
        sex = "F"

    # Only two year digits are stored: assume the current century and fall
    # back to the previous one when that would give a year in the future.
    this_year = datetime.now().year
    year = int(f"{str(this_year)[:-2]}{year_digits}")
    if year > this_year:
        year -= 100

    birthdate_str = f"{year}/{month}/{day}"
    birthdate = _get_date(birthdate_str, separator="/")
    if not birthdate:
        raise ValueError(f"[codicefiscale] invalid date: {birthdate_str}")

    # The first birthplace character is kept as it is; only the remaining
    # three may carry omocodia letters.
    place_field = raw["birthplace"]
    birthplace_code = place_field[0] + place_field[1:].translate(
        _OMOCODIA_DECODE_TRANS
    )
    birthplace = _get_birthplace(birthplace_code, birthdate)
    if not birthplace:
        raise ValueError(f"[codicefiscale] wrong birthplace code: '{birthplace_code}'")

    cin = raw["cin"]
    cin_check = encode_cin(code)
    if cin != cin_check:
        raise ValueError(
            f"[codicefiscale] wrong CIN (Control Internal Number): expected '{cin_check}', found '{cin}'"
        )

    return {
        "code": code,
        "omocodes": _get_omocodes(code),
        "sex": sex,
        "birthdate": birthdate,
        "birthplace": birthplace,
        "raw": raw,
    }
461
|
|
|
|
462
|
|
|
|
463
|
|
|
def is_omocode(code):
    """
    Determines whether the specified code is omocode or not.

    :param code: The code
    :type code: string

    :returns: True if the specified code is omocode, False otherwise.
    :rtype: boolean

    :raises ValueError: if the code is not valid
    """
    data = decode(code)
    # The first entry of "omocodes" is the variant with no substitution, so
    # it is skipped. Compare the normalized code returned by decode rather
    # than the caller's raw string: the variants are upper-case, so a
    # lowercase or separator-containing omocode would never match otherwise.
    return data["code"] in data["omocodes"][1:]
477
|
|
|
|
478
|
|
|
|
479
|
|
|
def is_valid(code):
    """
    Determines whether the specified code is valid.

    :param code: The code
    :type code: string

    :returns: True if the code decodes without a ValueError, False otherwise.
    :rtype: boolean
    """
    try:
        decode(code)
    except ValueError:
        return False
    return True
494
|
|
|
|