1
|
|
|
# -*- coding: utf-8 -*- |
2
|
|
|
|
3
|
|
|
from datetime import datetime |
4
|
|
|
from dateutil import parser as date_parser |
5
|
|
|
from itertools import combinations |
6
|
|
|
from slugify import slugify |
7
|
|
|
|
8
|
|
|
import fsutil |
9
|
|
|
import re |
10
|
|
|
import string |
11
|
|
|
|
12
|
|
|
|
13
|
|
|
_CONSONANTS = list("bcdfghjklmnpqrstvwxyz") |
14
|
|
|
_VOWELS = list("aeiou") |
15
|
|
|
_MONTHS = list("ABCDEHLMPRST") |
16
|
|
|
_CIN = { |
17
|
|
|
"0": (0, 1), |
18
|
|
|
"1": (1, 0), |
19
|
|
|
"2": (2, 5), |
20
|
|
|
"3": (3, 7), |
21
|
|
|
"4": (4, 9), |
22
|
|
|
"5": (5, 13), |
23
|
|
|
"6": (6, 15), |
24
|
|
|
"7": (7, 17), |
25
|
|
|
"8": (8, 19), |
26
|
|
|
"9": (9, 21), |
27
|
|
|
"A": (0, 1), |
28
|
|
|
"B": (1, 0), |
29
|
|
|
"C": (2, 5), |
30
|
|
|
"D": (3, 7), |
31
|
|
|
"E": (4, 9), |
32
|
|
|
"F": (5, 13), |
33
|
|
|
"G": (6, 15), |
34
|
|
|
"H": (7, 17), |
35
|
|
|
"I": (8, 19), |
36
|
|
|
"J": (9, 21), |
37
|
|
|
"K": (10, 2), |
38
|
|
|
"L": (11, 4), |
39
|
|
|
"M": (12, 18), |
40
|
|
|
"N": (13, 20), |
41
|
|
|
"O": (14, 11), |
42
|
|
|
"P": (15, 3), |
43
|
|
|
"Q": (16, 6), |
44
|
|
|
"R": (17, 8), |
45
|
|
|
"S": (18, 12), |
46
|
|
|
"T": (19, 14), |
47
|
|
|
"U": (20, 16), |
48
|
|
|
"V": (21, 10), |
49
|
|
|
"W": (22, 22), |
50
|
|
|
"X": (23, 25), |
51
|
|
|
"Y": (24, 24), |
52
|
|
|
"Z": (25, 23), |
53
|
|
|
} |
54
|
|
|
_CIN_REMAINDERS = list(string.ascii_uppercase) |
55
|
|
|
|
56
|
|
|
_OMOCODIA = { |
57
|
|
|
"0": "L", |
58
|
|
|
"1": "M", |
59
|
|
|
"2": "N", |
60
|
|
|
"3": "P", |
61
|
|
|
"4": "Q", |
62
|
|
|
"5": "R", |
63
|
|
|
"6": "S", |
64
|
|
|
"7": "T", |
65
|
|
|
"8": "U", |
66
|
|
|
"9": "V", |
67
|
|
|
} |
68
|
|
|
_OMOCODIA_DIGITS = "".join([digit for digit in _OMOCODIA]) |
69
|
|
|
_OMOCODIA_LETTERS = "".join([_OMOCODIA[digit] for digit in _OMOCODIA]) |
70
|
|
|
_OMOCODIA_ENCODE_TRANS = "".maketrans(_OMOCODIA_DIGITS, _OMOCODIA_LETTERS) |
71
|
|
|
_OMOCODIA_DECODE_TRANS = "".maketrans(_OMOCODIA_LETTERS, _OMOCODIA_DIGITS) |
72
|
|
|
_OMOCODIA_SUBS_INDEXES = list(reversed([6, 7, 9, 10, 12, 13, 14])) |
73
|
|
|
_OMOCODIA_SUBS_INDEXES_COMBINATIONS = [[]] |
74
|
|
|
for combo_size in range(1, len(_OMOCODIA_SUBS_INDEXES) + 1): |
75
|
|
|
for combo in combinations(_OMOCODIA_SUBS_INDEXES, combo_size): |
76
|
|
|
_OMOCODIA_SUBS_INDEXES_COMBINATIONS.append(list(combo)) |
77
|
|
|
|
78
|
|
|
|
79
|
|
|
def _get_data(filename):
    """
    Read a JSON file located under ``data/`` and return its parsed content.

    :param filename: The name of the file inside the ``data`` folder
    :type filename: string

    :returns: The parsed JSON content
    """
    # The path is built by fsutil from this module's ``__file__``.
    data_path = fsutil.join_path(__file__, f"data/{filename}")
    return fsutil.read_file_json(data_path)
81
|
|
|
|
82
|
|
|
|
83
|
|
|
def _get_indexed_data():
    """
    Load municipalities and countries and index them for fast lookup.

    :returns: A dict with three indexes: ``municipalities`` (by name slug and
        by ``<name slug>-<province>``), ``countries`` (by name slug) and
        ``codes`` (code -> list of all records sharing that code).
    :rtype: dict
    """
    municipalities = _get_data("municipalities.json")
    countries = _get_data("countries.json")

    by_municipality = {}
    by_country = {}
    by_code = {}

    for municipality in municipalities:
        province_suffix = "-" + municipality["province"].lower()
        for slug in municipality["name_slugs"]:
            by_municipality[slug] = municipality
            by_municipality[slug + province_suffix] = municipality
        # Several records may share one code, so keep all of them.
        by_code.setdefault(municipality["code"], []).append(municipality)

    for country in countries:
        for slug in country["name_slugs"]:
            by_country[slug] = country
        by_code.setdefault(country["code"], []).append(country)

    return {
        "municipalities": by_municipality,
        "countries": by_country,
        "codes": by_code,
    }
113
|
|
|
|
114
|
|
|
|
115
|
|
|
# Lookup indexes built once at import time and shared by the encode/decode functions.
_DATA = _get_indexed_data()
116
|
|
|
|
117
|
|
|
# Syntax of a 16-character fiscal code (case-insensitive). Groups, in order:
# surname, name, birthdate (with nested year, month letter, day), birthplace, cin.
CODICEFISCALE_RE = re.compile(
    "".join(
        (
            r"^",
            r"([a-z]{3})",  # surname
            r"([a-z]{3})",  # name
            r"(([a-z\d]{2})([abcdehlmprst]{1})([a-z\d]{2}))",  # birthdate
            r"([a-z]{1}[a-z\d]{3})",  # birthplace
            r"([a-z]{1})$",  # control character
        )
    ),
    flags=re.IGNORECASE,
)
126
|
|
|
|
127
|
|
|
|
128
|
|
|
def _get_consonants(s): |
129
|
|
|
return [char for char in s if char in _CONSONANTS] |
130
|
|
|
|
131
|
|
|
|
132
|
|
|
def _get_vowels(s): |
133
|
|
|
return [char for char in s if char in _VOWELS] |
134
|
|
|
|
135
|
|
|
|
136
|
|
|
def _get_consonants_and_vowels(consonants, vowels): |
137
|
|
|
return "".join(list(consonants[:3] + vowels[:3] + (["X"] * 3))[:3]).upper() |
138
|
|
|
|
139
|
|
|
|
140
|
|
|
def _get_omocode(code, subs, trans):
    """
    Apply a translation table to selected positions and recompute the CIN.

    :param code: The fiscal code (only its first 15 characters are used)
    :type code: string
    :param subs: The 0-based positions to translate
    :type subs: list
    :param trans: A ``str.translate`` table applied to each selected character

    :returns: The 15 translated characters followed by their control character
    :rtype: string
    """
    chars = list(code[:15])
    for position in subs:
        chars[position] = chars[position].translate(trans)
    partial = "".join(chars)
    return partial + encode_cin(partial)
148
|
|
|
|
149
|
|
|
|
150
|
|
|
def _get_omocodes(code):
    """
    List every omocode variant of a fiscal code.

    The code is first reduced to its digit-only form by decoding all the
    substitutable positions, then one variant is generated per combination
    of positions. The first entry uses the empty combination, i.e. it is the
    digit-only form itself.

    :param code: The fiscal code
    :type code: string

    :returns: The list of variants
    :rtype: list
    """
    base_code = _get_omocode(
        code, subs=_OMOCODIA_SUBS_INDEXES, trans=_OMOCODIA_DECODE_TRANS
    )
    variants = []
    for positions in _OMOCODIA_SUBS_INDEXES_COMBINATIONS:
        variants.append(
            _get_omocode(base_code, subs=positions, trans=_OMOCODIA_ENCODE_TRANS)
        )
    return variants
159
|
|
|
|
160
|
|
|
|
161
|
|
|
def encode_surname(surname):
    """
    Encode the surname to the three-letter code used in the italian fiscal code.

    Consonants are taken first, then vowels, padding with ``X`` if needed.

    :param surname: The surname
    :type surname: string

    :returns: The code used in italian fiscal code
    :rtype: string
    """
    slug = slugify(surname)
    return _get_consonants_and_vowels(_get_consonants(slug), _get_vowels(slug))
176
|
|
|
|
177
|
|
|
|
178
|
|
|
def encode_name(name):
    """
    Encode the name to the three-letter code used in the italian fiscal code.

    When the name has more than three consonants the second one is skipped,
    otherwise the rule is the same as for the surname.

    :param name: The name
    :type name: string

    :returns: The code used in italian fiscal code
    :rtype: string
    """
    slug = slugify(name)
    consonants = _get_consonants(slug)
    if len(consonants) >= 4:
        # Drop the second consonant so that the 1st, 3rd and 4th are used.
        consonants.pop(1)
    return _get_consonants_and_vowels(consonants, _get_vowels(slug))
197
|
|
|
|
198
|
|
|
|
199
|
|
|
def encode_birthdate(birthdate, sex):
    """
    Encode birthdate and sex to the five-character code used in the italian fiscal code.

    :param birthdate: The birthdate
    :type birthdate: datetime or string
    :param sex: The sex, 'M' or 'F' (case-insensitive)
    :type sex: string

    :returns: Two-digit year, month letter and two-digit day (plus 40 for 'F')
    :rtype: string

    :raises ValueError: if an argument is empty, ``sex`` is not 'M'/'F' or the
        birthdate string cannot be parsed
    """
    if not birthdate:
        raise ValueError("[codicefiscale] 'birthdate' argument cant be None")
    if not sex:
        raise ValueError("[codicefiscale] 'sex' argument cant be None")

    sex = sex.upper()
    if sex not in ("M", "F"):
        raise ValueError("[codicefiscale] 'sex' argument must be 'M' or 'F'")

    if isinstance(birthdate, datetime):
        date_obj = birthdate
    else:
        date_slug = slugify(birthdate)
        # A leading 4-character chunk is taken as the year; otherwise the day comes first.
        leading_chunk = date_slug.split("-")[0]
        if len(leading_chunk) == 4:
            parse_options = {"yearfirst": True}
        else:
            parse_options = {"dayfirst": True}
        try:
            date_obj = date_parser.parse(date_slug, **parse_options)
        except ValueError as e:
            raise ValueError(f"[codicefiscale] {e}")

    day_offset = 40 if sex == "F" else 0
    return "".join(
        (
            str(date_obj.year)[2:],
            _MONTHS[date_obj.month - 1],
            str(date_obj.day + day_offset).zfill(2),
        )
    )
240
|
|
|
|
241
|
|
|
|
242
|
|
|
def encode_birthplace(birthplace):
    """
    Encode the birthplace to the code used in the italian fiscal code.

    The value is looked up as a municipality name, then as a country name,
    then as a code. If nothing matches, the lookup is retried with the text
    preceding the first ``,`` or ``(``.

    :param birthplace: The birthplace
    :type birthplace: string

    :returns: The code used in italian fiscal code
    :rtype: string

    :raises ValueError: if ``birthplace`` is empty or cannot be mapped to a code
    """
    if not birthplace:
        raise ValueError("[codicefiscale] 'birthplace' argument cant be None")

    def lookup(place):
        slug = slugify(place)
        municipalities = _DATA["municipalities"]
        if slug in municipalities:
            record = municipalities[slug]
        else:
            record = _DATA["countries"].get(slug)
        if record:
            return record.get("code", "")
        # Not a known name: maybe the caller passed the code itself.
        candidate = slug.upper()
        if candidate in _DATA["codes"]:
            return candidate
        return ""

    birthplace_code = lookup(birthplace)
    if not birthplace_code:
        birthplace_code = lookup(re.split(r",|\(", birthplace)[0])

    if birthplace_code == "":
        raise ValueError(
            f"[codicefiscale] 'birthplace' argument not mapped to code: ('{birthplace}' -> '')"
        )

    return birthplace_code
280
|
|
|
|
281
|
|
|
|
282
|
|
|
def encode_cin(code):
    """
    Compute the control character (CIN) of an italian fiscal code.

    Only the first 15 characters are used, so a full 16-character code can be
    passed to recompute its expected control character.

    :param code: The code (15 or 16 characters)
    :type code: string

    :returns: The control character
    :rtype: string

    :raises ValueError: if ``code`` is empty or its length is not 15 or 16
    """
    if not code:
        raise ValueError("[codicefiscale] 'code' argument cant be None")

    code_len = len(code)
    if code_len not in (15, 16):
        raise ValueError(
            f"[codicefiscale] 'code' length must be 15 or 16, not: {code_len}"
        )

    # 0-based even offsets are odd positions (1st, 3rd, ...) and use the
    # second value of the _CIN pair; the others use the first value.
    total = sum(
        _CIN[char][1 - offset % 2] for offset, char in enumerate(code[:15])
    )
    return _CIN_REMAINDERS[total % 26]
308
|
|
|
|
309
|
|
|
|
310
|
|
|
def encode(surname, name, sex, birthdate, birthplace):
    """
    Encode personal data to the italian fiscal code.

    :param surname: The surname
    :type surname: string
    :param name: The name
    :type name: string
    :param sex: The sex, 'M' or 'F'
    :type sex: string
    :param birthdate: The birthdate
    :type birthdate: datetime or string
    :param birthplace: The birthplace
    :type birthplace: string

    :returns: The italian fiscal code
    :rtype: string

    :raises ValueError: if an argument cannot be encoded or the resulting
        code does not decode correctly
    """
    partial_code = "".join(
        (
            encode_surname(surname),
            encode_name(name),
            encode_birthdate(birthdate, sex),
            encode_birthplace(birthplace),
        )
    )
    full_code = partial_code + encode_cin(partial_code)

    # Round-trip through decode: it raises ValueError if the code is not valid.
    return decode(full_code)["code"]
338
|
|
|
|
339
|
|
|
|
340
|
|
|
def decode_raw(code):
    """
    Split a fiscal code into its raw, still encoded, components.

    The input is slugified, stripped of dashes and upper-cased before being
    matched against ``CODICEFISCALE_RE``.

    :param code: The code
    :type code: string

    :returns: The normalized code and the raw substring of each component
    :rtype: dict

    :raises ValueError: if the normalized code does not match the expected syntax
    """
    code = slugify(code).replace("-", "").upper()

    match = CODICEFISCALE_RE.match(code)
    if match is None:
        raise ValueError(f"[codicefiscale] invalid syntax: {code}")

    # Field names in the same order as the capturing groups of the pattern.
    field_names = (
        "surname",
        "name",
        "birthdate",
        "birthdate_year",
        "birthdate_month",
        "birthdate_day",
        "birthplace",
        "cin",
    )
    data = {"code": code}
    data.update(zip(field_names, match.groups()))
    return data
374
|
|
|
|
375
|
|
|
|
376
|
|
|
def _parse_validity_date(value, default):
    """Parse a ``YYYY-MM-DD`` string; return ``default`` if it is missing or unparsable."""
    try:
        return datetime.strptime(value, "%Y-%m-%d")
    except (TypeError, ValueError):
        return default


def decode(code):
    """
    Decode the italian fiscal code.

    :param code: The code
    :type code: string

    :returns: A dict with the normalized ``code``, the list of its
        ``omocodes``, the ``sex`` ('M' or 'F'), the ``birthdate`` (datetime),
        the ``birthplace`` record (``None`` when no record matches the code
        and the birthdate) and the ``raw`` components.
    :rtype: dict

    :raises ValueError: if the syntax, the date or the control character is invalid
    """
    raw = decode_raw(code)
    code = raw["code"]

    # Year and day may contain omocodia letters: turn them back into digits.
    birthdate_year = raw["birthdate_year"].translate(_OMOCODIA_DECODE_TRANS)
    birthdate_month = _MONTHS.index(raw["birthdate_month"]) + 1
    birthdate_day = int(raw["birthdate_day"].translate(_OMOCODIA_DECODE_TRANS))

    # Days above 40 denote a female: the real day is the value minus 40.
    if birthdate_day > 40:
        birthdate_day -= 40
        sex = "F"
    else:
        sex = "M"

    # Only two digits of the year are stored: assume the current century and
    # fall back to the previous one when that would give a future year.
    current_year = datetime.now().year
    current_year_century_prefix = str(current_year)[0:-2]
    birthdate_year_int = int(f"{current_year_century_prefix}{birthdate_year}")
    if birthdate_year_int > current_year:
        birthdate_year_int -= 100
    birthdate_str = f"{birthdate_year_int}/{birthdate_month}/{birthdate_day}"
    try:
        birthdate = datetime.strptime(birthdate_str, "%Y/%m/%d")
    except ValueError as err:
        raise ValueError(f"[codicefiscale] invalid date: {birthdate_str}") from err

    # The first birthplace character is always a letter; only the following
    # three may have been replaced by omocodia letters.
    birthplace_code = raw["birthplace"][0] + raw["birthplace"][1:].translate(
        _OMOCODIA_DECODE_TRANS
    )
    # An unknown code yields no candidates instead of iterating over None.
    candidates = _DATA["codes"].get(birthplace_code) or []
    birthplace = None
    for candidate in candidates:
        # A missing or unparsable bound means the record is open-ended on that side.
        date_created = _parse_validity_date(
            candidate.get("date_created"), datetime.min
        )
        date_deleted = _parse_validity_date(
            candidate.get("date_deleted"), datetime.max
        )
        if date_created <= birthdate <= date_deleted:
            birthplace = candidate
            break

    cin = raw["cin"]
    cin_check = encode_cin(code)
    if cin != cin_check:
        raise ValueError(
            f"[codicefiscale] wrong CIN (Control Internal Number): expected '{cin_check}', found '{cin}'"
        )

    return {
        "code": code,
        "omocodes": _get_omocodes(code),
        "sex": sex,
        "birthdate": birthdate,
        "birthplace": birthplace,
        "raw": raw,
    }
448
|
|
|
|
449
|
|
|
|
450
|
|
|
def is_omocode(code):
    """
    Determine whether the specified code is omocode or not.

    The comparison uses the normalized code returned by ``decode``, so
    lower-case or dash-separated input is handled like its upper-case form.

    :param code: The code
    :type code: string

    :returns: True if the specified code is omocode, False otherwise.
    :rtype: boolean

    :raises ValueError: if the code is not valid
    """
    data = decode(code)
    # The first entry is the variant without any substitution, i.e. the
    # plain (non-omocode) form, so it is excluded from the comparison.
    omocode_variants = data["omocodes"][1:]
    return data["code"] in omocode_variants
464
|
|
|
|
465
|
|
|
|
466
|
|
|
def is_valid(code):
    """
    Determine whether the specified code is valid.

    A code is considered valid when ``decode`` accepts it without raising
    ``ValueError``.

    :param code: The code
    :type code: string

    :returns: True if the specified code is valid, False otherwise.
    :rtype: boolean
    """
    try:
        decode(code)
    except ValueError:
        return False
    else:
        return True
481
|
|
|
|