1
|
|
|
# -*- coding: utf-8 -*- |
2
|
|
|
|
3
|
|
|
from datetime import datetime |
4
|
|
|
from dateutil import parser as date_parser |
5
|
|
|
from itertools import combinations |
6
|
|
|
from slugify import slugify |
7
|
|
|
|
8
|
|
|
import fsutil |
9
|
|
|
import re |
10
|
|
|
import string |
11
|
|
|
|
12
|
|
|
# Pick the translation-table factory: the ``str`` method when it exists,
# otherwise the function from the ``string`` module.
try:
    maketrans = "".maketrans
except AttributeError:
    # fallback for Python 2
    maketrans = string.maketrans


# Lowercase letters treated as consonants / vowels by the name encoders.
_CONSONANTS = list("bcdfghjklmnpqrstvwxyz")
_VOWELS = list("aeiou")

# Month letters, looked up with ``month - 1``.
_MONTHS = list("ABCDEHLMPRST")

# Control-character weights: every character maps to a 2-tuple and
# ``encode_cin`` picks one of the two values depending on the position.
_CIN = {
    "A": (0, 1),
    "B": (1, 0),
    "C": (2, 5),
    "D": (3, 7),
    "E": (4, 9),
    "F": (5, 13),
    "G": (6, 15),
    "H": (7, 17),
    "I": (8, 19),
    "J": (9, 21),
    "K": (10, 2),
    "L": (11, 4),
    "M": (12, 18),
    "N": (13, 20),
    "O": (14, 11),
    "P": (15, 3),
    "Q": (16, 6),
    "R": (17, 8),
    "S": (18, 12),
    "T": (19, 14),
    "U": (20, 16),
    "V": (21, 10),
    "W": (22, 22),
    "X": (23, 25),
    "Y": (24, 24),
    "Z": (25, 23),
}
# The digits 0-9 carry exactly the same weights as the letters A-J.
_CIN.update(
    {
        str(number): _CIN[letter]
        for number, letter in enumerate(string.ascii_uppercase[:10])
    }
)
# The sum of the weights modulo 26 indexes into this list.
_CIN_REMAINDERS = list(string.ascii_uppercase)

# Digit -> letter substitution table used to build omocodes.
_OMOCODIA = {
    "0": "L",
    "1": "M",
    "2": "N",
    "3": "P",
    "4": "Q",
    "5": "R",
    "6": "S",
    "7": "T",
    "8": "U",
    "9": "V",
}
# keys() and values() iterate in matching order, so both strings line up.
_OMOCODIA_DIGITS = "".join(_OMOCODIA.keys())
_OMOCODIA_LETTERS = "".join(_OMOCODIA.values())
_OMOCODIA_ENCODE_TRANS = maketrans(_OMOCODIA_DIGITS, _OMOCODIA_LETTERS)
_OMOCODIA_DECODE_TRANS = maketrans(_OMOCODIA_LETTERS, _OMOCODIA_DIGITS)
# 0-based positions of the code that may be substituted, last position first.
_OMOCODIA_SUBS_INDEXES = [6, 7, 9, 10, 12, 13, 14][::-1]
# Every subset of the positions above, starting with the empty one.
_OMOCODIA_SUBS_INDEXES_COMBINATIONS = [[]]
for combo_size in range(1, len(_OMOCODIA_SUBS_INDEXES) + 1):
    for combo in combinations(_OMOCODIA_SUBS_INDEXES, combo_size):
        _OMOCODIA_SUBS_INDEXES_COMBINATIONS += [list(combo)]
83
|
|
|
|
84
|
|
|
|
85
|
|
|
def _get_data(filename):
    """
    Read a JSON file from the ``data`` path joined onto this module's file.

    :param filename: The name of the file under ``data/``
    :type filename: string

    :returns: The parsed JSON content
    """
    relative_path = "data/{}".format(filename)
    data_path = fsutil.join_path(__file__, relative_path)
    return fsutil.read_file_json(data_path)
87
|
|
|
|
88
|
|
|
|
89
|
|
|
def _get_indexed_data():
    """
    Build the lookup tables used to encode and decode birthplaces.

    :returns: A dict with three indexes: ``municipalities`` (by name slug and
        by ``<name slug>-<province>``), ``countries`` (by name slug) and
        ``codes`` (by municipality/country code).
    :rtype: dict
    """
    municipalities = _get_data("municipalities.json")
    countries = _get_data("countries.json")

    by_municipality = {}
    by_country = {}
    by_code = {}

    deleted_suffix = "(soppresso)"
    for municipality in municipalities:
        code = municipality["code"]
        full_name = municipality["name"]
        province = municipality["province"].lower()
        # a record may list several names separated by "/"
        for name in full_name.replace(deleted_suffix, "").strip().split("/"):
            slug = slugify(name)
            by_municipality[slug] = municipality
            by_municipality[slug + "-" + province] = municipality
        # a record flagged as deleted never replaces an already indexed code
        if deleted_suffix not in full_name or code not in by_code:
            by_code[code] = municipality

    for country in countries:
        code = country["code"]
        for name in country["name"].strip().split("/"):
            by_country[slugify(name)] = country
        by_code[code] = country

    return {
        "municipalities": by_municipality,
        "countries": by_country,
        "codes": by_code,
    }


# Indexes are built once, at import time.
_DATA = _get_indexed_data()
121
|
|
|
|
122
|
|
|
# Syntax of a 16-character fiscal code, matched case-insensitively.
# Groups: surname, name, birthdate (year, month letter, day), birthplace, cin.
CODICEFISCALE_RE = re.compile(
    r"^"
    r"([a-z]{3})"  # surname
    r"([a-z]{3})"  # name
    r"(([a-z\d]{2})([abcdehlmprst]{1})([a-z\d]{2}))"  # birthdate
    r"([a-z]{1}[a-z\d]{3})"  # birthplace
    r"([a-z]{1})$",  # cin
    flags=re.IGNORECASE,
)
131
|
|
|
|
132
|
|
|
|
133
|
|
|
def _get_consonants(s):
    """
    Collect, in order, the characters of ``s`` listed in ``_CONSONANTS``.

    :param s: The text to scan
    :type s: string

    :returns: The matching characters
    :rtype: list
    """
    found = []
    for letter in s:
        if letter in _CONSONANTS:
            found.append(letter)
    return found
135
|
|
|
|
136
|
|
|
|
137
|
|
|
def _get_vowels(s):
    """
    Collect, in order, the characters of ``s`` listed in ``_VOWELS``.

    :param s: The text to scan
    :type s: string

    :returns: The matching characters
    :rtype: list
    """
    found = []
    for letter in s:
        if letter in _VOWELS:
            found.append(letter)
    return found
139
|
|
|
|
140
|
|
|
|
141
|
|
|
def _get_consonants_and_vowels(consonants, vowels):
    """
    Build a 3-letter uppercase code from consonants, then vowels, then "X".

    :param consonants: The consonants, in order
    :type consonants: list
    :param vowels: The vowels, in order
    :type vowels: list

    :returns: The first three available characters, uppercased
    :rtype: string
    """
    # consonants come first, vowels fill the gaps, "X" pads what is left
    candidates = consonants[:3] + vowels[:3] + ["X", "X", "X"]
    return "".join(candidates[:3]).upper()
143
|
|
|
|
144
|
|
|
|
145
|
|
|
def _get_omocode(code, subs, trans):
    """
    Apply a translation table at the given positions and recompute the CIN.

    :param code: The code (only its first 15 characters are used)
    :type code: string
    :param subs: The 0-based positions to translate
    :type subs: list
    :param trans: The translation table applied to each selected character

    :returns: The 15 translated characters followed by their control character
    :rtype: string
    """
    chars = list(code[:15])
    for position in subs:
        chars[position] = chars[position].translate(trans)
    body = "".join(chars)
    return body + encode_cin(body)
153
|
|
|
|
154
|
|
|
|
155
|
|
|
def _get_omocodes(code):
    """
    List every omocode variant of the given code.

    :param code: The code
    :type code: string

    :returns: One code per entry of ``_OMOCODIA_SUBS_INDEXES_COMBINATIONS``,
        in the same order
    :rtype: list
    """
    # first turn any substituted letter back into its digit
    root = _get_omocode(
        code, subs=_OMOCODIA_SUBS_INDEXES, trans=_OMOCODIA_DECODE_TRANS
    )
    variants = []
    for positions in _OMOCODIA_SUBS_INDEXES_COMBINATIONS:
        variants.append(
            _get_omocode(root, subs=positions, trans=_OMOCODIA_ENCODE_TRANS)
        )
    return variants
164
|
|
|
|
165
|
|
|
|
166
|
|
|
def encode_surname(surname):
    """
    Encode the surname into the 3-letter part of the italian fiscal code.

    :param surname: The surname
    :type surname: string

    :returns: The code used in italian fiscal code
    :rtype: string
    """
    slug = slugify(surname)
    return _get_consonants_and_vowels(_get_consonants(slug), _get_vowels(slug))
181
|
|
|
|
182
|
|
|
|
183
|
|
|
def encode_name(name):
    """
    Encode the name into the 3-letter part of the italian fiscal code.

    :param name: The name
    :type name: string

    :returns: The code used in italian fiscal code
    :rtype: string
    """
    slug = slugify(name)
    consonants = _get_consonants(slug)
    # with more than three consonants the second one is skipped
    if len(consonants) > 3:
        consonants = consonants[:1] + consonants[2:]
    vowels = _get_vowels(slug)
    return _get_consonants_and_vowels(consonants, vowels)
202
|
|
|
|
203
|
|
|
|
204
|
|
|
def encode_birthdate(birthdate, sex):
    """
    Encodes birthdate to the code used in italian fiscal code.

    :param birthdate: The birthdate
    :type birthdate: datetime, date or string
    :param sex: The sex, 'M' or 'F'
    :type sex: string

    :returns: The code used in italian fiscal code
    :rtype: string

    :raises ValueError: If an argument is missing, the sex is not 'M' or 'F',
        or the birthdate string cannot be parsed
    """
    # imported here because the module only imports ``datetime`` at the top
    from datetime import date

    if not birthdate:
        raise ValueError("[codicefiscale] 'birthdate' argument cant be None")

    if not sex:
        raise ValueError("[codicefiscale] 'sex' argument cant be None")

    sex = sex.upper()

    if sex not in ["M", "F"]:
        raise ValueError("[codicefiscale] 'sex' argument must be 'M' or 'F'")

    if isinstance(birthdate, date):
        # ``datetime`` is a subclass of ``date``: both are used as they are
        date_obj = birthdate
    else:
        date_slug = slugify(birthdate)
        date_parts = date_slug.split("-")[:3]
        # a 4-character first part is read as the year, otherwise as the day
        date_kwargs = (
            {"yearfirst": True} if len(date_parts[0]) == 4 else {"dayfirst": True}
        )
        try:
            date_obj = date_parser.parse(date_slug, **date_kwargs)
        except ValueError as e:
            raise ValueError("[codicefiscale] {}".format(e))

    year_code = str(date_obj.year)[2:]
    month_code = _MONTHS[date_obj.month - 1]
    # 40 is added to the day for 'F'
    day_code = str(date_obj.day + (40 if sex == "F" else 0)).zfill(2)
    return year_code + month_code + day_code
245
|
|
|
|
246
|
|
|
|
247
|
|
|
def encode_birthplace(birthplace):
    """
    Encodes birthplace to the code used in italian fiscal code.

    The value is looked up as a municipality name, then as a country name,
    then as a code. If nothing matches, the lookup is repeated with only the
    text before the first "," or "(".

    :param birthplace: The birthplace
    :type birthplace: string

    :returns: The code used in italian fiscal code
    :rtype: string

    :raises ValueError: If the birthplace is empty or cannot be mapped to a code
    """
    if not birthplace:
        raise ValueError("[codicefiscale] 'birthplace' argument cant be None")

    def find_birthplace_code(value):
        value_slug = slugify(value)
        # lookup order: municipality name, country name, code
        if value_slug in _DATA["municipalities"]:
            match = _DATA["municipalities"][value_slug]
        elif value_slug in _DATA["countries"]:
            match = _DATA["countries"][value_slug]
        else:
            match = _DATA["codes"].get(value_slug.upper(), {})
        return match.get("code", "")

    birthplace_code = find_birthplace_code(birthplace) or find_birthplace_code(
        re.split(r",|\(", birthplace)[0]
    )

    if birthplace_code == "":
        raise ValueError(
            "[codicefiscale] 'birthplace' argument not mapped to code: ('{}' -> '')".format(
                birthplace
            )
        )

    return birthplace_code
283
|
|
|
|
284
|
|
|
|
285
|
|
|
def encode_cin(code):
    """
    Encodes cin to the code used in italian fiscal code.

    Only the first 15 characters are used, so a full 16-character code can be
    passed to recompute its control character. Letters are accepted in any
    case.

    :param code: The code
    :type code: string

    :returns: The code used in italian fiscal code
    :rtype: string

    :raises ValueError: If the code is empty or its length is not 15 or 16
    """
    if not code:
        raise ValueError("[codicefiscale] 'code' argument cant be None")

    code_len = len(code)
    if code_len not in [15, 16]:
        raise ValueError(
            "[codicefiscale] 'code' length must be 15 or 16, not: {}".format(code_len)
        )

    cin_tot = 0
    # ``_CIN`` only has uppercase keys, so normalize the case before the lookup
    for position, char in enumerate(code[0:15].upper(), 1):
        # odd 1-based positions use the second weight, even ones the first
        cin_tot += _CIN[char][position % 2]

    return _CIN_REMAINDERS[cin_tot % 26]
311
|
|
|
|
312
|
|
|
|
313
|
|
|
def encode(surname, name, sex, birthdate, birthplace):
    """
    Build the italian fiscal code from the personal data.

    :param surname: The surname
    :type surname: string
    :param name: The name
    :type name: string
    :param sex: The sex, 'M' or 'F'
    :type sex: string
    :param birthdate: The birthdate
    :type birthdate: datetime or string
    :param birthplace: The birthplace
    :type birthplace: string

    :returns: The italian fiscal code
    :rtype: string
    """
    parts = [
        encode_surname(surname),
        encode_name(name),
        encode_birthdate(birthdate, sex),
        encode_birthplace(birthplace),
    ]
    code = "".join(parts)
    code = code + encode_cin(code)

    # decoding raises ValueError if the generated code is not valid
    return decode(code)["code"]
341
|
|
|
|
342
|
|
|
|
343
|
|
|
def decode_raw(code):
    """
    Split the code into its raw parts, without interpreting them.

    :param code: The code
    :type code: string

    :returns: The raw data associated to the code.
    :rtype: dict

    :raises ValueError: If the normalized code does not match the expected syntax
    """
    # normalize: slugify, drop the dashes, uppercase
    code = slugify(code).replace("-", "").upper()

    match = CODICEFISCALE_RE.match(code)
    if match is None:
        raise ValueError("[codicefiscale] invalid syntax: {}".format(code))

    (
        surname,
        name,
        birthdate,
        birthdate_year,
        birthdate_month,
        birthdate_day,
        birthplace,
        cin,
    ) = match.groups()

    return {
        "code": code,
        "surname": surname,
        "name": name,
        "birthdate": birthdate,
        "birthdate_year": birthdate_year,
        "birthdate_month": birthdate_month,
        "birthdate_day": birthdate_day,
        "birthplace": birthplace,
        "cin": cin,
    }
377
|
|
|
|
378
|
|
|
|
379
|
|
|
def decode(code):
    """
    Decodes the italian fiscal code.

    :param code: The code
    :type code: string

    :returns: The data associated to the code and some additional info:
        ``code``, ``omocodes``, ``sex``, ``birthdate``, ``birthplace``
        (``None`` when the code is not in the index) and ``raw``.
    :rtype: dict

    :raises ValueError: If the syntax, the birthdate or the CIN is not valid
    """
    raw = decode_raw(code)

    code = raw["code"]

    birthdate_year = raw["birthdate_year"].translate(_OMOCODIA_DECODE_TRANS)
    birthdate_month = _MONTHS.index(raw["birthdate_month"]) + 1
    birthdate_day_str = raw["birthdate_day"].translate(_OMOCODIA_DECODE_TRANS)

    current_year = datetime.now().year
    try:
        birthdate_day = int(birthdate_day_str)
        # prefix the 2-digit year with the leading digits of the current year
        birthdate_year_int = int(
            "{}{}".format(str(current_year)[0:-2], birthdate_year)
        )
    except ValueError:
        # the syntax check also lets through letters that are not omocodia
        # substitutions: report them as an invalid date
        raise ValueError(
            "[codicefiscale] invalid date: {}".format(raw["birthdate"])
        )

    # days above 40 mean 'F' and carry an offset of 40
    if birthdate_day > 40:
        birthdate_day -= 40
        sex = "F"
    else:
        sex = "M"

    # a year in the future is moved back by 100 years
    if birthdate_year_int > current_year:
        birthdate_year_int -= 100
    birthdate_year = str(birthdate_year_int)
    birthdate_str = "{}/{}/{}".format(birthdate_year, birthdate_month, birthdate_day)
    try:
        birthdate = datetime.strptime(birthdate_str, "%Y/%m/%d")
    except ValueError:
        raise ValueError("[codicefiscale] invalid date: {}".format(birthdate_str))

    # the first birthplace character is kept as it is, the rest is decoded
    birthplace = _DATA["codes"].get(
        raw["birthplace"][0] + raw["birthplace"][1:].translate(_OMOCODIA_DECODE_TRANS)
    )

    cin = raw["cin"]
    cin_check = encode_cin(code)
    if cin != cin_check:
        raise ValueError(
            "[codicefiscale] wrong CIN (Control Internal Number): expected '{}', found '{}'".format(
                cin_check, cin
            )
        )

    data = {
        "code": code,
        "omocodes": _get_omocodes(code),
        "sex": sex,
        "birthdate": birthdate,
        "birthplace": birthplace,
        "raw": raw,
    }

    return data
439
|
|
|
|
440
|
|
|
|
441
|
|
|
def is_omocode(code):
    """
    Determines whether the specified code is omocode or not.

    :param code: The code
    :type code: string

    :returns: True if the specified code is omocode, False otherwise.
    :rtype: boolean

    :raises ValueError: If the code is not valid
    """
    data = decode(code)
    # the first entry is the variant with no substitution applied
    omocodes = data["omocodes"][1:]
    # compare the normalized code: the variants are built from it, so the raw
    # input would never match when it is lowercase or contains separators
    return data["code"] in omocodes
455
|
|
|
|
456
|
|
|
|
457
|
|
|
def is_valid(code):
    """
    Tell whether the specified code can be decoded.

    :param code: The code
    :type code: string

    :returns: True if the specified code is valid, False otherwise.
    :rtype: boolean
    """
    try:
        decode(code)
    except ValueError:
        return False
    else:
        return True
472
|
|
|
|