scripts.updatedata   A
last analyzed

Complexity

Total Complexity 13

Size/Duplication

Total Lines 159
Duplicated Lines 0 %

Importance

Changes 0
Metric Value
wmc 13
eloc 116
dl 0
loc 159
rs 10
c 0
b 0
f 0

6 Functions

Rating   Name   Duplication   Size   Complexity  
B _update_countries_data() 0 57 5
A main() 0 3 1
B _update_municipalities_data() 0 65 3
A _slugify_names() 0 2 1
A _expect_keys() 0 5 1
A _write_data_json() 0 5 2
1
import fsutil
2
from benedict import benedict
3
from slugify import slugify
4
5
6
def _expect_keys(d, keys):
7
    missing_keys = list(set(keys) - set(d.keys()))
8
    assert (
9
        not missing_keys
10
    ), f"Invalid keys, missing one or more expected keys {missing_keys}."
11
12
13
def _slugify_names(*names):
14
    return sorted(set(filter(bool, [slugify(name) for name in names])))
15
16
17
def _update_countries_data():
    """Rebuild ``countries.json`` from the ANPR foreign-countries spreadsheet.

    The spreadsheet at ``data_url`` is loaded with ``benedict.from_xls``, every
    entry of its ``"values"`` list is mapped to a country record, and the
    records are handed to ``_write_data_json``.
    """
    # https://www.anagrafenazionale.interno.it/area-tecnica/tabelle-di-decodifica/
    data_url = "https://www.anagrafenazionale.interno.it/wp-content/uploads/2022/10/tabella_2_statiesteri.xlsx"
    source = benedict.from_xls(data_url)
    source.standardize()

    expected_columns = [
        "codat",
        "denominazione",
        "denominazioneistat",
        "denominazioneistat_en",
        "datainiziovalidita",
        "datafinevalidita",
    ]

    def to_record(row):
        """Map one spreadsheet row to a country record, or ``None`` to skip it."""
        if not row:
            return None

        _expect_keys(row, expected_columns)

        code = row.get_str("codat").upper()
        if not code:
            # Rows without a code are dropped rather than treated as errors.
            return None
        assert len(code) == 4, f"Invalid code: '{code}'"

        name = row.get_str("denominazione").title()
        assert name != "", f"Invalid name: '{name}'"
        name_alt = row.get_str("denominazioneistat").title()
        name_alt_en = row.get_str("denominazioneistat_en").title()
        name_slugs = _slugify_names(name, name_alt, name_alt_en)

        date_created = row.get_datetime("datainiziovalidita")
        date_deleted = row.get_datetime("datafinevalidita")
        if "9999" in row.get_str("datafinevalidita"):
            # A raw end date containing "9999" is stored as an empty string
            # (presumably an open-ended validity marker -- TODO confirm).
            date_deleted = ""

        return {
            "active": not date_deleted,
            "code": code,
            "date_created": date_created,
            "date_deleted": date_deleted,
            "name": name,
            "name_alt": name_alt,
            "name_alt_en": name_alt_en,
            "name_slugs": name_slugs,
            # Every country record gets the same fixed province value.
            "province": "EE",
        }

    records = [to_record(benedict(row)) for row in source["values"]]
    _write_data_json(
        filepath="../codicefiscale/data/countries.json",
        data=records,
    )
75
76
77
def _update_municipalities_data():
    """Rebuild ``municipalities.json`` from the ANPR municipalities CSV archive.

    The CSV at ``data_url`` is loaded with ``benedict.from_csv``, every entry
    of its ``"values"`` list is mapped to a municipality record, and the
    records are handed to ``_write_data_json``.
    """
    # https://www.anagrafenazionale.interno.it/area-tecnica/tabelle-di-decodifica/
    data_url = "https://www.anagrafenazionale.interno.it/wp-content/uploads/2022/12/ANPR_archivio_comuni.csv"
    source = benedict.from_csv(data_url)
    source.standardize()

    expected_columns = [
        "stato",
        "codcatastale",
        "denominazione_it",
        "denomtraslitterata",
        "altradenominazione",
        "altradenomtraslitterata",
        "siglaprovincia",
        "dataistituzione",
        "datacessazione",
    ]

    def to_record(row):
        """Map one CSV row to a municipality record, or ``None`` to skip it."""
        if not row:
            return None

        _expect_keys(row, expected_columns)

        # Only the two status letters "A" and "C" are accepted; "A" means the
        # record is written as active.
        status = row.get("stato", "").upper()
        assert len(status) == 1 and status in ["A", "C"], f"Invalid status: '{status}'"

        # A code is either the literal "ND" or exactly four characters long.
        code = row.get_str("codcatastale").upper()
        assert code == "ND" or len(code) == 4, f"Invalid code: '{code}'"

        name = row.get_str("denominazione_it").title()
        assert name != "", f"Invalid name: {name}"

        name_trans = row.get_str("denomtraslitterata").title()
        name_alt = row.get_str("altradenominazione").title()
        name_alt_trans = row.get_str("altradenomtraslitterata").title()
        name_slugs = _slugify_names(name, name_trans, name_alt, name_alt_trans)

        province = row.get("siglaprovincia", "").upper()
        assert len(province) == 2, f"Invalid province: '{province}'"

        date_created = row.get_datetime("dataistituzione")
        date_deleted = row.get_datetime("datacessazione")
        if "9999" in row.get_str("datacessazione"):
            # A raw end date containing "9999" is stored as an empty string
            # (presumably an open-ended validity marker -- TODO confirm).
            date_deleted = ""

        return {
            "active": status == "A",
            "code": code,
            "date_created": date_created,
            "date_deleted": date_deleted,
            "name": name,
            "name_trans": name_trans,
            "name_alt": name_alt,
            "name_alt_trans": name_alt_trans,
            "name_slugs": name_slugs,
            "province": province,
        }

    records = [to_record(benedict(row)) for row in source["values"]]
    _write_data_json(
        filepath="../codicefiscale/data/municipalities.json",
        data=records,
    )
143
144
145
def _write_data_json(filepath, data):
    """Write the truthy items of ``data``, sorted by ``"name"``, as JSON.

    Args:
        filepath: Destination path, joined onto this script's ``__file__``
            with ``fsutil.join_path``.
        data: Iterable of record dicts; falsy entries (such as ``None`` for
            skipped rows) are discarded before sorting.
    """
    records = [item for item in data if item]
    # list.sort is stable, like sorted(), so equal names keep their order.
    records.sort(key=lambda item: item["name"])
    target_path = fsutil.join_path(__file__, filepath)
    fsutil.write_file_json(target_path, records, indent=4, sort_keys=True)
150
151
152
def main():
    """Regenerate the countries data file, then the municipalities one."""
    for update in (_update_countries_data, _update_municipalities_data):
        update()


# Run the update only when this file is executed as a script.
if __name__ == "__main__":
    main()
159