Passed
Push — master ( d7a7a9...fa1f03 )
by Fabio
01:07 queued 12s
created

scripts.updatedata   A

Complexity

Total Complexity 12

Size/Duplication

Total Lines 156
Duplicated Lines 0 %

Importance

Changes 0
Metric Value
wmc 12
eloc 113
dl 0
loc 156
rs 10
c 0
b 0
f 0

3 Functions

Rating   Name   Duplication   Size   Complexity  
C _update_countries_data() 0 73 8
A main() 0 3 1
B _update_municipalities_data() 0 62 3
1
# -*- coding: utf-8 -*-
2
3
from benedict import benedict
4
from openpyxl import load_workbook
5
from slugify import slugify
6
7
import fsutil
8
9
10
def _update_countries_data():
    """
    Regenerate ``../codicefiscale/data/countries.json`` from the ANPR
    foreign-countries spreadsheet.

    The spreadsheet is downloaded as ``countries.xlsx`` (the destination is
    resolved by ``fsutil`` from ``__file__``), its active sheet is read with
    the first row as column names and the remaining rows as records, and each
    record is mapped to the output schema. Records without a code are skipped.
    The result is sorted by ``name`` and written as indented JSON.

    Raises:
        AssertionError: if a record has a non-empty code whose length is not
            4, or an empty name. NOTE: assertions are stripped when Python
            runs with ``-O``.
    """
    # https://www.anagrafenazionale.interno.it/il-progetto/strumenti-di-lavoro/tabelle-decodifica/
    data_url = "https://www.anagrafenazionale.interno.it/wp-content/uploads/2021/03/tabella_2_statiesteri.xlsx"
    data_path = fsutil.download_file(data_url, __file__, filename="countries.xlsx")

    # Always release the workbook handle and delete the downloaded file,
    # even if reading the sheet fails half-way.
    try:
        workbook = load_workbook(filename=data_path, read_only=True)
        try:
            sheet = workbook.active
            # The first row holds the column names.
            keys = []
            for row in sheet.iter_rows(min_row=1, max_row=1):
                keys = [cell.value for cell in row]
            # Every following row becomes a {column name: cell value} dict.
            items = [
                dict(zip(keys, [cell.value for cell in row]))
                for row in sheet.iter_rows(min_row=2)
            ]
        finally:
            workbook.close()
    finally:
        fsutil.remove_file(data_path)

    data = benedict({"values": items})
    # NOTE(review): presumably normalizes the column names so the lowercase
    # keys used below match -- confirm against the python-benedict docs.
    data.standardize()

    def map_item(item):
        """Map one spreadsheet record to the output dict, or None to skip it."""
        if not item:
            return None
        code = item.get_str("codat").upper()
        if not code:
            # Records without a code are dropped from the output.
            return None
        assert len(code) == 4, f"Invalid code: '{code}'"

        name = item.get_str("denominazione").title()
        assert name != "", f"Invalid name: '{name}'"
        name_alt = item.get_str("denominazioneistat").title()
        name_alt_en = item.get_str("denominazioneistat_en").title()
        # Unique, non-empty slugs of all the name variants, in sorted order.
        name_slugs = sorted(
            {
                slug
                for slug in (
                    slugify(name),
                    slugify(name_alt),
                    slugify(name_alt_en),
                )
                if slug
            }
        )
        # Every country record gets the same fixed province value.
        province = "EE"

        date_created = item.get_str("datainiziovalidita")
        date_deleted = item.get_str("datafinevalidita")
        # An end date containing "9999" is treated as "no end date".
        if "9999" in date_deleted:
            date_deleted = ""

        return {
            "active": not date_deleted,
            "code": code,
            "date_created": date_created,
            "date_deleted": date_deleted,
            "name": name,
            "name_alt": name_alt,
            "name_alt_en": name_alt_en,
            "name_slugs": name_slugs,
            "province": province,
        }

    mapped_items = (map_item(benedict(item)) for item in data["values"])
    output_data = sorted(
        (mapped for mapped in mapped_items if mapped),
        key=lambda item: item["name"],
    )
    output_path = "../codicefiscale/data/countries.json"
    output_abspath = fsutil.join_path(__file__, output_path)
    fsutil.write_file_json(output_abspath, output_data, indent=4, sort_keys=True)
83
84
85
def _update_municipalities_data():
    """
    Regenerate ``../codicefiscale/data/municipalities.json`` from the ANPR
    municipalities CSV archive.

    The CSV is loaded straight from its URL through ``benedict.from_csv``,
    each record is validated and mapped to the output schema, and the result
    is sorted by ``name`` and written as indented JSON.

    Raises:
        AssertionError: if a record has a status other than "A"/"C", a code
            that is neither "ND" nor 4 characters long, an empty name, or a
            province whose length is not 2. NOTE: assertions are stripped
            when Python runs with ``-O``.
    """
    # https://www.anagrafenazionale.interno.it/il-progetto/strumenti-di-lavoro/tabelle-decodifica/
    data_url = "https://www.anagrafenazionale.interno.it/wp-content/uploads/ANPR_archivio_comuni.csv"
    data = benedict.from_csv(data_url)
    # NOTE(review): presumably normalizes the column names so the lowercase
    # keys used below match -- confirm against the python-benedict docs.
    data.standardize()

    def map_item(item):
        """Validate one CSV record and map it to the output dict."""
        # get_str (like every other field below) so that a key present with
        # a None value fails the assertion with a clear message instead of
        # raising AttributeError on ``None.upper()``.
        status = item.get_str("stato").upper()
        assert len(status) == 1 and status in ["A", "C"], f"Invalid status: '{status}'"
        active = status == "A"

        code = item.get_str("codcatastale").upper()
        assert code == "ND" or len(code) == 4, f"Invalid code: '{code}'"

        name = item.get_str("denominazione_it").title()
        assert name != "", f"Invalid name: {name}"

        name_trans = item.get_str("denomtraslitterata").title()
        name_alt = item.get_str("altradenominazione").title()
        name_alt_trans = item.get_str("altradenomtraslitterata").title()
        # Unique, non-empty slugs of all the name variants, in sorted order.
        name_slugs = sorted(
            {
                slug
                for slug in (
                    slugify(name),
                    slugify(name_trans),
                    slugify(name_alt),
                    slugify(name_alt_trans),
                )
                if slug
            }
        )
        province = item.get_str("siglaprovincia").upper()
        assert len(province) == 2, f"Invalid province: '{province}'"

        date_created = item.get_str("dataistituzione")
        date_deleted = item.get_str("datacessazione")
        # An end date containing "9999" is treated as "no end date".
        if "9999" in date_deleted:
            date_deleted = ""

        return {
            "active": active,
            "code": code,
            "date_created": date_created,
            "date_deleted": date_deleted,
            "name": name,
            "name_trans": name_trans,
            "name_alt": name_alt,
            "name_alt_trans": name_alt_trans,
            "name_slugs": name_slugs,
            "province": province,
        }

    mapped_items = (map_item(benedict(item)) for item in data["values"])
    output_data = sorted(
        (mapped for mapped in mapped_items if mapped),
        key=lambda item: item["name"],
    )
    output_path = "../codicefiscale/data/municipalities.json"
    output_abspath = fsutil.join_path(__file__, output_path)
    fsutil.write_file_json(output_abspath, output_data, indent=4, sort_keys=True)
147
148
149
def main():
    """Run every data update step: countries first, then municipalities."""
    update_steps = (
        _update_countries_data,
        _update_municipalities_data,
    )
    for run_update in update_steps:
        run_update()


# Only run the updates when this file is executed as a script.
if __name__ == "__main__":
    main()
156