1
|
|
|
# -*- coding: utf-8 -*- |
|
|
|
|
2
|
|
|
|
3
|
|
|
try: |
4
|
|
|
import unittest |
5
|
|
|
import logging |
6
|
|
|
import sys |
7
|
|
|
from osm_poi_matchmaker.libs.address import extract_street_housenumber_better_2, extract_all_address, \ |
|
|
|
|
8
|
|
|
clean_opening_hours, clean_opening_hours_2, clean_phone, clean_phone_to_str, clean_string, clean_url, \ |
|
|
|
|
9
|
|
|
clean_city |
10
|
|
|
except ImportError as err: |
11
|
|
|
logging.error('Error %s import module: %s', __name__, err) |
12
|
|
|
logging.exception('Exception occurred') |
13
|
|
|
|
14
|
|
|
sys.exit(128) |
15
|
|
|
|
16
|
|
|
|
17
|
|
|
class TestAddressResolver(unittest.TestCase):
    """Exercise ``extract_street_housenumber_better_2`` against a table of raw address strings."""

    def setUp(self):
        """Build the fixture table: each entry holds the raw input and the three expected values."""
        self.addresses = [
            {'original': 'Gránátos u. 11.', 'street': 'Gránátos utca', 'housenumber': '11',
             'conscriptionnumber': None},
            {'original': 'BERCSÉNYI U.1 2934/5 HRSZ', 'street': 'Bercsényi Miklós utca', 'housenumber': '1',
             'conscriptionnumber': '2934/5'},
            {'original': 'Szérűskert utca 018910/23. hrsz. (Köles utca 1.)', 'street': 'Szérűskert utca',
             'housenumber': None,
             'conscriptionnumber': '018910/23'},
            {'original': 'Palotai út 6. (Fehér Palota Üzletközpont)', 'street': 'Palotai út',
             'housenumber': '6', 'conscriptionnumber': None},
            {'original': 'Budai Vám 1.', 'street': 'Budai Vám',
             'housenumber': '1', 'conscriptionnumber': None},
            {'original': 'Kaszás u. 2.-Dózsa György út 87.', 'street': 'Dózsa György út',
             'housenumber': '87',
             'conscriptionnumber': None},
            {'original': 'Bajcsy Zs. út 11.', 'street': 'Bajcsy-Zsilinszky út', 'housenumber': '11',
             'conscriptionnumber': None},
            {'original': 'Hunyadi János út 19. - Savoya Park', 'street': 'Hunyadi János út', 'housenumber': '19',
             'conscriptionnumber': None},
            {'original': 'Kölcsey F. utca 1.', 'street': 'Kölcsey Ferenc utca', 'housenumber': '1',
             'conscriptionnumber': None},
            {'original': 'Várkerület 41.', 'street': None, 'housenumber': None,
             'conscriptionnumber': None},  # TODO: this is wrong
            {'original': 'Bajcsy-Zs. E. u. 31.', 'street': 'Bajcsy-Zsilinszky Endre utca', 'housenumber': '31',
             'conscriptionnumber': None},
            {'original': 'Bajcsy Zs.u. 77.', 'street': 'Bajcsy-Zsilinszky utca', 'housenumber': '77',
             'conscriptionnumber': None},
            {'original': 'Dózsa Gy.u.6.', 'street': 'Dózsa György utca', 'housenumber': '6',
             'conscriptionnumber': None},
            {'original': 'Krisztina krt. 65-67.', 'street': 'Krisztina körút', 'housenumber': '65-67',
             'conscriptionnumber': None},
            {'original': 'OLADI LTP. (DOLGOZÓK U.)', 'street': 'OLADI lakótelep', 'housenumber': None,
             'conscriptionnumber': None},
            {'original': 'Fő út 24.', 'street': 'Fő út', 'housenumber': '24',
             'conscriptionnumber': None},
            {'original': 'Törvényház u. 4.', 'street': 'Törvényház utca', 'housenumber': '4',
             'conscriptionnumber': None},
            {'original': 'Alkotás u. 53.', 'street': 'Alkotás utca', 'housenumber': '53',
             'conscriptionnumber': None},
        ]

    def test_extract_street_housenumber_better_2(self):
        """Compare the three returned values with the fixture's street, housenumber and conscriptionnumber."""
        for fixture in self.addresses:
            raw = fixture['original']
            # Label the subtest with the raw input so a failure report names the fixture,
            # and call the extractor inside it so one raising input does not abort the others.
            with self.subTest(original=raw):
                street, housenumber, conscriptionnumber = extract_street_housenumber_better_2(raw)
                extracted = {'street': street, 'housenumber': housenumber,
                             'conscriptionnumber': conscriptionnumber}
                for field, actual in extracted.items():
                    # One nested subtest per field keeps the three comparisons independent.
                    with self.subTest(field=field):
                        self.assertEqual(fixture[field], actual)
70
|
|
|
|
71
|
|
|
|
72
|
|
|
class TestFullAddressResolver(unittest.TestCase):
    """Exercise ``extract_all_address`` against a table of full address strings."""

    def setUp(self):
        """Build the fixture table: each entry holds the raw input and the five expected values."""
        self.addresses = [
            {'original': '9737 Bük, Petőfi utca 63. Fszt. 1.', 'postcode': '9737', 'city': 'Bük',
             'street': 'Petőfi utca', 'housenumber': '63',
             'conscriptionnumber': None},
            {'original': '5662 Csanádapáca', 'postcode': '5662', 'city': 'Csanádapáca',
             'street': None, 'housenumber': None, 'conscriptionnumber': None},
            {'original': '2463 Tordas Köztársaság u.8/a.', 'postcode': '2463', 'city': 'Tordas',
             'street': 'Köztársaság utca', 'housenumber': '8/A', 'conscriptionnumber': None},
            {'original': '2000 Szentendre Vasvári Pál u. 2794/16 hrsz.', 'postcode': '2000', 'city': 'Szentendre',
             'street': 'Vasvári Pál utca', 'housenumber': None, 'conscriptionnumber': '2794/16'},
        ]

    def test_extract_all_address(self):
        """Compare the five returned values with postcode, city, street, housenumber, conscriptionnumber."""
        for fixture in self.addresses:
            raw = fixture['original']
            # Label the subtest with the raw input so a failure report names the fixture,
            # and call the extractor inside it so one raising input does not abort the others.
            with self.subTest(original=raw):
                postcode, city, street, housenumber, conscriptionnumber = extract_all_address(raw)
                extracted = {'postcode': postcode, 'city': city, 'street': street,
                             'housenumber': housenumber, 'conscriptionnumber': conscriptionnumber}
                for field, actual in extracted.items():
                    # One nested subtest per field keeps the five comparisons independent.
                    with self.subTest(field=field):
                        self.assertEqual(fixture[field], actual)
103
|
|
|
|
104
|
|
|
|
105
|
|
|
class TestOpeningHoursCleaner(unittest.TestCase):
    """Exercise ``clean_opening_hours`` against a table of raw opening-hours strings."""

    def setUp(self):
        """Build the fixture table: raw input plus the expected open and close values."""
        self.opening_hours = [
            {'original': '05:20-19:38', 'opening_hours_open': '05:20', 'opening_hours_close': '19:38'},
            {'original': '6:44-21:00', 'opening_hours_open': '06:44', 'opening_hours_close': '21:00'},
            {'original': '05:20-19:38 Reggel nyolctól bejárat az üzleten át', 'opening_hours_open': '05:20',
             'opening_hours_close': '19:38'},
            {'original': ' 7:41 - 22:30 ', 'opening_hours_open': '07:41', 'opening_hours_close': '22:30'},
            {'original': ' 05:30 - 23:00 ', 'opening_hours_open': '05:30', 'opening_hours_close': '23:00'},
            {'original': ' - ', 'opening_hours_open': None, 'opening_hours_close': None},
        ]

    def test_clean_opening_hours(self):
        """Compare the returned pair with the fixture's opening_hours_open and opening_hours_close."""
        for fixture in self.opening_hours:
            raw = fixture['original']
            # Label the subtest with the raw input so a failure report names the fixture,
            # and call the cleaner inside it so one raising input does not abort the others.
            with self.subTest(original=raw):
                opened, closed = clean_opening_hours(raw)
                cleaned = {'opening_hours_open': opened, 'opening_hours_close': closed}
                for field, actual in cleaned.items():
                    # One nested subtest per field keeps the two comparisons independent.
                    with self.subTest(field=field):
                        self.assertEqual(fixture[field], actual)
124
|
|
|
|
125
|
|
|
|
126
|
|
|
class TestOpeningHoursCleaner2(unittest.TestCase):
    """Exercise ``clean_opening_hours_2`` against a table of raw time strings."""

    def setUp(self):
        """Build the fixture table: raw input plus the expected converted value."""
        self.opening_hours = [
            {'original': '600', 'converted': '06:00'},
            {'original': '0644', 'converted': '06:44'},
            {'original': '2359', 'converted': '23:59'},
            {'original': '-1', 'converted': None},
        ]

    def test_clean_opening_hours(self):
        """Compare the value returned for each raw input with the fixture's ``converted`` entry."""
        for fixture in self.opening_hours:
            raw = fixture['original']
            # Label the subtest with the raw input so a failure report names the fixture,
            # and call the cleaner inside it so one raising input does not abort the others.
            with self.subTest(original=raw):
                self.assertEqual(fixture['converted'], clean_opening_hours_2(raw))
141
|
|
|
|
142
|
|
|
|
143
|
|
|
class TestPhoneClener(unittest.TestCase):
    """Exercise ``clean_phone`` against a table of raw phone strings."""

    def setUp(self):
        """Build the fixture table: raw input plus the expected value (a list of strings or None)."""
        self.phones = [
            {'original': '0684/330-734, 0630/2374-712', 'converted': ['+36 84 330 734', '+36 30 237 4712']},
            {'original': '06-20-200-4000', 'converted': ['+36 20 200 4000']},
            {'original': '62464024', 'converted': ['+36 62 464 024']},
            {'original': ' 3684330 - 734', 'converted': ['+36 84 330 734']},
            {'original': '06205089009(Központi Telszám: Benzinkút, Motel, Kávézó, Szobafoglalás)',
             'converted': ['+36 20 508 9009']},
            {'original': ' ', 'converted': None},
        ]

    def test_clean_phone(self):
        """Compare the value returned for each raw input with the fixture's ``converted`` entry."""
        for fixture in self.phones:
            raw = fixture['original']
            # Label the subtest with the raw input so a failure report names the fixture,
            # and call the cleaner inside it so one raising input does not abort the others.
            with self.subTest(original=raw):
                self.assertEqual(fixture['converted'], clean_phone(raw))
161
|
|
|
|
162
|
|
|
|
163
|
|
|
class TestPhoneClener_to_str(unittest.TestCase):
    """Exercise ``clean_phone_to_str`` against a table of raw phone strings."""

    def setUp(self):
        """Build the fixture table: raw input plus the expected value (a single string or None)."""
        self.phones = [
            {'original': '0684/330-734, 0630/2374-712', 'converted': '+36 84 330 734;+36 30 237 4712'},
            {'original': '06-20-200-4000', 'converted': '+36 20 200 4000'},
            {'original': '62464024', 'converted': '+36 62 464 024'},
            {'original': ' 3684330 - 734', 'converted': '+36 84 330 734'},
            {'original': '06205089009(Központi Telszám: Benzinkút, Motel, Kávézó, Szobafoglalás)',
             'converted': '+36 20 508 9009'},
            {'original': ' ', 'converted': None},
            {'original': '+36303035698', 'converted': '+36 30 303 5698'},
        ]

    def test_clean_phone(self):
        """Compare the value returned for each raw input with the fixture's ``converted`` entry."""
        for fixture in self.phones:
            raw = fixture['original']
            # Label the subtest with the raw input so a failure report names the fixture,
            # and call the cleaner inside it so one raising input does not abort the others.
            with self.subTest(original=raw):
                self.assertEqual(fixture['converted'], clean_phone_to_str(raw))
182
|
|
|
|
183
|
|
|
|
184
|
|
|
class TestStringCleaner(unittest.TestCase):
    """Exercise ``clean_string`` against a table of raw strings."""

    def setUp(self):
        """Build the fixture table: raw input plus the expected cleaned string."""
        # NOTE(review): the attribute is called ``phones`` although it holds generic strings;
        # the name is kept unchanged in case anything else reads it.
        self.phones = [
            {'original': ' ablak zsiráf ', 'converted': 'ablak zsiráf'},
        ]

    def test_clean_string(self):
        """Compare the value returned for each raw input with the fixture's ``converted`` entry."""
        for fixture in self.phones:
            raw = fixture['original']
            # Label the subtest with the raw input so a failure report names the fixture,
            # and call the cleaner inside it so one raising input does not abort the others.
            with self.subTest(original=raw):
                self.assertEqual(fixture['converted'], clean_string(raw))
196
|
|
|
|
197
|
|
|
|
198
|
|
|
class TestURLCleaner(unittest.TestCase):
    """Exercise ``clean_url`` against a table of raw URL strings."""

    def setUp(self):
        """Build the fixture table: raw input plus the expected cleaned URL."""
        self.urls = [
            {'original': ' https://examle.com//tests//url// ', 'converted': 'https://examle.com/tests/url/'},
            {'original': ' https://examle.com/////tests///url ', 'converted': 'https://examle.com/tests/url'},
        ]

    def test_clean_url(self):
        """Compare the value returned for each raw input with the fixture's ``converted`` entry."""
        for fixture in self.urls:
            raw = fixture['original']
            # Label the subtest with the raw input so a failure report names the fixture,
            # and call the cleaner inside it so one raising input does not abort the others.
            with self.subTest(original=raw):
                self.assertEqual(fixture['converted'], clean_url(raw))
211
|
|
|
|
212
|
|
|
|
213
|
|
|
class TestCityCleaner(unittest.TestCase):
    """Exercise ``clean_city`` against a table of raw city names."""

    def setUp(self):
        """Build the fixture table: raw input plus the expected city name."""
        self.addresses = [
            {'original': 'Bük', 'city': 'Bük'},
            {'original': 'Csanádapáca', 'city': 'Csanádapáca'},
            {'original': 'Tordas', 'city': 'Tordas'},
            {'original': 'Szentendre', 'city': 'Szentendre'},
            {'original': 'Budapest I. Kerület', 'city': 'Budapest'},
            {'original': 'Budapest Xxiii. Kerület', 'city': 'Budapest'},
            {'original': 'Mikolc', 'city': 'Miskolc'},
            {'original': 'Iinárcs', 'city': 'Inárcs'},
        ]

    def test_clean_city(self):
        """Compare the value returned for each raw input with the fixture's ``city`` entry."""
        for fixture in self.addresses:
            raw = fixture['original']
            # Label the subtest with the raw input so a failure report names the fixture,
            # and call the cleaner inside it so one raising input does not abort the others.
            with self.subTest(original=raw):
                self.assertEqual(fixture['city'], clean_city(raw))
232
|
|
|
|