1
|
|
|
# -*- coding: utf-8 -*- |
|
|
|
|
2
|
|
|
|
3
|
|
|
try: |
4
|
|
|
import logging |
5
|
|
|
import sys |
6
|
|
|
import os |
7
|
|
|
import re |
8
|
|
|
from lxml import etree |
|
|
|
|
9
|
|
|
from osm_poi_matchmaker.dao.data_handlers import insert_poi_dataframe |
|
|
|
|
10
|
|
|
from osm_poi_matchmaker.libs.soup import save_downloaded_soup |
11
|
|
|
from osm_poi_matchmaker.libs.address import clean_city, clean_phone_to_str, clean_street, clean_street_type |
|
|
|
|
12
|
|
|
from osm_poi_matchmaker.libs.geo import check_hu_boundary |
13
|
|
|
from osm_poi_matchmaker.utils.enums import WeekDaysLongHU |
14
|
|
|
from osm_poi_matchmaker.utils.data_provider import DataProvider |
15
|
|
|
from osm_poi_matchmaker.utils.enums import FileType |
16
|
|
|
except ImportError as err: |
17
|
|
|
logging.error('Error %s import module: %s', __name__, err) |
18
|
|
|
logging.exception('Exception occurred') |
19
|
|
|
sys.exit(128) |
20
|
|
|
|
21
|
|
|
|
22
|
|
|
class hu_posta(DataProvider):
    """Data provider for Magyar Posta service points published in ``PostInfo.xml``.

    ``constains`` configures the download, ``types`` describes the POI types and
    ``process`` turns every ``post`` element of the feed into one POI record.

    ``self.data`` and ``self.download_cache`` are used but not defined in this
    class; presumably the ``DataProvider`` base class supplies them (confirm
    against the base class).
    """

    # Text of the feed's ``servicepointtype`` element mapped to
    # (POI name, POI code, value stored in ``public_holiday_open``).
    _SERVICE_POINT_TYPES = {
        'PM': ('Posta', 'hupostapo', False),
        'CS': ('Posta csomagautomata', 'hupostacso', True),
        'PP': ('PostaPont', 'hupostapp', False),
    }

    # Closing times which, paired with a '0:00' opening, count as an all-day opening.
    _NONSTOP_CLOSE_TIMES = ('0:00', '23:59', '24:00')

    def constains(self):
        """Set the feed URL, the OSM tags shared by all POI types and the cache file name."""
        self.link = 'http://httpmegosztas.posta.hu/PartnerExtra/OUT/PostInfo.xml'
        # NOTE(review): the reviewed copy of this file carried the redacted
        # placeholder '[email protected]' as the e-mail value; the address below
        # was restored and should be confirmed against the operator's published
        # contact data.
        self.tags = {'brand': 'Magyar Posta', 'operator': 'Magyar Posta Zrt.',
                     'operator:addr': '1138 Budapest, Dunavirág utca 2-6.', 'ref:vatin:hu': '10901232-2-44',
                     'ref:vatin': 'HU10901232', 'brand:wikipedia': 'hu:Magyar Posta Zrt.', 'brand:wikidata': 'Q145614',
                     'contact:email': 'ugyfelszolgalat@posta.hu', 'phone': '+3617678200',
                     'contact:facebook': 'https://www.facebook.com/MagyarPosta',
                     'contact:youtube': 'https://www.youtube.com/user/magyarpostaofficial',
                     'contact:instagram': 'https://www.instagram.com/magyar_posta_zrt', 'payment:cash': 'yes',
                     'payment:debit_cards': 'yes'}
        self.filetype = FileType.xml
        # The cache file is named after the class and the file type, e.g. "hu_posta.xml".
        self.filename = '{}.{}'.format(
            self.__class__.__name__, self.filetype.name)

    def types(self):
        """Build and return the list of POI type descriptions of this provider.

        Every entry is a dict with the POI code, name, type, OSM tags, base URL,
        search name and OSM search distances. The tag set of each type is its
        own ``amenity``/``vending`` tags extended with the shared ``self.tags``.

        :return: list of POI type dicts (also kept on the instance).
        """

        def with_shared_tags(own_tags):
            # Shared tags are applied last, exactly like dict.update() did in
            # the original code, so they win on a key collision.
            merged = dict(own_tags)
            merged.update(self.tags)
            return merged

        hupostapo = with_shared_tags({'amenity': 'post_office'})
        hupostacse = with_shared_tags({'amenity': 'vending_machine', 'vending': 'cheques'})
        hupostacso = with_shared_tags({'amenity': 'vending_machine', 'vending': 'parcel_pickup'})
        hupostapp = with_shared_tags({'amenity': 'post_office'})
        hupostamp = with_shared_tags({'amenity': 'post_office'})
        self.__types = [
            {'poi_code': 'hupostapo', 'poi_name': 'Posta', 'poi_type': 'post_office',
             'poi_tags': hupostapo, 'poi_url_base': 'https://www.posta.hu', 'poi_search_name': 'posta',
             'osm_search_distance_perfect': 2000, 'osm_search_distance_safe': 350, 'osm_search_distance_unsafe': 220,
             'preserve_original_post_code': True},
            {'poi_code': 'hupostacse', 'poi_name': 'Posta csekkbefizető automata',
             'poi_type': 'vending_machine_cheques',
             'poi_tags': hupostacse, 'poi_url_base': 'https://www.posta.hu', 'poi_search_name': 'posta',
             'osm_search_distance_perfect': 2000, 'osm_search_distance_safe': 300, 'osm_search_distance_unsafe': 220},
            {'poi_code': 'hupostacso', 'poi_name': 'Posta csomagautomata',
             'poi_type': 'vending_machine_parcel_pickup',
             'poi_tags': hupostacso, 'poi_url_base': 'https://www.posta.hu', 'poi_search_name': '(mpl|posta)',
             'osm_search_distance_perfect': 2000, 'osm_search_distance_safe': 200},
            {'poi_code': 'hupostapp', 'poi_name': 'PostaPont', 'poi_type': 'post_office',
             'poi_tags': hupostapp, 'poi_url_base': 'https://www.posta.hu', 'poi_search_name': '(postapont|posta)',
             'osm_search_distance_perfect': 2000, 'osm_search_distance_safe': 300,
             'osm_search_distance_unsafe': 220, 'preserve_original_post_code': True},
            {'poi_code': 'hupostamp', 'poi_name': 'Mobilposta', 'poi_type': 'post_office',
             'poi_tags': hupostamp, 'poi_url_base': 'https://www.posta.hu', 'poi_search_name': 'posta',
             'osm_search_distance_perfect': 2000, 'osm_search_distance_safe': 300}]
        return self.__types

    def process(self):
        """Download the feed and add one POI record per public ``post`` element.

        Errors are never propagated: a failure while handling one ``post`` is
        logged and that element is skipped; a failure of the download or of
        the iteration itself is logged and ends the processing.
        """
        try:
            soup = save_downloaded_soup('{}'.format(self.link), os.path.join(self.download_cache, self.filename),
                                        self.filetype)
            for e in soup.findAll('post'):
                try:
                    self._process_post(e)
                except Exception as err:
                    logging.error(err)
                    logging.error(e)
                    logging.exception('Exception occurred')
        except Exception as err:
            logging.exception('Exception occurred')
            logging.error(err)

    def _process_post(self, e):
        """Fill ``self.data`` from one ``post`` element and add it as a POI record.

        Elements that are not public post offices or that carry an unknown
        service point type are skipped without adding a record.
        """
        # If this is a closed post office, skip it
        # if e.get('ispostpoint') == '0':
        #     return
        # The 'kirendeltség' post offices are not available to end users, so we remove them
        post_name = e.find('name').get_text()
        post_name_lower = post_name.lower()
        if 'okmányiroda' in post_name_lower or 'mol kirendeltség' in post_name_lower:
            logging.debug('Skipping non public post office.')
            return
        known_type = self._SERVICE_POINT_TYPES.get(e.servicepointtype.get_text())
        if known_type is None:
            # Without a known type no name/code can be assigned, so the record
            # is skipped instead of being added with missing or stale values.
            logging.error('Non existing Posta type.')
            return
        self.data.name, self.data.code, self.data.public_holiday_open = known_type
        self.data.postcode = e.get('zipcode')
        house_tag = e.street.housenumber
        # Only the part before the first "(" is kept as house number.
        self.data.housenumber = house_tag.get_text().split('(', 1)[0].strip() \
            if house_tag is not None else None
        self.data.conscriptionnumber = None
        self.data.city = clean_city(e.city.get_text())
        self.data.branch = post_name
        if self.data.code == 'hupostapo':
            # Turn every run of 1-3 digits into "<digits>. számú".
            self.data.branch = re.sub(
                r"(\d{1,3})", r"\1. számú", self.data.branch)
        # All times are open so it is non stop
        if self._load_opening_hours(e):
            logging.debug('It is a non stop post office.')
            self.data.nonstop = True
        # The feed uses a decimal comma in coordinates.
        self.data.lat, self.data.lon = \
            check_hu_boundary(e.gpsdata.wgslat.get_text().replace(',', '.'),
                              e.gpsdata.wgslon.get_text().replace(',', '.'))
        self._load_street(e)
        self.data.phone = clean_phone_to_str(e.phonearea.get_text()) \
            if e.phonearea is not None else None
        self.data.email = e.email.get_text().strip() if e.email is not None else None
        self.data.add()

    def _load_opening_hours(self, post):
        """Store the opening hours of every ``days`` element of ``post`` in ``self.data``.

        :param post: the ``post`` element being processed.
        :return: ``True`` when at least seven day entries open at '0:00' and
            close at one of ``_NONSTOP_CLOSE_TIMES``.
        """
        nonstop_num = 0
        for d in post.findAll('days'):
            # len() is kept from the original code instead of a truthiness
            # test; an element without any content is skipped.
            if len(d) == 0:
                continue
            # Try to match day name in data source (day tag) with one of WeekDaysLongHU enum element
            day_name = d.day.get_text() if d.day is not None else None
            day_key = None
            for rd in WeekDaysLongHU:
                if rd.name == day_name:
                    day_key = rd.value
                    break
            # Skip days that do not exist at the data provider
            if day_key is None:
                logging.warning('Cannot find any opening hours information for day %s.', day_name)
                continue
            # Extract from and to information
            from1 = d.from1.get_text() if d.from1 is not None else None
            to1 = d.to1.get_text() if d.to1 is not None else None
            from2 = d.from2.get_text() if d.from2 is not None else None
            to2 = d.to2.get_text() if d.to2 is not None else None
            if from1 is None or to1 is None:
                # No complete first interval, so there is nothing to record.
                logging.debug('No opening hours in post office: %s for day %s.', self.data.branch, day_name)
                continue
            # It seems there are duplications in Posta data source: when the second
            # interval repeats the opening or the closing time of the first one,
            # drop it and keep the first interval only.
            if from1 == from2 or to1 == to2:
                logging.warning('Dulicated opening hours in post office: %s: %s-%s; %s-%s.',
                                self.data.branch, from1, to1, from2, to2)
                from2, to2 = None, None
            logging.debug('Opening hours in post office: %s: %s-%s; %s-%s.',
                          self.data.branch, from1, to1, from2, to2)
            self.data.day_open(day_key, from1)
            if from2 is None or to2 is None:
                # Single interval: the day closes at the end of that interval.
                closing = to1
            else:
                # Only same lunch break is supported for every days, it is taken
                # from the day with key 3 (Wednesday according to the original
                # comment; depends on WeekDaysLongHU values - TODO confirm).
                if day_key == 3:
                    self.data.lunch_break_start = to1
                    self.data.lunch_break_stop = from2
                closing = to2
            self.data.day_close(day_key, closing)
            # Count opening hours with nonstop like settings
            if from1 == '0:00' and closing in self._NONSTOP_CLOSE_TIMES:
                nonstop_num += 1
        return nonstop_num >= 7

    def _load_street(self, post):
        """Set ``self.data.street`` and ``self.data.original`` from the ``street`` element.

        The feed has no original address text, so ``original`` is assembled
        from street name, street type and house number, skipping missing parts.
        """
        street_name_tag = post.street.find('name')
        street_tmp_1 = clean_street(street_name_tag.get_text().strip()) \
            if street_name_tag is not None else None
        street_type_tag = post.street.type
        street_tmp_2 = clean_street_type(street_type_tag.get_text().strip()) \
            if street_type_tag is not None else None
        if street_tmp_2 is None:
            # Streets without types
            self.data.street = street_tmp_1
            self.data.original = self._join_address(street_tmp_1, self.data.housenumber)
        elif street_tmp_1 is not None:
            # Street with types
            self.data.street = '{} {}'.format(
                street_tmp_1, street_tmp_2)
            self.data.original = self._join_address(street_tmp_1, street_tmp_2, self.data.housenumber)
        else:
            logging.error(
                'Non handled state in street data processing!')

    @staticmethod
    def _join_address(*parts):
        """Join the parts that are not ``None`` with single spaces; ``None`` when all are missing."""
        present = ['{}'.format(part) for part in parts if part is not None]
        return ' '.join(present) if present else None
210
|
|
|
|