|
1
|
|
|
# -*- coding: utf-8 -*- |
|
|
|
|
|
|
2
|
|
|
|
|
3
|
|
|
try: |
|
4
|
|
|
import logging |
|
5
|
|
|
import sys |
|
6
|
|
|
import os |
|
7
|
|
|
import re |
|
8
|
|
|
from lxml import etree |
|
|
|
|
|
|
9
|
|
|
from osm_poi_matchmaker.dao.data_handlers import insert_poi_dataframe |
|
|
|
|
|
|
10
|
|
|
from osm_poi_matchmaker.libs.soup import save_downloaded_soup |
|
11
|
|
|
from osm_poi_matchmaker.libs.address import clean_city, clean_phone_to_str, clean_street, clean_street_type |
|
|
|
|
|
|
12
|
|
|
from osm_poi_matchmaker.libs.geo import check_hu_boundary |
|
13
|
|
|
from osm_poi_matchmaker.utils.enums import WeekDaysLongHU |
|
14
|
|
|
from osm_poi_matchmaker.utils.data_provider import DataProvider |
|
15
|
|
|
from osm_poi_matchmaker.utils.enums import FileType |
|
16
|
|
|
except ImportError as err: |
|
17
|
|
|
logging.error('Error %s import module: %s', __name__, err) |
|
18
|
|
|
logging.exception('Exception occurred') |
|
19
|
|
|
sys.exit(128) |
|
20
|
|
|
|
|
21
|
|
|
|
|
22
|
|
|
class hu_posta(DataProvider):
    """Data provider that imports Magyar Posta service points from the PostInfo XML feed.

    ``constains`` sets the download link and the common tag set, ``types``
    describes the POI types this provider can emit, and ``process`` parses the
    downloaded feed and pushes every public service point into ``self.data``.
    """

    # Maps the feed's <servicepointtype> text to (POI name, POI code, public_holiday_open).
    _SERVICE_POINT_TYPES = {
        'PM': ('Posta', 'hupostapo', False),
        'CS': ('Posta csomagautomata', 'hupostacso', True),
        'PP': ('PostaPont', 'hupostapp', False),
    }

    # Closing times that, together with a '0:00' opening, count as an all-day opening.
    _NONSTOP_CLOSINGS = ('0:00', '23:59', '24:00')

    def constains(self):
        """Set the feed URL, the tags shared by every POI type, and the cache file name."""
        self.link = 'http://httpmegosztas.posta.hu/PartnerExtra/OUT/PostInfo.xml'
        # NOTE(review): the 'contact:email' value below looks like an
        # address-obfuscation placeholder rather than a real e-mail address;
        # confirm it against the upstream source before relying on it.
        self.tags = {'brand': 'Magyar Posta', 'operator': 'Magyar Posta Zrt.',
                     'operator:addr': '1138 Budapest, Dunavirág utca 2-6.', 'ref:vatin:hu': '10901232-2-44',
                     'ref:vatin': 'HU10901232', 'brand:wikipedia': 'hu:Magyar Posta Zrt.', 'brand:wikidata': 'Q145614',
                     'contact:email': '[email protected]', 'phone': '+3617678200',
                     'contact:facebook': 'https://www.facebook.com/MagyarPosta',
                     'contact:youtube': 'https://www.youtube.com/user/magyarpostaofficial',
                     'contact:instagram': 'https://www.instagram.com/magyar_posta_zrt', 'payment:cash': 'yes',
                     'payment:debit_cards': 'yes'}
        self.filetype = FileType.xml
        self.filename = '{}.{}'.format(
            self.__class__.__name__, self.filetype.name)

    def types(self):
        """Build, store and return the list of POI type descriptors of this provider.

        Each descriptor gets its own tag dictionary: the type specific base
        tags are written first and ``self.tags`` is applied on top of them.

        Returns:
            list of dict: one descriptor per POI code (``hupostapo``,
            ``hupostacse``, ``hupostacso``, ``hupostapp``, ``hupostamp``).
        """
        def with_common_tags(base):
            # A fresh dict per type, so the descriptors never share a tag object.
            merged = dict(base)
            merged.update(self.tags)
            return merged

        hupostapo = with_common_tags({'amenity': 'post_office'})
        hupostacse = with_common_tags({'amenity': 'vending_machine', 'vending': 'cheques'})
        hupostacso = with_common_tags({'amenity': 'vending_machine', 'vending': 'parcel_pickup'})
        hupostapp = with_common_tags({'amenity': 'post_office'})
        hupostamp = with_common_tags({'amenity': 'post_office'})
        self.__types = [
            {'poi_code': 'hupostapo', 'poi_name': 'Posta', 'poi_type': 'post_office',
             'poi_tags': hupostapo, 'poi_url_base': 'https://www.posta.hu', 'poi_search_name': 'posta',
             'osm_search_distance_perfect': 2000, 'osm_search_distance_safe': 350, 'osm_search_distance_unsafe': 220,
             'preserve_original_post_code': True},
            {'poi_code': 'hupostacse', 'poi_name': 'Posta csekkbefizető automata',
             'poi_type': 'vending_machine_cheques',
             'poi_tags': hupostacse, 'poi_url_base': 'https://www.posta.hu', 'poi_search_name': 'posta',
             'osm_search_distance_perfect': 2000, 'osm_search_distance_safe': 300, 'osm_search_distance_unsafe': 220},
            {'poi_code': 'hupostacso', 'poi_name': 'Posta csomagautomata',
             'poi_type': 'vending_machine_parcel_pickup',
             'poi_tags': hupostacso, 'poi_url_base': 'https://www.posta.hu', 'poi_search_name': '(mpl|posta)',
             'osm_search_distance_perfect': 2000, 'osm_search_distance_safe': 200},
            {'poi_code': 'hupostapp', 'poi_name': 'PostaPont', 'poi_type': 'post_office',
             'poi_tags': hupostapp, 'poi_url_base': 'https://www.posta.hu', 'poi_search_name': '(postapont|posta)',
             'osm_search_distance_perfect': 2000, 'osm_search_distance_safe': 300,
             'osm_search_distance_unsafe': 220, 'preserve_original_post_code': True},
            {'poi_code': 'hupostamp', 'poi_name': 'Mobilposta', 'poi_type': 'post_office',
             'poi_tags': hupostamp, 'poi_url_base': 'https://www.posta.hu', 'poi_search_name': 'posta',
             'osm_search_distance_perfect': 2000, 'osm_search_distance_safe': 300}]
        return self.__types

    def process(self):
        """Download the feed and add every public service point to ``self.data``.

        A failure while handling one ``<post>`` element is logged and the
        element is skipped; a failure of the download or of the iteration
        itself is logged as well. No exception is propagated to the caller.
        """
        try:
            soup = save_downloaded_soup('{}'.format(self.link), os.path.join(self.download_cache, self.filename),
                                        self.filetype)
            for e in soup.findAll('post'):
                try:
                    self._process_post(e)
                except Exception as err:
                    # Best effort import: one broken record must not stop the others.
                    logging.error(err)
                    logging.error(e)
                    logging.exception('Exception occurred')
        except Exception as err:
            logging.exception('Exception occurred')
            logging.error(err)

    def _process_post(self, e):
        """Fill ``self.data`` from a single ``<post>`` element and add it."""
        name_tag = e.find('name')
        branch = name_tag.get_text() if name_tag is not None else None
        # The 'kirendeltség' post offices are not available to end users, so we skip them.
        # A missing name is treated as "no match" here, consistently with the
        # None handling of the branch below.
        lowered_name = branch.lower() if branch is not None else ''
        if 'okmányiroda' in lowered_name or 'mol kirendeltség' in lowered_name:
            logging.debug('Skipping non public post office.')
            return
        self._set_service_point_type(e)
        self.data.postcode = e.get('zipcode')
        # Anything from the first '(' on is dropped from the house number.
        self.data.housenumber = e.street.housenumber.get_text().split('(', 1)[0].strip() \
            if e.street.housenumber is not None else None
        self.data.conscriptionnumber = None
        self.data.city = clean_city(e.city.get_text())
        self.data.branch = branch
        if self.data.code == 'hupostapo' and self.data.branch is not None:
            # Turn every 1-3 digit number in the branch into "<n>. számú".
            self.data.branch = re.sub(
                r"(\d{1,3})", r"\1. számú", self.data.branch)
        self._set_opening_hours(e)
        # The feed may use a decimal comma in coordinates; convert it to a dot.
        self.data.lat, self.data.lon = \
            check_hu_boundary(e.gpsdata.wgslat.get_text().replace(',', '.'),
                              e.gpsdata.wgslon.get_text().replace(',', '.'))
        self._set_street(e)
        self.data.phone = clean_phone_to_str(e.phonearea.get_text()) \
            if e.phonearea is not None else None
        self.data.email = e.email.get_text().strip() if e.email is not None else None
        self.data.add()

    def _set_service_point_type(self, e):
        """Set name, code and public holiday flag from the ``<servicepointtype>`` text."""
        service_type = e.servicepointtype.get_text()
        known = self._SERVICE_POINT_TYPES.get(service_type)
        if known is None:
            # NOTE(review): processing continues for unknown types, so name/code
            # keep whatever self.data currently holds — confirm that is intended.
            logging.error('Non existing Posta type.')
            return
        self.data.name, self.data.code, self.data.public_holiday_open = known

    def _set_opening_hours(self, e):
        """Record the opening hours of every ``<days>`` element and detect non stop offices."""
        nonstop_num = 0
        for d in e.findAll('days'):
            # len() of a tag counts its children; an empty <days> has nothing to record.
            if len(d) == 0:
                continue
            # Match the day name of the data source with a WeekDaysLongHU member.
            day_name = d.day.get_text()
            day_key = next(
                (rd.value for rd in WeekDaysLongHU if rd.name == day_name), None)
            if day_key is None:
                # Report the day name found in the feed, not an arbitrary enum member.
                logging.warning(
                    'Cannot find any opening hours information for day %s.', day_name)
                continue
            # Extract from and to information
            from1 = d.from1.get_text() if d.from1 is not None else None
            to1 = d.to1.get_text() if d.to1 is not None else None
            from2 = d.from2.get_text() if d.from2 is not None else None
            to2 = d.to2.get_text() if d.to2 is not None else None
            # Avoid duplicated values of opening and close
            if from1 != from2 and to1 != to2:
                logging.debug('Opening hours in post office: %s: %s-%s; %s-%s.',
                              self.data.branch, from1, to1, from2, to2)
                self.data.day_open(day_key, from1)
                if from2 is None or to2 is None:
                    # Single interval: the day closes at the end of the first interval.
                    closing = to1
                else:
                    # Two intervals: the gap between them is the lunch break.
                    # Only one lunch break is supported for all days, taken from
                    # day_key 3 (Wednesday according to the original comment).
                    if day_key == 3:
                        self.data.lunch_break_start = to1
                        self.data.lunch_break_stop = from2
                    closing = to2
                self.data.day_close(day_key, closing)
                # Count opening hours with nonstop like settings
                if from1 == '0:00' and closing in self._NONSTOP_CLOSINGS:
                    nonstop_num += 1
            else:
                # It seems there are duplications in Posta data source; such
                # days are reported and no opening hours are recorded for them.
                logging.warning('Dulicated opening hours in post office: %s: %s-%s; %s-%s.',
                                self.data.branch, from1, to1, from2, to2)
        # All times are open so it is non stop
        if nonstop_num >= 7:
            logging.debug('It is a non stop post office.')
            self.data.nonstop = True

    def _set_street(self, e):
        """Set the street and build the ``original`` address string from street parts."""
        street_name_tag = e.street.find('name')
        street_tmp_1 = clean_street(street_name_tag.get_text().strip()) \
            if street_name_tag is not None else None
        street_tmp_2 = clean_street_type(e.street.type.get_text().strip()) \
            if e.street.type is not None else None
        # Since there is no original address format we create one from the
        # available parts: name, optional type, optional house number.
        if street_tmp_2 is None:
            # Streets without types
            self.data.street = street_tmp_1
            if self.data.housenumber is not None:
                self.data.original = '{} {}'.format(
                    street_tmp_1, self.data.housenumber)
            else:
                self.data.original = '{}'.format(street_tmp_1)
        elif street_tmp_1 is not None:
            # Street with types
            self.data.street = '{} {}'.format(
                street_tmp_1, street_tmp_2)
            if self.data.housenumber is not None:
                self.data.original = '{} {} {}'.format(street_tmp_1, street_tmp_2,
                                                       self.data.housenumber)
            else:
                self.data.original = '{} {}'.format(
                    street_tmp_1, street_tmp_2)
        else:
            # A street type without a street name.
            logging.error(
                'Non handled state in street data processing!')
|
210
|
|
|
|