|
1
|
|
|
# -*- coding: utf-8 -*- |
|
|
|
|
|
|
2
|
|
|
|
|
3
|
|
|
__author__ = 'kami911' |
|
4
|
|
|
|
|
5
|
|
|
try: |
|
6
|
|
|
import logging |
|
7
|
|
|
import sys |
|
8
|
|
|
import pandas as pd |
|
9
|
|
|
from sqlalchemy.orm import scoped_session, sessionmaker |
|
10
|
|
|
from osmapi import OsmApi |
|
11
|
|
|
from osm_poi_matchmaker.dao.poi_base import POIBase |
|
12
|
|
|
from osm_poi_matchmaker.utils import config |
|
13
|
|
|
from osm_poi_matchmaker.dao.data_structure import OSM_object_type, POI_OSM_cache |
|
14
|
|
|
from osm_poi_matchmaker.libs.osm import query_postcode_osm_external |
|
15
|
|
|
from osm_poi_matchmaker.dao.data_handlers import get_or_create_cache |
|
16
|
|
|
except ImportError as err: |
|
17
|
|
|
logging.error('Error %s import module: %s', __name__, err) |
|
18
|
|
|
logging.exception('Exception occurred') |
|
19
|
|
|
|
|
20
|
|
|
sys.exit(128) |
|
21
|
|
|
|
|
22
|
|
|
RETRY = 3 |
|
23
|
|
|
|
|
24
|
|
|
|
|
25
|
|
|
def online_poi_matching(args): |
|
|
|
|
|
|
26
|
|
|
data, comm_data = args |
|
27
|
|
|
try: |
|
|
|
|
|
|
28
|
|
|
db = POIBase('{}://{}:{}@{}:{}/{}'.format(config.get_database_type(), config.get_database_writer_username(), |
|
|
|
|
|
|
29
|
|
|
config.get_database_writer_password(), |
|
30
|
|
|
config.get_database_writer_host(), |
|
31
|
|
|
config.get_database_writer_port(), |
|
32
|
|
|
config.get_database_poi_database())) |
|
33
|
|
|
pgsql_pool = db.pool |
|
34
|
|
|
session_factory = sessionmaker(pgsql_pool) |
|
35
|
|
|
Session = scoped_session(session_factory) |
|
|
|
|
|
|
36
|
|
|
session = Session() |
|
37
|
|
|
osm_live_query = OsmApi() |
|
38
|
|
|
for i, row in data.iterrows(): |
|
39
|
|
|
# for i, row in data[data['poi_code'].str.contains('posta')].iterrows(): |
|
40
|
|
|
try: |
|
41
|
|
|
# Try to search OSM POI with same type, and name contains poi_search_name within the specified distance |
|
|
|
|
|
|
42
|
|
|
osm_query = db.query_osm_shop_poi_gpd(row.get('poi_lon'), row.get('poi_lat'), |
|
43
|
|
|
comm_data.loc[comm_data['pc_id'] == row.get('poi_common_id')][ |
|
|
|
|
|
|
44
|
|
|
'poi_type'].values[0], row.get('poi_search_name'), |
|
|
|
|
|
|
45
|
|
|
row.get('poi_search_avoid_name'), |
|
46
|
|
|
row.get('poi_addr_street'), row.get('poi_addr_housenumber'), |
|
|
|
|
|
|
47
|
|
|
row.get('poi_conscriptionnumber'), row.get('poi_city'), |
|
|
|
|
|
|
48
|
|
|
row.get('osm_search_distance_perfect'), |
|
49
|
|
|
row.get('osm_search_distance_safe'), |
|
50
|
|
|
row.get('osm_search_distance_unsafe')) |
|
51
|
|
|
# Enrich our data with OSM database POI metadata |
|
52
|
|
|
if osm_query is not None: |
|
53
|
|
|
row['poi_new'] = False |
|
54
|
|
|
# Collect additional OSM metadata. Note: this needs style change during osm2pgsql |
|
|
|
|
|
|
55
|
|
|
osm_id = osm_query['osm_id'].values[0] if osm_query.get('osm_id') is not None else None |
|
|
|
|
|
|
56
|
|
|
osm_node = osm_query.get('node').values[0] if osm_query.get('node') is not None else None |
|
|
|
|
|
|
57
|
|
|
# Set OSM POI coordinates for all kind of geom |
|
58
|
|
|
lat = osm_query.get('lat').values[0] |
|
59
|
|
|
lon = osm_query.get('lon').values[0] |
|
60
|
|
|
if data.at[i, 'poi_lat'] != lat and data.at[i, 'poi_lon'] != lon: |
|
61
|
|
|
logging.info('Using new coodinates %s %s instead of %s %s.', |
|
62
|
|
|
lat, lon, data.at[i, 'poi_lat'], data.at[i, 'poi_lon']) |
|
63
|
|
|
data.at[i, 'poi_lat'] = lat |
|
64
|
|
|
data.at[i, 'poi_lon'] = lon |
|
65
|
|
|
if osm_node == 'node': |
|
66
|
|
|
osm_node = OSM_object_type.node |
|
67
|
|
|
elif osm_node == 'way': |
|
68
|
|
|
osm_node = OSM_object_type.way |
|
69
|
|
|
elif osm_node == 'relation': |
|
70
|
|
|
osm_node = OSM_object_type.relation |
|
71
|
|
|
else: |
|
72
|
|
|
logging.warning('Illegal state: %s', osm_query['node'].values[0]) |
|
73
|
|
|
data.at[i, 'osm_id'] = osm_id |
|
74
|
|
|
data.at[i, 'osm_node'] = osm_node |
|
75
|
|
|
# Refine postcode |
|
76
|
|
|
if row['preserve_original_post_code'] is not True: |
|
77
|
|
|
# Current OSM postcode based on lat,long query. |
|
78
|
|
|
postcode = query_postcode_osm_external(config.get_geo_prefer_osm_postcode(), session, lon, lat, row.get('poi_postcode')) |
|
|
|
|
|
|
79
|
|
|
force_postcode_change = False # TODO: Has to be a setting in app.conf |
|
|
|
|
|
|
80
|
|
|
if force_postcode_change is True: |
|
81
|
|
|
# Force to use datasource postcode |
|
82
|
|
|
if postcode != row.get('poi_postcode'): |
|
83
|
|
|
logging.info('Changing postcode from %s to %s.', row.get('poi_postcode'), postcode) |
|
|
|
|
|
|
84
|
|
|
data.at[i, 'poi_postcode'] = postcode |
|
85
|
|
|
else: |
|
86
|
|
|
# Try to use smart method for postcode check |
|
87
|
|
|
ch_posctode = smart_postcode_check(row, osm_query, postcode) |
|
88
|
|
|
if ch_posctode is not None: |
|
89
|
|
|
data.at[i, 'poi_postcode'] = ch_posctode |
|
90
|
|
|
else: |
|
91
|
|
|
logging.info('Preserving original postcode %s', row.get('poi_postcode')) |
|
92
|
|
|
data.at[i, 'osm_version'] = osm_query['osm_version'].values[0] \ |
|
93
|
|
|
if osm_query['osm_version'] is not None else None |
|
94
|
|
|
data.at[i, 'osm_changeset'] = osm_query['osm_changeset'].values[0] \ |
|
95
|
|
|
if osm_query['osm_changeset'] is not None else None |
|
96
|
|
|
if osm_query['osm_timestamp'] is not None: |
|
97
|
|
|
osm_query['osm_timestamp'] = \ |
|
98
|
|
|
data.at[i, 'osm_timestamp'] = pd.to_datetime(str((osm_query['osm_timestamp'].values[0]))) |
|
|
|
|
|
|
99
|
|
|
else: |
|
100
|
|
|
osm_query['osm_timestamp'] = None |
|
101
|
|
|
data.at[i, 'poi_distance'] = osm_query.get('distance').values[0] if osm_query.get('distance') is not None else None |
|
|
|
|
|
|
102
|
|
|
# For OSM way also query node points |
|
103
|
|
|
if osm_node == OSM_object_type.way: |
|
104
|
|
|
logging.info('This is an OSM way looking for id %s nodes.', osm_id) |
|
105
|
|
|
# Add list of nodes to the dataframe |
|
106
|
|
|
nodes = db.query_ways_nodes(osm_id) |
|
107
|
|
|
data.at[i, 'osm_nodes'] = nodes |
|
108
|
|
|
elif osm_node == OSM_object_type.relation: |
|
109
|
|
|
logging.info('This is an OSM relation looking for id %s nodes.', osm_id) |
|
110
|
|
|
# Add list of relation nodes to the dataframe |
|
111
|
|
|
nodes = db.query_relation_nodes(osm_id) |
|
112
|
|
|
data.at[i, 'osm_nodes'] = nodes |
|
113
|
|
|
logging.info('Old %s (not %s) type: %s POI within %s m: %s %s, %s %s (%s)', |
|
114
|
|
|
data.at[i, 'poi_search_name'], data.at[i, 'poi_search_avoid_name'], |
|
|
|
|
|
|
115
|
|
|
data.at[i, 'poi_type'], data.at[i, 'poi_distance'], |
|
116
|
|
|
data.at[i, 'poi_postcode'], data.at[i, 'poi_city'], data.at[i, 'poi_addr_street'], |
|
|
|
|
|
|
117
|
|
|
data.at[i, 'poi_addr_housenumber'], data.at[i, 'poi_conscriptionnumber']) |
|
|
|
|
|
|
118
|
|
|
try: |
|
119
|
|
|
# Download OSM POI way live tags |
|
120
|
|
|
if osm_node == OSM_object_type.way: |
|
121
|
|
|
for rtc in range(0, RETRY): |
|
|
|
|
|
|
122
|
|
|
logging.info('Downloading OSM live tags to this way: %s.', osm_id) |
|
123
|
|
|
cached_way = db.query_from_cache(osm_id, osm_node) |
|
124
|
|
|
if cached_way is None: |
|
125
|
|
|
live_tags_container = osm_live_query.WayGet(osm_id) |
|
126
|
|
|
if live_tags_container is not None: |
|
|
|
|
|
|
127
|
|
|
data.at[i, 'osm_live_tags'] = live_tags_container.get('tag') |
|
128
|
|
|
cache_row = {'osm_id': int(osm_id), |
|
129
|
|
|
'osm_live_tags': live_tags_container.get('tag'), |
|
|
|
|
|
|
130
|
|
|
'osm_version': live_tags_container.get('version'), |
|
|
|
|
|
|
131
|
|
|
'osm_user': live_tags_container.get('user'), |
|
132
|
|
|
'osm_user_id': live_tags_container.get('uid'), |
|
133
|
|
|
'osm_changeset': live_tags_container.get('changeset'), |
|
|
|
|
|
|
134
|
|
|
'osm_timestamp': live_tags_container.get('timestamp'), |
|
|
|
|
|
|
135
|
|
|
'osm_object_type': osm_node, |
|
136
|
|
|
'osm_lat': None, |
|
137
|
|
|
'osm_lon': None, |
|
138
|
|
|
'osm_nodes': live_tags_container.get('nd')} |
|
139
|
|
|
get_or_create_cache(session, POI_OSM_cache, **cache_row) |
|
140
|
|
|
# Downloading referenced nodes of the way |
|
141
|
|
|
for way_nodes in live_tags_container['nd']: |
|
142
|
|
|
logging.debug('Getting node %s belongs to way %s', way_nodes, osm_id) |
|
|
|
|
|
|
143
|
|
|
live_tags_node = osm_live_query.NodeGet(way_nodes) |
|
144
|
|
|
cache_row = {'osm_id': int(way_nodes), |
|
145
|
|
|
'osm_live_tags': live_tags_node.get('tag'), |
|
146
|
|
|
'osm_version': live_tags_node.get('version'), |
|
|
|
|
|
|
147
|
|
|
'osm_user': live_tags_node.get('user'), |
|
148
|
|
|
'osm_user_id': live_tags_node.get('uid'), |
|
149
|
|
|
'osm_changeset': live_tags_node.get('changeset'), |
|
|
|
|
|
|
150
|
|
|
'osm_timestamp': live_tags_node.get('timestamp'), |
|
|
|
|
|
|
151
|
|
|
'osm_object_type': OSM_object_type.node, |
|
152
|
|
|
'osm_lat': live_tags_node.get('lat'), |
|
153
|
|
|
'osm_lon': live_tags_node.get('lon'), |
|
154
|
|
|
'osm_nodes': None} |
|
155
|
|
|
get_or_create_cache(session, POI_OSM_cache, **cache_row) |
|
156
|
|
|
break |
|
157
|
|
|
else: |
|
158
|
|
|
logging.warning('Download of external data has failed.') |
|
159
|
|
|
else: |
|
160
|
|
|
data.at[i, 'osm_live_tags'] = cached_way.get('osm_live_tags') |
|
161
|
|
|
break |
|
162
|
|
|
session.commit() |
|
163
|
|
|
# Download OSM POI node live tags |
|
164
|
|
|
elif osm_node == OSM_object_type.node: |
|
165
|
|
|
for rtc in range(0, RETRY): |
|
166
|
|
|
logging.info('Downloading OSM live tags to this node: %s.', osm_id) |
|
167
|
|
|
cached_node = db.query_from_cache(osm_id, osm_node) |
|
168
|
|
|
if cached_node is None: |
|
169
|
|
|
live_tags_container = osm_live_query.NodeGet(osm_id) |
|
170
|
|
|
if live_tags_container is not None: |
|
|
|
|
|
|
171
|
|
|
data.at[i, 'osm_live_tags'] = live_tags_container.get('tag') |
|
172
|
|
|
cache_row = {'osm_id': int(osm_id), |
|
173
|
|
|
'osm_live_tags': live_tags_container.get('tag'), |
|
|
|
|
|
|
174
|
|
|
'osm_version': live_tags_container.get('version'), |
|
|
|
|
|
|
175
|
|
|
'osm_user': live_tags_container.get('user'), |
|
176
|
|
|
'osm_user_id': live_tags_container.get('uid'), |
|
177
|
|
|
'osm_changeset': live_tags_container.get('changeset'), |
|
|
|
|
|
|
178
|
|
|
'osm_timestamp': live_tags_container.get('timestamp'), |
|
|
|
|
|
|
179
|
|
|
'osm_object_type': osm_node, |
|
180
|
|
|
'osm_lat': live_tags_container.get('lat'), |
|
181
|
|
|
'osm_lon': live_tags_container.get('lon'), |
|
182
|
|
|
'osm_nodes': None} |
|
183
|
|
|
get_or_create_cache(session, POI_OSM_cache, **cache_row) |
|
184
|
|
|
break |
|
185
|
|
|
else: |
|
186
|
|
|
logging.warning('Download of external data has failed.') |
|
187
|
|
|
else: |
|
188
|
|
|
data.at[i, 'osm_live_tags'] = cached_node.get('osm_live_tags') |
|
189
|
|
|
break |
|
190
|
|
|
session.commit() |
|
191
|
|
|
elif osm_node == OSM_object_type.relation: |
|
192
|
|
|
for rtc in range(0, RETRY): |
|
193
|
|
|
logging.info('Downloading OSM live tags to this relation: %s.', osm_id) |
|
|
|
|
|
|
194
|
|
|
live_tags_container = osm_live_query.RelationGet(abs(osm_id)) |
|
195
|
|
|
if live_tags_container is not None: |
|
|
|
|
|
|
196
|
|
|
data.at[i, 'osm_live_tags'] = live_tags_container.get('tag') |
|
197
|
|
|
break |
|
198
|
|
|
else: |
|
199
|
|
|
logging.warning('Download of external data has failed.') |
|
200
|
|
|
session.commit() |
|
201
|
|
|
else: |
|
202
|
|
|
logging.warning('Invalid state for live tags.') |
|
203
|
|
|
|
|
204
|
|
|
except Exception as e: |
|
|
|
|
|
|
205
|
|
|
logging.warning('There was an error during OSM request: %s.', e) |
|
206
|
|
|
logging.exception('Exception occurred') |
|
207
|
|
|
logging.warning('Live tag is: {}'.format(cached_node.get('osm_live_tags'))) |
|
|
|
|
|
|
208
|
|
|
# This is a new POI |
|
209
|
|
|
else: |
|
210
|
|
|
# This is a new POI - will add fix me tag to the new items. |
|
211
|
|
|
data.at[i, 'poi_new'] = True |
|
212
|
|
|
# Get the first character of then name of POI and generate a floating number between 0 and 1 |
|
|
|
|
|
|
213
|
|
|
# for a PostGIS function: https://postgis.net/docs/ST_LineInterpolatePoint.html |
|
214
|
|
|
# If there is more than one POI in a building this will try to do a different location and |
|
|
|
|
|
|
215
|
|
|
# not only on center or not only on edge |
|
216
|
|
|
ib = row.get('poi_name') |
|
|
|
|
|
|
217
|
|
|
if ib is not None: |
|
218
|
|
|
ibp = 1 - (((ord(ib[0]) // 16) + 1) / 17) |
|
219
|
|
|
else: |
|
220
|
|
|
ibp = 0.50 |
|
221
|
|
|
# Refine postcode |
|
222
|
|
|
osm_bulding_q = db.query_osm_building_poi_gpd(row.get('poi_lon'), row.get('poi_lat'), |
|
|
|
|
|
|
223
|
|
|
row.get('poi_city'), row.get('poi_postcode'), |
|
|
|
|
|
|
224
|
|
|
row.get('poi_addr_street'), |
|
225
|
|
|
row.get('poi_addr_housenumber'), |
|
226
|
|
|
in_building_percentage=ibp) |
|
227
|
|
|
if osm_bulding_q is not None: |
|
228
|
|
|
logging.info('Relocating POI coordinates to the building with same address: %s %s, %s %s', |
|
|
|
|
|
|
229
|
|
|
row.get('poi_lat'), row.get('poi_lon'), osm_bulding_q.get('lat')[0], |
|
|
|
|
|
|
230
|
|
|
osm_bulding_q.get('lon')[0]), |
|
231
|
|
|
row['poi_lat'], row['poi_lon'] = osm_bulding_q.get('lat')[0], osm_bulding_q.get('lon')[0] |
|
|
|
|
|
|
232
|
|
|
else: |
|
233
|
|
|
logging.info( |
|
234
|
|
|
'The POI is already in its building or there is no building match. \ |
|
235
|
|
|
Keeping POI coordinates as is as.') |
|
236
|
|
|
if row['preserve_original_post_code'] is not True: |
|
237
|
|
|
postcode = query_postcode_osm_external(config.get_geo_prefer_osm_postcode(), session, |
|
|
|
|
|
|
238
|
|
|
data.at[i, 'poi_lon'], data.at[i, 'poi_lat'], |
|
|
|
|
|
|
239
|
|
|
row.get('poi_postcode')) |
|
240
|
|
|
if postcode != row.get('poi_postcode'): |
|
241
|
|
|
logging.info('Changing postcode from %s to %s.', row.get('poi_postcode'), postcode) |
|
|
|
|
|
|
242
|
|
|
data.at[i, 'poi_postcode'] = postcode |
|
243
|
|
|
else: |
|
244
|
|
|
logging.info('Preserving original postcode %s', row.get('poi_postcode')) |
|
245
|
|
|
logging.info('New %s (not %s) type: %s POI: %s %s, %s %s (%s)', row.get('poi_search_name'), |
|
|
|
|
|
|
246
|
|
|
row.get('poi_search_avoid_name'), row.get('poi_type'), row.get('poi_postcode'), |
|
|
|
|
|
|
247
|
|
|
row.get('poi_city'), row.get('poi_addr_street'), |
|
248
|
|
|
row.get('poi_addr_housenumber'), row.get('poi_conscriptionnumber')) |
|
249
|
|
|
except Exception as e: |
|
|
|
|
|
|
250
|
|
|
logging.error(e) |
|
251
|
|
|
logging.error(row) |
|
252
|
|
|
logging.exception('Exception occurred') |
|
253
|
|
|
|
|
254
|
|
|
session.commit() |
|
255
|
|
|
return data |
|
256
|
|
|
except Exception as e: |
|
|
|
|
|
|
257
|
|
|
logging.error(e) |
|
258
|
|
|
logging.exception('Exception occurred') |
|
259
|
|
|
|
|
260
|
|
|
|
|
261
|
|
|
def smart_postcode_check(curr_data, osm_data, pc): |
|
|
|
|
|
|
262
|
|
|
""" |
|
263
|
|
|
Enhancement for the former problem: addr:postcode was changed without |
|
264
|
|
|
changing any other parts of address. Issue #78 |
|
265
|
|
|
|
|
266
|
|
|
When address or conscription number change or postcode is empty. |
|
267
|
|
|
""" |
|
268
|
|
|
# Change postcode when there is no postcode in OSM or the address was changed |
|
269
|
|
|
if pc is not None and pc != '' and osm_data.iloc[0, osm_data.columns.get_loc('addr:postcode')] != pc and \ |
|
|
|
|
|
|
270
|
|
|
(osm_data.iloc[0, osm_data.columns.get_loc('addr:postcode')] is None or |
|
271
|
|
|
osm_data.iloc[0, osm_data.columns.get_loc('addr:postcode')] == '') or \ |
|
272
|
|
|
(curr_data.get('poi_addr_housenumber') != osm_data.iloc[0, osm_data.columns.get_loc('addr:housenumber')] or |
|
|
|
|
|
|
273
|
|
|
curr_data.get('poi_addr_street') != osm_data.iloc[0, osm_data.columns.get_loc('addr:street')] or |
|
|
|
|
|
|
274
|
|
|
curr_data.get('poi_city') != osm_data.iloc[0, osm_data.columns.get_loc('addr:city')] or |
|
275
|
|
|
curr_data.get('poi_addr_conscriptionnumber') != osm_data.iloc[ |
|
276
|
|
|
0, osm_data.columns.get_loc('addr:conscriptionnumber')]): |
|
277
|
|
|
logging.info('Changing postcode from %s to %s.', curr_data.get('poi_postcode'), pc) |
|
278
|
|
|
return pc |
|
279
|
|
|
else: |
|
280
|
|
|
logging.debug('The postcode is %s.', osm_data.iloc[0, osm_data.columns.get_loc('addr:postcode')]) |
|
|
|
|
|
|
281
|
|
|
return osm_data.iloc[0, osm_data.columns.get_loc('addr:postcode')] |
|
282
|
|
|
|