1
|
|
|
# -*- coding: utf-8 -*- |
|
|
|
|
2
|
|
|
|
3
|
|
|
__author__ = 'kami911' |
4
|
|
|
|
5
|
|
|
try: |
6
|
|
|
import logging |
7
|
|
|
import sys |
8
|
|
|
import pandas as pd |
9
|
|
|
from sqlalchemy.orm import scoped_session, sessionmaker |
10
|
|
|
from osmapi import OsmApi |
11
|
|
|
from osm_poi_matchmaker.dao.poi_base import POIBase |
12
|
|
|
from osm_poi_matchmaker.utils import config |
13
|
|
|
from osm_poi_matchmaker.dao.data_structure import OSM_object_type, POI_OSM_cache |
14
|
|
|
from osm_poi_matchmaker.libs.osm import query_postcode_osm_external |
15
|
|
|
from osm_poi_matchmaker.dao.data_handlers import get_or_create_cache |
16
|
|
|
except ImportError as err: |
17
|
|
|
logging.error('Error %s import module: %s', __name__, err) |
18
|
|
|
logging.exception('Exception occurred') |
19
|
|
|
|
20
|
|
|
sys.exit(128) |
21
|
|
|
|
22
|
|
|
RETRY = 3 |
23
|
|
|
|
24
|
|
|
|
25
|
|
|
def online_poi_matching(args): |
|
|
|
|
26
|
|
|
data, comm_data = args |
27
|
|
|
try: |
|
|
|
|
28
|
|
|
db = POIBase('{}://{}:{}@{}:{}/{}'.format(config.get_database_type(), config.get_database_writer_username(), |
|
|
|
|
29
|
|
|
config.get_database_writer_password(), |
30
|
|
|
config.get_database_writer_host(), |
31
|
|
|
config.get_database_writer_port(), |
32
|
|
|
config.get_database_poi_database())) |
33
|
|
|
pgsql_pool = db.pool |
34
|
|
|
session_factory = sessionmaker(pgsql_pool) |
35
|
|
|
Session = scoped_session(session_factory) |
|
|
|
|
36
|
|
|
session = Session() |
37
|
|
|
osm_live_query = OsmApi() |
38
|
|
|
for i, row in data.iterrows(): |
39
|
|
|
# for i, row in data[data['poi_code'].str.contains('posta')].iterrows(): |
40
|
|
|
try: |
41
|
|
|
# Try to search OSM POI with same type, and name contains poi_search_name within the specified distance |
|
|
|
|
42
|
|
|
osm_query = db.query_osm_shop_poi_gpd(row.get('poi_lon'), row.get('poi_lat'), |
43
|
|
|
comm_data.loc[comm_data['pc_id'] == row.get('poi_common_id')][ |
|
|
|
|
44
|
|
|
'poi_type'].values[0], row.get('poi_search_name'), |
|
|
|
|
45
|
|
|
row.get('poi_search_avoid_name'), |
46
|
|
|
row.get('poi_addr_street'), row.get('poi_addr_housenumber'), |
|
|
|
|
47
|
|
|
row.get('poi_conscriptionnumber'), row.get('poi_city'), |
|
|
|
|
48
|
|
|
row.get('osm_search_distance_perfect'), |
49
|
|
|
row.get('osm_search_distance_safe'), |
50
|
|
|
row.get('osm_search_distance_unsafe')) |
51
|
|
|
# Enrich our data with OSM database POI metadata |
52
|
|
|
if osm_query is not None: |
53
|
|
|
row['poi_new'] = False |
54
|
|
|
# Collect additional OSM metadata. Note: this needs style change during osm2pgsql |
|
|
|
|
55
|
|
|
osm_id = osm_query['osm_id'].values[0] if osm_query.get('osm_id') is not None else None |
|
|
|
|
56
|
|
|
osm_node = osm_query.get('node').values[0] if osm_query.get('node') is not None else None |
|
|
|
|
57
|
|
|
# Set OSM POI coordinates for all kind of geom |
58
|
|
|
lat = osm_query.get('lat').values[0] |
59
|
|
|
lon = osm_query.get('lon').values[0] |
60
|
|
|
if data.at[i, 'poi_lat'] != lat and data.at[i, 'poi_lon'] != lon: |
61
|
|
|
logging.info('Using new coodinates %s %s instead of %s %s.', |
62
|
|
|
lat, lon, data.at[i, 'poi_lat'], data.at[i, 'poi_lon']) |
63
|
|
|
data.at[i, 'poi_lat'] = lat |
64
|
|
|
data.at[i, 'poi_lon'] = lon |
65
|
|
|
if osm_node == 'node': |
66
|
|
|
osm_node = OSM_object_type.node |
67
|
|
|
elif osm_node == 'way': |
68
|
|
|
osm_node = OSM_object_type.way |
69
|
|
|
elif osm_node == 'relation': |
70
|
|
|
osm_node = OSM_object_type.relation |
71
|
|
|
else: |
72
|
|
|
logging.warning('Illegal state: %s', osm_query['node'].values[0]) |
73
|
|
|
data.at[i, 'osm_id'] = osm_id |
74
|
|
|
data.at[i, 'osm_node'] = osm_node |
75
|
|
|
# Refine postcode |
76
|
|
|
if row['preserve_original_post_code'] is not True: |
77
|
|
|
# Current OSM postcode based on lat,long query. |
78
|
|
|
postcode = query_postcode_osm_external(config.get_geo_prefer_osm_postcode(), session, lon, lat, row.get('poi_postcode')) |
|
|
|
|
79
|
|
|
force_postcode_change = False # TODO: Has to be a setting in app.conf |
|
|
|
|
80
|
|
|
if force_postcode_change is True: |
81
|
|
|
# Force to use datasource postcode |
82
|
|
|
if postcode != row.get('poi_postcode'): |
83
|
|
|
logging.info('Changing postcode from %s to %s.', row.get('poi_postcode'), postcode) |
|
|
|
|
84
|
|
|
data.at[i, 'poi_postcode'] = postcode |
85
|
|
|
else: |
86
|
|
|
# Try to use smart method for postcode check |
87
|
|
|
ch_posctode = smart_postcode_check(row, osm_query, postcode) |
88
|
|
|
if ch_posctode is not None: |
89
|
|
|
data.at[i, 'poi_postcode'] = ch_posctode |
90
|
|
|
else: |
91
|
|
|
logging.info('Preserving original postcode %s', row.get('poi_postcode')) |
92
|
|
|
data.at[i, 'osm_version'] = osm_query['osm_version'].values[0] \ |
93
|
|
|
if osm_query['osm_version'] is not None else None |
94
|
|
|
data.at[i, 'osm_changeset'] = osm_query['osm_changeset'].values[0] \ |
95
|
|
|
if osm_query['osm_changeset'] is not None else None |
96
|
|
|
if osm_query['osm_timestamp'] is not None: |
97
|
|
|
osm_query['osm_timestamp'] = \ |
98
|
|
|
data.at[i, 'osm_timestamp'] = pd.to_datetime(str((osm_query['osm_timestamp'].values[0]))) |
|
|
|
|
99
|
|
|
else: |
100
|
|
|
osm_query['osm_timestamp'] = None |
101
|
|
|
data.at[i, 'poi_distance'] = osm_query.get('distance').values[0] if osm_query.get('distance') is not None else None |
|
|
|
|
102
|
|
|
# For OSM way also query node points |
103
|
|
|
if osm_node == OSM_object_type.way: |
104
|
|
|
logging.info('This is an OSM way looking for id %s nodes.', osm_id) |
105
|
|
|
# Add list of nodes to the dataframe |
106
|
|
|
nodes = db.query_ways_nodes(osm_id) |
107
|
|
|
data.at[i, 'osm_nodes'] = nodes |
108
|
|
|
elif osm_node == OSM_object_type.relation: |
109
|
|
|
logging.info('This is an OSM relation looking for id %s nodes.', osm_id) |
110
|
|
|
# Add list of relation nodes to the dataframe |
111
|
|
|
nodes = db.query_relation_nodes(osm_id) |
112
|
|
|
data.at[i, 'osm_nodes'] = nodes |
113
|
|
|
logging.info('Old %s (not %s) type: %s POI within %s m: %s %s, %s %s (%s)', |
114
|
|
|
data.at[i, 'poi_search_name'], data.at[i, 'poi_search_avoid_name'], |
|
|
|
|
115
|
|
|
data.at[i, 'poi_type'], data.at[i, 'poi_distance'], |
116
|
|
|
data.at[i, 'poi_postcode'], data.at[i, 'poi_city'], data.at[i, 'poi_addr_street'], |
|
|
|
|
117
|
|
|
data.at[i, 'poi_addr_housenumber'], data.at[i, 'poi_conscriptionnumber']) |
|
|
|
|
118
|
|
|
try: |
119
|
|
|
# Download OSM POI way live tags |
120
|
|
|
if osm_node == OSM_object_type.way: |
121
|
|
|
for rtc in range(0, RETRY): |
|
|
|
|
122
|
|
|
logging.info('Downloading OSM live tags to this way: %s.', osm_id) |
123
|
|
|
cached_way = db.query_from_cache(osm_id, osm_node) |
124
|
|
|
if cached_way is None: |
125
|
|
|
live_tags_container = osm_live_query.WayGet(osm_id) |
126
|
|
|
if live_tags_container is not None: |
|
|
|
|
127
|
|
|
data.at[i, 'osm_live_tags'] = live_tags_container.get('tag') |
128
|
|
|
cache_row = {'osm_id': int(osm_id), |
129
|
|
|
'osm_live_tags': live_tags_container.get('tag'), |
|
|
|
|
130
|
|
|
'osm_version': live_tags_container.get('version'), |
|
|
|
|
131
|
|
|
'osm_user': live_tags_container.get('user'), |
132
|
|
|
'osm_user_id': live_tags_container.get('uid'), |
133
|
|
|
'osm_changeset': live_tags_container.get('changeset'), |
|
|
|
|
134
|
|
|
'osm_timestamp': live_tags_container.get('timestamp'), |
|
|
|
|
135
|
|
|
'osm_object_type': osm_node, |
136
|
|
|
'osm_lat': None, |
137
|
|
|
'osm_lon': None, |
138
|
|
|
'osm_nodes': live_tags_container.get('nd')} |
139
|
|
|
get_or_create_cache(session, POI_OSM_cache, **cache_row) |
140
|
|
|
# Downloading referenced nodes of the way |
141
|
|
|
for way_nodes in live_tags_container['nd']: |
142
|
|
|
logging.debug('Getting node %s belongs to way %s', way_nodes, osm_id) |
|
|
|
|
143
|
|
|
live_tags_node = osm_live_query.NodeGet(way_nodes) |
144
|
|
|
cache_row = {'osm_id': int(way_nodes), |
145
|
|
|
'osm_live_tags': live_tags_node.get('tag'), |
146
|
|
|
'osm_version': live_tags_node.get('version'), |
|
|
|
|
147
|
|
|
'osm_user': live_tags_node.get('user'), |
148
|
|
|
'osm_user_id': live_tags_node.get('uid'), |
149
|
|
|
'osm_changeset': live_tags_node.get('changeset'), |
|
|
|
|
150
|
|
|
'osm_timestamp': live_tags_node.get('timestamp'), |
|
|
|
|
151
|
|
|
'osm_object_type': OSM_object_type.node, |
152
|
|
|
'osm_lat': live_tags_node.get('lat'), |
153
|
|
|
'osm_lon': live_tags_node.get('lon'), |
154
|
|
|
'osm_nodes': None} |
155
|
|
|
get_or_create_cache(session, POI_OSM_cache, **cache_row) |
156
|
|
|
break |
157
|
|
|
else: |
158
|
|
|
logging.warning('Download of external data has failed.') |
159
|
|
|
else: |
160
|
|
|
data.at[i, 'osm_live_tags'] = cached_way.get('osm_live_tags') |
161
|
|
|
break |
162
|
|
|
session.commit() |
163
|
|
|
# Download OSM POI node live tags |
164
|
|
|
elif osm_node == OSM_object_type.node: |
165
|
|
|
for rtc in range(0, RETRY): |
166
|
|
|
logging.info('Downloading OSM live tags to this node: %s.', osm_id) |
167
|
|
|
cached_node = db.query_from_cache(osm_id, osm_node) |
168
|
|
|
if cached_node is None: |
169
|
|
|
live_tags_container = osm_live_query.NodeGet(osm_id) |
170
|
|
|
if live_tags_container is not None: |
|
|
|
|
171
|
|
|
data.at[i, 'osm_live_tags'] = live_tags_container.get('tag') |
172
|
|
|
cache_row = {'osm_id': int(osm_id), |
173
|
|
|
'osm_live_tags': live_tags_container.get('tag'), |
|
|
|
|
174
|
|
|
'osm_version': live_tags_container.get('version'), |
|
|
|
|
175
|
|
|
'osm_user': live_tags_container.get('user'), |
176
|
|
|
'osm_user_id': live_tags_container.get('uid'), |
177
|
|
|
'osm_changeset': live_tags_container.get('changeset'), |
|
|
|
|
178
|
|
|
'osm_timestamp': live_tags_container.get('timestamp'), |
|
|
|
|
179
|
|
|
'osm_object_type': osm_node, |
180
|
|
|
'osm_lat': live_tags_container.get('lat'), |
181
|
|
|
'osm_lon': live_tags_container.get('lon'), |
182
|
|
|
'osm_nodes': None} |
183
|
|
|
get_or_create_cache(session, POI_OSM_cache, **cache_row) |
184
|
|
|
break |
185
|
|
|
else: |
186
|
|
|
logging.warning('Download of external data has failed.') |
187
|
|
|
else: |
188
|
|
|
data.at[i, 'osm_live_tags'] = cached_node.get('osm_live_tags') |
189
|
|
|
break |
190
|
|
|
session.commit() |
191
|
|
|
elif osm_node == OSM_object_type.relation: |
192
|
|
|
for rtc in range(0, RETRY): |
193
|
|
|
logging.info('Downloading OSM live tags to this relation: %s.', osm_id) |
|
|
|
|
194
|
|
|
live_tags_container = osm_live_query.RelationGet(abs(osm_id)) |
195
|
|
|
if live_tags_container is not None: |
|
|
|
|
196
|
|
|
data.at[i, 'osm_live_tags'] = live_tags_container.get('tag') |
197
|
|
|
break |
198
|
|
|
else: |
199
|
|
|
logging.warning('Download of external data has failed.') |
200
|
|
|
session.commit() |
201
|
|
|
else: |
202
|
|
|
logging.warning('Invalid state for live tags.') |
203
|
|
|
|
204
|
|
|
except Exception as e: |
|
|
|
|
205
|
|
|
logging.warning('There was an error during OSM request: %s.', e) |
206
|
|
|
logging.exception('Exception occurred') |
207
|
|
|
logging.warning('Live tag is: {}'.format(cached_node.get('osm_live_tags'))) |
|
|
|
|
208
|
|
|
# This is a new POI |
209
|
|
|
else: |
210
|
|
|
# This is a new POI - will add fix me tag to the new items. |
211
|
|
|
data.at[i, 'poi_new'] = True |
212
|
|
|
# Get the first character of then name of POI and generate a floating number between 0 and 1 |
|
|
|
|
213
|
|
|
# for a PostGIS function: https://postgis.net/docs/ST_LineInterpolatePoint.html |
214
|
|
|
# If there is more than one POI in a building this will try to do a different location and |
|
|
|
|
215
|
|
|
# not only on center or not only on edge |
216
|
|
|
ib = row.get('poi_name') |
|
|
|
|
217
|
|
|
if ib is not None: |
218
|
|
|
ibp = 1 - (((ord(ib[0]) // 16) + 1) / 17) |
219
|
|
|
else: |
220
|
|
|
ibp = 0.50 |
221
|
|
|
# Refine postcode |
222
|
|
|
osm_bulding_q = db.query_osm_building_poi_gpd(row.get('poi_lon'), row.get('poi_lat'), |
|
|
|
|
223
|
|
|
row.get('poi_city'), row.get('poi_postcode'), |
|
|
|
|
224
|
|
|
row.get('poi_addr_street'), |
225
|
|
|
row.get('poi_addr_housenumber'), |
226
|
|
|
in_building_percentage=ibp) |
227
|
|
|
if osm_bulding_q is not None: |
228
|
|
|
logging.info('Relocating POI coordinates to the building with same address: %s %s, %s %s', |
|
|
|
|
229
|
|
|
row.get('poi_lat'), row.get('poi_lon'), osm_bulding_q.get('lat')[0], |
|
|
|
|
230
|
|
|
osm_bulding_q.get('lon')[0]), |
231
|
|
|
row['poi_lat'], row['poi_lon'] = osm_bulding_q.get('lat')[0], osm_bulding_q.get('lon')[0] |
|
|
|
|
232
|
|
|
else: |
233
|
|
|
logging.info( |
234
|
|
|
'The POI is already in its building or there is no building match. \ |
235
|
|
|
Keeping POI coordinates as is as.') |
236
|
|
|
if row['preserve_original_post_code'] is not True: |
237
|
|
|
postcode = query_postcode_osm_external(config.get_geo_prefer_osm_postcode(), session, |
|
|
|
|
238
|
|
|
data.at[i, 'poi_lon'], data.at[i, 'poi_lat'], |
|
|
|
|
239
|
|
|
row.get('poi_postcode')) |
240
|
|
|
if postcode != row.get('poi_postcode'): |
241
|
|
|
logging.info('Changing postcode from %s to %s.', row.get('poi_postcode'), postcode) |
|
|
|
|
242
|
|
|
data.at[i, 'poi_postcode'] = postcode |
243
|
|
|
else: |
244
|
|
|
logging.info('Preserving original postcode %s', row.get('poi_postcode')) |
245
|
|
|
logging.info('New %s (not %s) type: %s POI: %s %s, %s %s (%s)', row.get('poi_search_name'), |
|
|
|
|
246
|
|
|
row.get('poi_search_avoid_name'), row.get('poi_type'), row.get('poi_postcode'), |
|
|
|
|
247
|
|
|
row.get('poi_city'), row.get('poi_addr_street'), |
248
|
|
|
row.get('poi_addr_housenumber'), row.get('poi_conscriptionnumber')) |
249
|
|
|
except Exception as e: |
|
|
|
|
250
|
|
|
logging.error(e) |
251
|
|
|
logging.error(row) |
252
|
|
|
logging.exception('Exception occurred') |
253
|
|
|
|
254
|
|
|
session.commit() |
255
|
|
|
return data |
256
|
|
|
except Exception as e: |
|
|
|
|
257
|
|
|
logging.error(e) |
258
|
|
|
logging.exception('Exception occurred') |
259
|
|
|
|
260
|
|
|
|
261
|
|
|
def smart_postcode_check(curr_data, osm_data, pc): |
|
|
|
|
262
|
|
|
""" |
263
|
|
|
Enhancement for the former problem: addr:postcode was changed without |
264
|
|
|
changing any other parts of address. Issue #78 |
265
|
|
|
|
266
|
|
|
When address or conscription number change or postcode is empty. |
267
|
|
|
""" |
268
|
|
|
# Change postcode when there is no postcode in OSM or the address was changed |
269
|
|
|
if pc is not None and pc != '' and osm_data.iloc[0, osm_data.columns.get_loc('addr:postcode')] != pc and \ |
|
|
|
|
270
|
|
|
(osm_data.iloc[0, osm_data.columns.get_loc('addr:postcode')] is None or |
271
|
|
|
osm_data.iloc[0, osm_data.columns.get_loc('addr:postcode')] == '') or \ |
272
|
|
|
(curr_data.get('poi_addr_housenumber') != osm_data.iloc[0, osm_data.columns.get_loc('addr:housenumber')] or |
|
|
|
|
273
|
|
|
curr_data.get('poi_addr_street') != osm_data.iloc[0, osm_data.columns.get_loc('addr:street')] or |
|
|
|
|
274
|
|
|
curr_data.get('poi_city') != osm_data.iloc[0, osm_data.columns.get_loc('addr:city')] or |
275
|
|
|
curr_data.get('poi_addr_conscriptionnumber') != osm_data.iloc[ |
276
|
|
|
0, osm_data.columns.get_loc('addr:conscriptionnumber')]): |
277
|
|
|
logging.info('Changing postcode from %s to %s.', curr_data.get('poi_postcode'), pc) |
278
|
|
|
return pc |
279
|
|
|
else: |
280
|
|
|
logging.debug('The postcode is %s.', osm_data.iloc[0, osm_data.columns.get_loc('addr:postcode')]) |
|
|
|
|
281
|
|
|
return osm_data.iloc[0, osm_data.columns.get_loc('addr:postcode')] |
282
|
|
|
|