|
1
|
|
|
# -*- coding: utf-8 -*- |
|
|
|
|
|
|
2
|
|
|
|
|
3
|
|
|
try: |
|
4
|
|
|
import logging |
|
5
|
|
|
import sys |
|
6
|
|
|
import os |
|
7
|
|
|
import re |
|
|
|
|
|
|
8
|
|
|
import json |
|
9
|
|
|
from osm_poi_matchmaker.libs.soup import save_downloaded_soup |
|
10
|
|
|
from osm_poi_matchmaker.libs.address import extract_street_housenumber_better_2, clean_city, \ |
|
11
|
|
|
extract_javascript_variable, clean_opening_hours_2, clean_phone_to_str |
|
12
|
|
|
from osm_poi_matchmaker.libs.geo import check_hu_boundary |
|
13
|
|
|
from osm_poi_matchmaker.libs.osm_tag_sets import POS_HU_GEN, PAY_CASH |
|
14
|
|
|
from osm_poi_matchmaker.utils.data_provider import DataProvider |
|
15
|
|
|
from osm_poi_matchmaker.utils.enums import FileType |
|
16
|
|
|
except ImportError as err: |
|
17
|
|
|
logging.error('Error %s import module: %s', __name__, err) |
|
18
|
|
|
logging.exception('Exception occurred') |
|
19
|
|
|
|
|
20
|
|
|
sys.exit(128) |
|
21
|
|
|
|
|
22
|
|
|
|
|
23
|
|
|
class hu_cba(DataProvider):
    """Data provider for the CBA and Príma shops published on the cba.hu store list.

    The store list page embeds the shop records in a JavaScript variable
    named ``boltok_nyers``; ``process`` decodes that variable as JSON and
    feeds one POI per record into ``self.data``.
    """

    def constains(self):
        """Set the source link, the common tag set and the cache file name.

        NOTE(review): the method name is spelled ``constains`` in the original;
        it is kept unchanged because callers outside this block may rely on it.
        """
        self.link = 'http://www.cba.hu/uzletlista'
        # Tags shared by every POI type of this provider.
        self.tags = {}
        self.tags.update(POS_HU_GEN)
        self.tags.update(PAY_CASH)
        self.filetype = FileType.html
        # Cache file is named after the class and the file type, e.g. "hu_cba.html".
        self.filename = '{}.{}'.format(
            self.__class__.__name__, self.filetype.name)

    def types(self):
        """Build and return the list of POI type descriptors of this provider.

        Four types are described: CBA and Príma, each as a convenience shop
        and as a supermarket. Every tag dictionary is extended with
        ``self.tags``, so ``constains`` has to run before this method.

        Returns:
            A list of dictionaries, one per POI type.
        """
        hucbacon = {'shop': 'convenience', 'brand': 'CBA', }
        hucbasup = {'shop': 'supermarket', 'brand': 'CBA', }
        huprimacon = {'shop': 'convenience', 'brand': 'Príma', }
        huprimasup = {'shop': 'supermarket', 'brand': 'Príma', }
        # Merge the common tags into every type specific tag set (in place).
        for type_tags in [hucbacon, hucbasup, huprimacon, huprimasup]:
            type_tags.update(self.tags)
        self.__types = [
            {'poi_code': 'hucbacon', 'poi_name': 'CBA', 'poi_type': 'shop',
             'poi_tags': hucbacon, 'poi_url_base': 'https://www.cba.hu', 'poi_search_name': '(cba abc|cba)',
             'osm_search_distance_perfect': 2000, 'osm_search_distance_safe': 200, 'osm_search_distance_unsafe': 5,
             'preserve_original_name': True},
            {'poi_code': 'hucbasup', 'poi_name': 'CBA', 'poi_type': 'shop',
             'poi_tags': hucbasup, 'poi_url_base': 'https://www.cba.hu', 'poi_search_name': '(cba abc|cba)',
             'osm_search_distance_perfect': 2000, 'osm_search_distance_safe': 200, 'osm_search_distance_unsafe': 5,
             'preserve_original_name': True},
            {'poi_code': 'huprimacon', 'poi_name': 'Príma', 'poi_type': 'shop',
             'poi_tags': huprimacon, 'poi_url_base': 'https://www.prima.hu',
             'poi_search_name': '(príma abc|prima abc|príma|prima)',
             'osm_search_distance_perfect': 2000, 'osm_search_distance_safe': 200, 'osm_search_distance_unsafe': 23,
             'preserve_original_name': True},
            {'poi_code': 'huprimasup', 'poi_name': 'Príma', 'poi_type': 'shop',
             'poi_tags': huprimasup, 'poi_url_base': 'https://www.prima.hu',
             'poi_search_name': '(príma abc|prima abc|príma|prima)',
             'osm_search_distance_perfect': 2000, 'osm_search_distance_safe': 200, 'osm_search_distance_unsafe': 23,
             'preserve_original_name': True},
        ]
        return self.__types

    @staticmethod
    def _strip_or_none(value):
        """Return ``value.strip()`` for strings and ``value`` unchanged otherwise.

        A missing key (``None``) or a non-string JSON value therefore no longer
        raises ``AttributeError`` the way a bare ``.strip()`` call does.
        """
        return value.strip() if isinstance(value, str) else value

    def process(self):
        """Download the store list and add every shop record to ``self.data``.

        Any exception raised while downloading or processing is logged and
        swallowed; nothing is returned.
        """
        try:
            soup = save_downloaded_soup('{}'.format(self.link), os.path.join(self.download_cache, self.filename),
                                        self.filetype)
            if soup is not None:
                # The shop records are stored in the `boltok_nyers` JavaScript
                # variable of the downloaded page; decode it as JSON.
                text = json.loads(
                    extract_javascript_variable(soup, 'boltok_nyers'))
                for poi_data in text:
                    # Assign: code, postcode, city, name, branch, website, original, street, housenumber, conscriptionnumber, ref, geom
                    self.data.city = clean_city(poi_data.get('A_VAROS'))
                    # Missing postcode / branch name must not abort the whole import.
                    self.data.postcode = self._strip_or_none(poi_data.get('A_IRSZ'))
                    self.data.branch = self._strip_or_none(poi_data.get('P_NAME'))
                    # Read the branch back from the data set, as the original code did.
                    stored_branch = self.data.branch
                    is_prima = stored_branch is not None and 'Príma' in stored_branch
                    self.data.name = 'Príma' if is_prima else 'CBA'
                    # NOTE(review): only the convenience codes are ever assigned here;
                    # the supermarket codes declared in types() are never used - confirm.
                    self.data.code = 'huprimacon' if is_prima else 'hucbacon'
                    # Source keys are numbered 1..7 while the day index passed on is 0..6.
                    for i in range(0, 7):
                        open_from = poi_data.get('PS_OPEN_FROM_{}'.format(i + 1))
                        self.data.day_open(
                            i, clean_opening_hours_2(open_from) if open_from is not None else None)
                        open_to = poi_data.get('PS_OPEN_TO_{}'.format(i + 1))
                        self.data.day_close(
                            i, clean_opening_hours_2(open_to) if open_to is not None else None)
                    self.data.original = poi_data.get('A_CIM')
                    self.data.lat, self.data.lon = check_hu_boundary(poi_data.get('PS_GPS_COORDS_LAT'),
                                                                     poi_data.get('PS_GPS_COORDS_LNG'))
                    self.data.street, self.data.housenumber, self.data.conscriptionnumber = \
                        extract_street_housenumber_better_2(poi_data.get('A_CIM'))
                    if 'PS_PUBLIC_TEL' in poi_data and poi_data.get('PS_PUBLIC_TEL') != '':
                        self.data.phone = clean_phone_to_str(
                            poi_data.get('PS_PUBLIC_TEL'))
                    else:
                        self.data.phone = None
                    # A missing key and an empty string both mean "no e-mail".
                    email = poi_data.get('PS_PUBLIC_EMAIL')
                    self.data.email = email if email != '' else None
                    self.data.public_holiday_open = False
                    self.data.add()
        except Exception as e:
            logging.exception('Exception occurred')

            logging.error(e)
|
107
|
|
|
|