|
1
|
|
|
# -*- coding: utf-8 -*- |
|
|
|
|
|
|
2
|
|
|
|
|
3
|
|
|
try: |
|
4
|
|
|
import logging |
|
5
|
|
|
import sys |
|
6
|
|
|
import requests |
|
|
|
|
|
|
7
|
|
|
import os |
|
8
|
|
|
import pandas as pd |
|
9
|
|
|
from osm_poi_matchmaker.utils import config |
|
10
|
|
|
from osm_poi_matchmaker.libs.soup import download_content |
|
11
|
|
|
except ImportError as err: |
|
12
|
|
|
logging.error('Error %s import module: %s', __name__, err) |
|
13
|
|
|
logging.exception('Exception occurred') |
|
14
|
|
|
|
|
15
|
|
|
sys.exit(128) |
|
16
|
|
|
|
|
17
|
|
|
|
|
18
|
|
|
def _read_tab_separated(file):
    """Load *file* as a UTF-8 encoded, tab-separated table into a DataFrame."""
    return pd.read_csv(file, encoding='UTF-8', sep='\t', skiprows=0)


def save_downloaded_pd(link, file, verify=config.get_download_verify_link(), headers=None):
    """Download a tab-separated dataset, store it in *file* and load it with pandas.

    Resolution order:

    1. If cached data is enabled in the configuration and *file* exists, the
       file is loaded and nothing is downloaded.
    2. Otherwise, if *link* is given, the content is fetched with
       ``download_content``, written to *file* as UTF-8 and loaded. When the
       fetch yields ``None`` an already existing *file* is used as fallback.
    3. If *link* is ``None`` only an already existing *file* is loaded.

    :param link: URL of the dataset, or ``None`` to work from *file* only.
    :param file: Path of the local file used both as download target and cache.
    :param verify: Passed through to ``download_content`` as its second
        argument (presumably the TLS verification switch -- confirm against
        ``download_content``). The default is read from the configuration once,
        when this module is imported.
    :param headers: Accepted for interface compatibility; currently NOT
        forwarded to ``download_content``.
    :return: The loaded ``pandas.DataFrame``, or ``None`` when neither a
        download nor a local file is available.
    """
    # Cache hit: parse it exactly like a freshly downloaded file, because the
    # cache IS the file written further below (tab separated, UTF-8).
    if config.get_download_use_cached_data() is True and os.path.isfile(file):
        return _read_tab_separated(file)

    if link is None:
        # No URL configured: a manually maintained local file is the only source.
        if os.path.exists(file):
            df = _read_tab_separated(file)
            logging.info(
                'Using file only: %s. There is not downloadable URL only just the file. Do not forget to update file manually!',
                file)
            return df
        logging.warning(
            'Cannot use download and file: %s. There is not downloadable URL, nor already downbloaded file.',
            file)
        return None

    # NOTE(review): 'utf-16' is presumably the encoding of the remote content;
    # confirm against download_content's signature.
    cvs = download_content(link, verify, None, None, 'utf-16')
    if cvs is not None:
        logging.info('We got content, write to file.')
        # exist_ok avoids the check-then-create race of a separate exists() test.
        os.makedirs(config.get_directory_cache_url(), exist_ok=True)
        with open(file, mode='w', encoding='utf-8') as code:
            code.write(cvs)
        return _read_tab_separated(file)

    # Download yielded nothing: fall back to a previously stored copy, if any.
    if os.path.exists(file):
        logging.info(
            'The %s link returned error code other than 200 but there is an already downloaded file. Try to open it.',
            link)
        return _read_tab_separated(file)

    logging.warning(
        'Skipping dataset: %s. There is not downloadable URL, nor already downbloaded file.', link)
    # Explicit None instead of falling through to an unbound local variable.
    return None
|
|
|
|
|
|
51
|
|
|
|