1
|
|
|
# -*- coding: utf-8 -*- |
|
|
|
|
2
|
|
|
|
3
|
|
|
try: |
4
|
|
|
import logging |
5
|
|
|
import sys |
6
|
|
|
import requests |
7
|
|
|
import os |
8
|
|
|
from bs4 import BeautifulSoup |
9
|
|
|
from osm_poi_matchmaker.utils import config |
10
|
|
|
from osm_poi_matchmaker.utils.enums import FileType |
11
|
|
|
except ImportError as err: |
12
|
|
|
logging.error('Error %s import module: %s', __name__, err) |
13
|
|
|
logging.exception('Exception occurred') |
14
|
|
|
|
15
|
|
|
sys.exit(128) |
16
|
|
|
|
17
|
|
|
|
18
|
|
|
def download_content(link, verify_link=config.get_download_verify_link(), post_parm=None, headers=None,
                     encoding='utf-8'):
    """Download the body of ``link`` via HTTP GET (or POST when a payload is given).

    :param link: URL to download.
    :param verify_link: TLS certificate verification flag passed straight to requests.
        NOTE(review): the default is evaluated once at import time — confirm that is intended.
    :param post_parm: optional POST payload; when ``None`` a plain GET is issued.
    :param headers: optional HTTP header dict; it is NOT modified by this call.
    :param encoding: encoding forced onto the response before decoding its text.
    :return: the decoded response body on HTTP 200, otherwise ``None``
        (also ``None`` on connection failure).
    """
    try:
        if post_parm is None:
            logging.debug('Downloading without post parameters.')
            page = requests.get(link, verify=verify_link, headers=headers)
        else:
            logging.debug('Downloading with post parameters.')
            headers_static = {"Content-Type": "application/x-www-form-urlencoded; charset=UTF-8"}
            # Merge into a fresh dict: the original code called headers.update(...),
            # which mutated the caller's dict as a side effect.
            headers = {**headers, **headers_static} if headers is not None else headers_static
            page = requests.post(link, verify=verify_link, data=post_parm, headers=headers)
        # Force the declared encoding so page.text decodes consistently (was duplicated
        # in both branches before).
        page.encoding = encoding
    except requests.exceptions.ConnectionError as e:
        logging.warning('Unable to open connection. (%s)', e)
        return None
    return page.text if page.status_code == 200 else None
38
|
|
|
|
39
|
|
|
|
40
|
|
|
def save_downloaded_soup(link, file, filetype, post_data=None, verify=config.get_download_verify_link(), headers=None):
    """Obtain a dataset either from ``link`` or from the cached ``file`` on disk.

    Freshly downloaded content is written to ``file`` (prettified for HTML/XML,
    verbatim for CSV/JSON). When downloading is impossible, an already cached
    file is used as a fallback.

    :param link: source URL, or ``None`` to force file-only operation.
    :param file: path of the cache file to read/write.
    :param filetype: a ``FileType`` member selecting how content is parsed/saved.
    :param post_data: optional POST payload forwarded to ``download_content``.
    :param verify: TLS verification flag forwarded to ``download_content``.
    :param headers: optional HTTP headers forwarded to ``download_content``.
    :return: parsed soup / raw text, or ``None`` when nothing could be obtained.
    """
    # Initialize up front: the original code hit UnboundLocalError at `return soup`
    # when there was neither a downloadable URL nor a cached file.
    soup = None
    if config.get_download_use_cached_data() is True and os.path.isfile(file):
        soup = readfile(file, filetype)
    elif link is not None:
        soup = download_content(link, verify, post_data, headers)
        if soup is not None:
            logging.info('We got content, write to file.')
            if not os.path.exists(config.get_directory_cache_url()):
                os.makedirs(config.get_directory_cache_url())
            with open(file, mode='w', encoding='utf-8') as code:
                if filetype == FileType.html:
                    soup = BeautifulSoup(soup, 'html.parser')
                    code.write(str(soup.prettify()))
                elif filetype == FileType.xml:
                    soup = BeautifulSoup(soup, 'lxml', from_encoding='utf-8')
                    logging.debug('original encoding: %s', soup.original_encoding)
                    code.write(str(soup.prettify()))
                elif filetype in (FileType.csv, FileType.json):
                    code.write(str(soup))
                else:
                    logging.error('Unexpected type to write: %s', filetype)
        elif os.path.exists(file):
            # Download failed but a stale cache exists — better than nothing.
            logging.info(
                'The %s link returned error code other than 200 but there is an already downloaded file. Try to open it.',
                link)
            soup = readfile(file, filetype)
        else:
            # Typo fixed in the message below ("downbloaded" -> "downloaded").
            logging.warning(
                'Skipping dataset: %s. There is not downloadable URL, nor already downloaded file.', link)
    elif os.path.exists(file):
        # File-only mode: no URL at all, rely on the manually maintained file.
        soup = readfile(file, filetype)
        if filetype == FileType.html:
            soup = BeautifulSoup(soup, 'html.parser')
        elif filetype == FileType.xml:
            soup = BeautifulSoup(soup, 'lxml')
        logging.info(
            'Using file only: %s. There is not downloadable URL only just the file. Do not forget to update file manually!',
            file)
    else:
        logging.warning(
            'Cannot use download and file: %s. There is not downloadable URL, nor already downloaded file.',
            file)
    return soup
|
|
|
|
86
|
|
|
|
87
|
|
|
|
88
|
|
|
def readfile(r_filename, r_filetype):
    """Read a cached dataset file from disk.

    :param r_filename: path of the cached file.
    :param r_filetype: a ``FileType`` member selecting how the content is returned.
    :return: a ``BeautifulSoup`` object for HTML, the raw text for CSV/JSON/XML,
        or ``None`` when the file is missing, the type is unknown, or reading fails.
    """
    try:
        if not os.path.exists(r_filename):
            return None
        with open(r_filename, mode='r', encoding='utf-8') as code:
            if r_filetype == FileType.html:
                return BeautifulSoup(code.read(), 'html.parser')
            if r_filetype in (FileType.csv, FileType.json, FileType.xml):
                return code.read()
            # Explicit None: the original returned an unbound `soup` here, raising
            # UnboundLocalError that was silently absorbed by the broad except below.
            logging.error('Unexpected type to read: %s', r_filetype)
            return None
    except Exception as e:
        logging.error(e)
        logging.exception('Exception occurred')
        return None