Passed
Push — master (8c7e96...605616)
by torrua
01:16
created

app.site.functions.get_data()   C

Complexity
    Conditions: 8

Size
    Total Lines: 61
    Code Lines: 44

Duplication
    Lines: 0
    Ratio: 0%

Importance
    Changes: 0

Metric   Value
eloc     44
dl       0
loc      61
rs       6.9573
c        0
b        0
f        0
cc       8
nop      3

How to fix: Long Method

Small methods make your code easier to understand, especially when combined with a good name. Moreover, if a method is small, finding a good name for it is usually much easier.

For example, if you find yourself adding comments to a method's body, that is usually a sign that the commented part should be extracted into a new method, with the comment serving as the starting point for the new method's name.
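As a minimal, hypothetical illustration of that idea (the names build_report and average_score are invented for this sketch), the commented block becomes a helper whose name comes straight from the comment:

# Before: the comment labels a distinct step buried inside a longer method
def build_report(scores: list[float]) -> str:
    # calculate the average score
    average = sum(scores) / len(scores) if scores else 0.0
    return f"Average score: {average:.2f}"

# After: the step is extracted, and the comment becomes the method name
def average_score(scores: list[float]) -> float:
    return sum(scores) / len(scores) if scores else 0.0

def build_report(scores: list[float]) -> str:
    return f"Average score: {average_score(scores):.2f}"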

Commonly applied refactorings include Extract Method, Decompose Conditional, and Replace Method with Method Object. The flagged method is shown below, followed by a sketch of Extract Method applied to it.

import re
import urllib.error
import urllib.request

from bs4 import BeautifulSoup

from app.logger import log


def get_data(
    url: str, parser: str = "lxml", headers: dict | None = None
) -> dict[str, bool | str | BeautifulSoup]:
    """
    Download and parse the content behind a URL or a local file path.
    :param url: address of the target site or file
    :param parser: parser name passed to BeautifulSoup
    :param headers: optional HTTP headers for the request
    :return: dict with elements:
             "result":  bool with the result of the downloading process
             "content": BeautifulSoup with the parsed data if "result" is True,
                        or str with the error message if "result" is False
    """
    # Short aliases for the keys of the returned dict
    cntnt, rslt, msg = "content", "result", "message"
    pattern_http = "^http"
    # Log message templates
    m_l = {
        "start": "Starting to download data from the site",
        "error": "Failed to get data:\n\t>> Address:\t%s\n\t>> Error:\t%s",
        "get_site": "Trying to download data from the resource",
        "url_check": "Checking whether the input is a web page address",
        "url_correct": "A valid web page address was entered:\t%s",
        "path_check": "Checking whether the input is a file path \n\t>> Address:\t%s",
        "parse": "Trying to parse the downloaded data",
        "agent": "Contents of the headers string:\n\t>>\t%s",
        "success": "Data downloaded from the site successfully",
    }

    log.info(m_l["start"])
    log.debug(m_l["url_check"])

    if re.match(pattern_http, url):
        # The input looks like a URL: download it over HTTP
        log.debug(m_l["url_correct"], url)
        try:
            log.debug(m_l["get_site"])
            if url.lower().startswith("http"):
                request_to_site = urllib.request.Request(
                    url=url, headers=headers if headers else {}
                )
            else:
                raise ValueError from None
            with urllib.request.urlopen(request_to_site) as response:
                try:
                    log.debug(m_l["parse"])
                    site_data = BeautifulSoup(response, parser)
                except urllib.error.HTTPError as err:
                    log.error(m_l["error"], *(url, err))
                    return {rslt: False, cntnt: str(err), msg: 5152}
        except urllib.error.URLError as err:
            log.error(m_l["error"], url, err)
            log.error(m_l["agent"], headers)
            return {rslt: False, cntnt: str(err), msg: 5152}
    else:
        # The input looks like a local path: read and parse the file
        log.debug(m_l["path_check"], url)
        try:
            log.debug(m_l["get_site"])
            site_data = BeautifulSoup(open(url), parser)
        except (FileNotFoundError, UnicodeDecodeError) as err:
            log.error(m_l["error"], *(url, err))
            return {rslt: False, cntnt: str(err), msg: 5152}

    log.info(m_l["success"])
    return {rslt: True, cntnt: site_data, msg: None}
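Following the guidance above, one possible first pass at Extract Method on get_data might look like the sketch below. The helper names _fetch_url and _read_file are invented for illustration; the return-dict shape, the "lxml" default, and the 5152 error code are kept from the original.

import urllib.error
import urllib.request

from bs4 import BeautifulSoup

from app.logger import log


def _fetch_url(url: str, parser: str, headers: dict | None) -> BeautifulSoup:
    # Hypothetical helper: download a remote page and parse it;
    # network and HTTP errors propagate to the caller.
    log.debug("Trying to download data from the resource")
    request_to_site = urllib.request.Request(url=url, headers=headers or {})
    with urllib.request.urlopen(request_to_site) as response:
        log.debug("Trying to parse the downloaded data")
        return BeautifulSoup(response, parser)


def _read_file(path: str, parser: str) -> BeautifulSoup:
    # Hypothetical helper: read and parse a local file;
    # file and decoding errors propagate to the caller.
    log.debug("Trying to read data from a local file:\t%s", path)
    with open(path) as file:
        return BeautifulSoup(file, parser)


def get_data(
    url: str, parser: str = "lxml", headers: dict | None = None
) -> dict[str, bool | str | BeautifulSoup]:
    """Download and parse the content behind a URL or a local file path."""
    log.info("Starting to download data from the site")
    try:
        if url.lower().startswith("http"):
            site_data = _fetch_url(url, parser, headers)
        else:
            site_data = _read_file(url, parser)
    except (urllib.error.URLError, FileNotFoundError, UnicodeDecodeError) as err:
        log.error("Failed to get data:\n\t>> Address:\t%s\n\t>> Error:\t%s", url, err)
        return {"result": False, "content": str(err), "message": 5152}
    log.info("Data downloaded from the site successfully")
    return {"result": True, "content": site_data, "message": None}

Each helper now does one thing, and get_data is reduced to dispatching between them and translating errors into the result dict, which directly lowers the method's length and condition count reported above.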