| Metric | Value |
| --- | --- |
| Conditions | 8 |
| Total Lines | 61 |
| Code Lines | 44 |
| Comment Lines | 0 |
| Comment Ratio | 0 % |
| Changes | 0 |
Small methods make your code easier to understand, especially when combined with a good name. And if a method is small, finding a good name for it is usually much easier.

For example, if you find yourself adding comments to a method's body, that is usually a sign that the commented part should be extracted into a new method, with the comment serving as a starting point for the new method's name.
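As a minimal sketch of that idea (the functions and names below are hypothetical, not taken from the example further down), the comment becomes the method name:

```python
def report_before(orders: list[float]) -> str:
    # compute the total price including 20% VAT
    total = sum(orders) * 1.20
    return f"Total due: {total:.2f}"


# After Extract Method: the comment above became the method name.
def total_including_vat(orders: list[float], vat_rate: float = 0.20) -> float:
    return sum(orders) * (1 + vat_rate)


def report_after(orders: list[float]) -> str:
    return f"Total due: {total_including_vat(orders):.2f}"
```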
Commonly applied refactorings include *Extract Method*. If many parameters/temporary variables are present, *Replace Temp with Query*, *Introduce Parameter Object*, or *Preserve Whole Object* can help; a sketch of *Introduce Parameter Object* follows.
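This is a hedged sketch only, borrowing the parameters of the `get_data` example shown afterwards; the class name and the idea of freezing it are our own assumptions:

```python
from dataclasses import dataclass


@dataclass(frozen=True)
class FetchConfig:
    """Hypothetical parameter object: settings that always travel together."""
    parser: str = "lxml"
    headers: dict | None = None


# Before: def get_data(url, parser="lxml", headers=None) -> dict: ...
# After: the signature shrinks, and a new setting no longer touches every caller.
def get_data(url: str, config: FetchConfig = FetchConfig()) -> dict:
    ...  # body unchanged, reading config.parser and config.headers


result = get_data("https://example.com", FetchConfig(parser="html.parser"))
```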
```python
import logging
import re
import urllib.error
import urllib.request

from bs4 import BeautifulSoup

log = logging.getLogger(__name__)


def get_data(
    url: str, parser: str = "lxml", headers: dict = None
) -> dict[str, bool | str | BeautifulSoup]:
    """
    Download and parse the content of a URL or a local file.

    :url: address of the site or file
    :return: dict with elements:
        > :"result": *bool* with the result of the download
        > :"content": *BeautifulSoup* with elements if "result" is True,
                      OR *str* with an error message if "result" is False
    """
    cntnt, rslt, msg = "content", "result", "message"
    pattern_http = "^http"
    m_l = {
        "start": "Starting to download data from the site",
        "error": "Failed to get the data:\n\t>> Address:\t%s\n\t>> Error:\t%s",
        "get_site": "Trying to download data from the resource",
        "url_check": "Checking whether the input is a web page address",
        "url_correct": "A valid web page address was entered:\t%s",
        "path_check": "Checking whether the input is a file path\n\t>> Address:\t%s",
        "parse": "Trying to parse the downloaded data",
        "agent": "Contents of the headers string:\n\t>>\t%s",
        "success": "Data from the site downloaded successfully",
    }

    log.info(m_l["start"])
    log.debug(m_l["url_check"])

    if re.match(pattern_http, url):
        log.debug(m_l["url_correct"], url)
        try:
            log.debug(m_l["get_site"])
            if url.lower().startswith("http"):
                request_to_site = urllib.request.Request(
                    url=url, headers=headers if headers else {}
                )
            else:
                raise ValueError from None
            with urllib.request.urlopen(request_to_site) as response:
                try:
                    log.debug(m_l["parse"])
                    site_data = BeautifulSoup(response, parser)
                except urllib.error.HTTPError as err:
                    log.error(m_l["error"], *(url, err))
                    return {rslt: False, cntnt: str(err), msg: 5152}
        except urllib.error.URLError as err:
            log.error(m_l["error"], url, err)
            log.error(m_l["agent"], headers)
            return {rslt: False, cntnt: str(err), msg: 5152}
    else:
        log.debug(m_l["path_check"], url)
        try:
            log.debug(m_l["get_site"])
            site_data = BeautifulSoup(open(url), parser)
        except (FileNotFoundError, UnicodeDecodeError) as err:
            log.error(m_l["error"], *(url, err))
            return {rslt: False, cntnt: str(err), msg: 5152}

    log.info(m_l["success"])
    return {rslt: True, cntnt: site_data, msg: None}
```
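One hedged way to apply *Extract Method* to this function (the helper names and the simplified error handling below are our own assumptions, and logging is omitted for brevity) is to split the request-building, the parsing, and the result-building into small, nameable pieces:

```python
import urllib.error
import urllib.request

from bs4 import BeautifulSoup


def _build_request(url: str, headers: dict | None) -> urllib.request.Request:
    """Validate the URL and wrap it in a Request object."""
    if not url.lower().startswith("http"):
        raise ValueError(f"Not a web address: {url}")
    return urllib.request.Request(url=url, headers=headers or {})


def _failure(err: Exception) -> dict:
    # 5152 is the magic "message" value kept from the original example.
    return {"result": False, "content": str(err), "message": 5152}


def _success(soup: BeautifulSoup) -> dict:
    return {"result": True, "content": soup, "message": None}


def get_data(url: str, parser: str = "lxml", headers: dict = None) -> dict:
    """Download and parse `url`, which may be a web address or a file path."""
    try:
        if url.lower().startswith("http"):
            with urllib.request.urlopen(_build_request(url, headers)) as response:
                soup = BeautifulSoup(response, parser)
        else:
            with open(url) as file:
                soup = BeautifulSoup(file, parser)
    except (urllib.error.URLError, OSError, UnicodeDecodeError, ValueError) as err:
        return _failure(err)
    return _success(soup)
```

Each helper now fits in a few lines, and its name carries the information the long version had to spell out in logging commentary.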