| Conditions | 8 |
| Total Lines | 61 |
| Code Lines | 44 |
| Lines | 0 |
| Ratio | 0 % |
| Changes | 0 |
Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.
For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.
Commonly applied refactorings include:
If many parameters/temporary variables are present: consider *Replace Temp with Query*, *Introduce Parameter Object*, or *Preserve Whole Object* before extracting, so the new method does not need a long parameter list.
| 1 | import re |
||
def get_data(
    url: str, parser: str = "lxml", headers: dict | None = None
) -> dict[str, bool | str | BeautifulSoup]:
    """
    Download and parse the content of a web page or a local file.

    :param url: address of the needed site (``http...``) or path to a file
    :param parser: markup parser name passed to BeautifulSoup (default ``"lxml"``)
    :param headers: optional HTTP request headers for the download
    :return: dict with elements:
        > :"result": *bool* with result of downloading process
        > :"content": *BeautifulSoup* with elements if Result is True
          OR *str* with error message if Result is False
        > :"message": 5152 on failure, None on success
          (5152 is the pre-existing error code; meaning not documented here)
    """
    cntnt, rslt, msg = "content", "result", "message"
    pattern_http = "^http"
    # Runtime log messages (user-facing, Russian) -- kept verbatim.
    m_l = {
        "start": "Начинаем загрузку данных с сайта",
        "error": "Не удалось получить данные:\n\t>> Адрес:\t%s\n\t>> Ошибка:\t%s",
        "get_site": "Пробуем скачать данные с ресурса",
        "url_check": "Проверяем, являются ли введенные данные адресом веб-страницы",
        "url_correct": "Введен корректный адрес веб-страницы:\t%s",
        "path_check": "Проверяем, являются ли введенные данные адресом файла \n\t>> Адрес:\t%s",
        "parse": "Пробуем обработать полученные данные",
        "agent": "Содержимое строки headers:\n\t>>\t%s",
        "success": "Данные с сайта успешно загружены",
    }

    log.info(m_l["start"])
    log.debug(m_l["url_check"])

    if re.match(pattern_http, url):
        log.debug(m_l["url_correct"], url)
        try:
            log.debug(m_l["get_site"])
            # NOTE: the regex match above already guarantees the "http"
            # prefix, so the original redundant url.lower().startswith()
            # check and its unreachable `raise ValueError` branch are gone.
            request_to_site = urllib.request.Request(
                url=url, headers=headers if headers else {}
            )
            with urllib.request.urlopen(request_to_site) as response:
                log.debug(m_l["parse"])
                site_data = BeautifulSoup(response, parser)
        # BUGFIX: HTTPError is raised by urlopen(), not by BeautifulSoup,
        # so the original inner handler around the parse step was dead code.
        # HTTPError is also a subclass of URLError, so it is listed first
        # to keep its dedicated branch reachable.
        except urllib.error.HTTPError as err:
            log.error(m_l["error"], url, err)
            return {rslt: False, cntnt: str(err), msg: 5152}
        except urllib.error.URLError as err:
            log.error(m_l["error"], url, err)
            log.error(m_l["agent"], headers)
            return {rslt: False, cntnt: str(err), msg: 5152}
    else:
        log.debug(m_l["path_check"], url)
        try:
            log.debug(m_l["get_site"])
            # BUGFIX: open the file in a context manager -- the original
            # `BeautifulSoup(open(url), parser)` leaked the file handle.
            with open(url) as local_file:
                site_data = BeautifulSoup(local_file, parser)
        except (FileNotFoundError, UnicodeDecodeError) as err:
            log.error(m_l["error"], url, err)
            return {rslt: False, cntnt: str(err), msg: 5152}

    log.info(m_l["success"])
    return {rslt: True, cntnt: site_data, msg: None}
| 70 |