1
|
|
|
from __future__ import annotations |
|
|
|
|
2
|
|
|
from dataclasses import dataclass |
3
|
|
|
from typing import Sequence |
4
|
|
|
|
5
|
|
|
from pocketutils.core.query_utils import QueryExecutor |
|
|
|
|
6
|
|
|
|
7
|
|
|
from mandos.model.settings import MANDOS_SETTINGS |
8
|
|
|
|
9
|
|
|
from mandos import logger |
10
|
|
|
|
11
|
|
|
|
12
|
|
|
try: |
13
|
|
|
import selenium |
|
|
|
|
14
|
|
|
except ImportError: |
15
|
|
|
selenium = None |
16
|
|
|
logger.info("Selenium not installed") |
17
|
|
|
|
18
|
|
|
|
19
|
|
|
# noinspection PyBroadException |
20
|
|
|
try: |
21
|
|
|
from selenium import webdriver |
|
|
|
|
22
|
|
|
from selenium.webdriver.common.by import By |
|
|
|
|
23
|
|
|
from selenium.webdriver.remote.webdriver import WebDriver |
|
|
|
|
24
|
|
|
from selenium.webdriver.remote.webelement import WebElement |
|
|
|
|
25
|
|
|
except Exception: |
|
|
|
|
26
|
|
|
webdriver = None |
|
|
|
|
27
|
|
|
WebDriver = None |
|
|
|
|
28
|
|
|
By = None |
|
|
|
|
29
|
|
|
|
30
|
|
|
# Resolve the configured Selenium driver factory once, at import time.
# ``driver_fn`` is bound unconditionally so that ``Scraper.create`` can test it
# against ``None`` even when the Selenium imports failed (``webdriver is None``);
# otherwise the name would be undefined and ``create`` would raise ``NameError``.
driver_fn = None
if webdriver is not None:
    try:
        driver_fn = getattr(webdriver, MANDOS_SETTINGS.selenium_driver)
    except AttributeError:
        # ``selenium.webdriver`` has no driver class with the configured name.
        logger.warning(f"Selenium driver {MANDOS_SETTINGS.selenium_driver} not found")
    else:
        logger.notice(f"Loaded Selenium driver {MANDOS_SETTINGS.selenium_driver}")
38
|
|
|
|
39
|
|
|
|
40
|
|
|
@dataclass(frozen=True)
class Scraper:
    """
    Thin convenience wrapper around a Selenium ``WebDriver``.

    Instances are immutable (the dataclass is frozen). ``go`` returns the
    scraper itself so that calls can be chained.
    """

    # Selenium driver used for navigation and element lookup.
    driver: WebDriver
    # Query executor supplied by the caller; stored but not used by the methods below.
    executor: QueryExecutor

    @classmethod
    def create(cls, executor: QueryExecutor) -> Scraper:
        """
        Build a scraper around a new instance of the configured Selenium driver.

        Args:
            executor: Stored on the new scraper as ``executor``.

        Raises:
            ValueError: If the driver named in the settings could not be loaded.
        """
        if driver_fn is None:
            raise ValueError(f"Selenium driver {MANDOS_SETTINGS.selenium_driver} not found")
        return cls(driver_fn(), executor)

    def go(self, url: str) -> Scraper:
        """Navigate the driver to ``url`` and return this scraper for chaining."""
        self.driver.get(url)
        return self

    def find_element(self, thing: str, by: str) -> WebElement:
        """
        Return the first element matching ``thing`` under the strategy ``by``.

        Args:
            thing: The locator value (an id, CSS selector, XPath, ...).
            by: Name of a ``By`` strategy, case-insensitive; spaces and
                underscores are interchangeable (``"id"``, ``"css_selector"``,
                ``"css selector"``).

        Raises:
            ValueError: If ``by`` does not name a ``By`` strategy.
        """
        # Selenium's signature is (by, value): strategy first, locator second.
        return self.driver.find_element(by=self._strategy(by), value=thing)

    def find_elements(self, thing: str, by: str) -> Sequence[WebElement]:
        """
        Return all elements matching ``thing`` under the strategy ``by``.

        Arguments and errors are the same as for ``find_element``.
        """
        return self.driver.find_elements(by=self._strategy(by), value=thing)

    def click_element(self, thing: str, by: str) -> None:
        """
        Find the first element matching ``thing`` under ``by`` and click it.

        Arguments and errors are the same as for ``find_element``.
        """
        self.find_element(thing, by).click()

    @staticmethod
    def _strategy(by: str) -> str:
        """Translate a strategy name such as ``"css_selector"`` into its ``By`` value."""
        name = by.strip().upper().replace(" ", "_")
        # ``By`` is ``None`` when Selenium failed to import; getattr then yields None too.
        strategy = getattr(By, name, None)
        if not isinstance(strategy, str):
            raise ValueError(f"Unknown Selenium locator strategy '{by}'")
        return strategy
68
|
|
|
|
69
|
|
|
|
70
|
|
|
# Public names of this module. ``By`` is re-exported from Selenium and is
# ``None`` when the guarded Selenium imports above failed.
__all__ = ["Scraper", "By"]
71
|
|
|
|