Completed
Push — master ( 72b331...d4b7d2 )
by Steffen
02:14
created

kuon.selenium_helper.SeleniumHelper.get()   A

Complexity

Conditions 5

Size

Total Lines 28
Code Lines 18

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 18
dl 0
loc 28
rs 9.0333
c 0
b 0
f 0
cc 5
nop 4
1
#!/usr/bin/python
2
# -*- coding: utf-8 -*-
3
4
import logging
5
import os
6
import sys
7
import time
8
from urllib.parse import urlencode, urlparse, parse_qsl, urlunparse
9
10
from selenium import webdriver
11
from selenium.webdriver import DesiredCapabilities
12
from selenium.webdriver.chrome.options import Options
13
14
# Absolute path of the bundled chromedriver binary:
# <parent of this file's directory>/bin/ChromeHeadless/<sys.platform>/chromedriver
# Each path component is passed separately so os.path.join picks the correct
# separator on every platform (a hard-coded backslash only works on Windows).
CHROMEDRIVER_PATH = os.path.abspath(os.path.join(os.path.dirname(__file__), os.path.pardir, 'bin', 'ChromeHeadless',
                                                 sys.platform, 'chromedriver'))
16
# Module-level Chrome options object; SeleniumHelper passes this same instance
# to the Chrome driver constructor, so any argument added to it is shared.
CHROME_OPTIONS = Options()
17
18
19
class SeleniumHelper(webdriver.Chrome):
    """Headless Chrome driver whose ``get`` mimics parts of ``requests.get``."""

    def __init__(self, log_level=logging.ERROR, *args, **kwargs):
        """Start a headless Chrome session and set up logging.

        :param log_level: level handed to ``logging.basicConfig``
        :param args: extra positional arguments forwarded to ``webdriver.Chrome``
        :param kwargs: extra keyword arguments forwarded to ``webdriver.Chrome``
        """
        # CHROME_OPTIONS is shared module state: add the flag only once so that
        # creating several helpers does not accumulate duplicate arguments.
        if "--headless" not in CHROME_OPTIONS.arguments:
            CHROME_OPTIONS.add_argument("--headless")
        super().__init__(executable_path=CHROMEDRIVER_PATH, chrome_options=CHROME_OPTIONS, *args, **kwargs)
        logging.basicConfig(level=log_level,
                            format='[%(asctime)s.%(msecs)03d %(levelname)s %(name)s] %(message)s',
                            datefmt="%H:%M:%S")
        self.logger = logging.getLogger("selenium_logger")

    def get(self, url, params=None, headers=None):
        """Load ``url`` in the browser, similar to ``requests.get``.

        :param url: address to open
        :param params: optional mapping merged into the query string of ``url``;
            keys already present in the URL are overwritten
        :param headers: optional mapping of header names to values; when given,
            the driver is restarted with matching desired capabilities
        :return: this driver instance, after the page has loaded
        """
        if headers:
            desired_capabilities = DesiredCapabilities.CHROME.copy()
            # NOTE(review): whether Chrome honours these capability keys cannot
            # be confirmed from this file - verify against the driver in use.
            for key, value in headers.items():
                desired_capabilities['chrome.page.customHeaders.{0:s}'.format(key.lower())] = value
            # Shut down the running session before starting the replacement;
            # otherwise the previous chromedriver/browser process is orphaned.
            self.quit()
            super().__init__(executable_path=CHROMEDRIVER_PATH, chrome_options=CHROME_OPTIONS,
                             desired_capabilities=desired_capabilities)
        if params:
            url_parts = list(urlparse(url))
            # index 4 of the urlparse result is the query component
            query = dict(parse_qsl(url_parts[4]))
            query.update(params)

            url_parts[4] = urlencode(query)
            url = urlunparse(url_parts)

        super().get(url)
        # Poll once per second while the Cloudflare interstitial text is still
        # in the page source. NOTE(review): this loop has no upper bound.
        while 'Your browser will redirect to your requested content shortly.' in self.page_source:
            self.logger.debug("sleeping to pass cloudflare")
            time.sleep(1)

        return self
58