1
|
|
|
from pysmartprice.abstract import( |
2
|
|
|
BaseParser, |
3
|
|
|
ParserMixin |
4
|
|
|
) |
5
|
|
|
from pysmartprice.helpers import scrape |
6
|
|
|
from bs4 import BeautifulSoup |
7
|
|
|
from pysmartprice.results import SmartPriceSeller |
8
|
|
|
|
9
|
|
|
|
10
|
|
|
class PriceListParser(BaseParser, ParserMixin):
    """Parser whose paged URL carries a ``pages/`` segment before the mapper.

    Relies on ``self.url`` and ``self.mapper``; neither is defined in this
    class, so they are presumably supplied by the base classes (confirm
    against ``pysmartprice.abstract``).
    """

    @property
    def get_paged_url(self):
        """Return ``self.url`` with ``pages/`` inserted before ``self.mapper``.

        The URL is split at the first occurrence of ``self.mapper`` and
        ``pages/`` is placed between the two halves. When ``self.mapper``
        does not occur in the URL, the URL is returned unchanged.
        """
        split_at = self.url.find(self.mapper)
        if split_at == -1:
            # str.find reports "not found" as -1; slicing with that value
            # would wedge 'pages/' in front of the URL's last character.
            return self.url
        return '{}pages/{}'.format(self.url[:split_at], self.url[split_at:])
16
|
|
|
|
17
|
|
|
|
18
|
|
|
class SearchParser(BaseParser, ParserMixin):
    """Parser whose paged URL is simply its own URL."""

    @property
    def get_paged_url(self):
        """Return ``self.url`` as-is; no pagination segment is added."""
        paged_url = self.url
        return paged_url
22
|
|
|
|
23
|
|
|
|
24
|
|
|
# SCRAPE SELLERS
25
|
|
|
class SellerParser(object):
    """Scrape a page and build one ``SmartPriceSeller`` per seller row.

    Attributes set by ``__init__``:
        url: the URL that was passed in.
        response: whatever ``scrape(url, **kwargs)`` returned.
        soup: ``BeautifulSoup`` tree built from ``response`` with ``lxml``.
        result: list of ``SmartPriceSeller`` objects, one per element
            returned by ``products_html``.
    """

    def __init__(self, url, *args, **kwargs):
        """Fetch ``url``, parse it and populate ``self.result``.

        Extra positional arguments are accepted but not used; keyword
        arguments are forwarded to ``scrape``.
        """
        self.url = url
        self.response = scrape(self.url, **kwargs)
        self.soup = BeautifulSoup(self.response, 'lxml')
        self.result = [
            SmartPriceSeller(self.get_product_attrs(item))
            for item in self.products_html
        ]

    @staticmethod
    def _find(item, tag, css_class):
        """Return the first ``tag`` under ``item`` with ``css_class``, or None."""
        return item.find(tag, attrs={'class': css_class})

    def get_product_attrs(self, item):
        """Extract the seller fields from one seller row.

        Returns a dict with the keys ``logo``, ``rating``, ``price`` and
        ``name``. A field whose element is not present in ``item`` is
        reported as ``None`` instead of raising ``AttributeError``, so a
        single incomplete row does not abort the whole parse.
        NOTE(review): confirm ``SmartPriceSeller`` tolerates ``None`` values.
        """
        logo = self._find(item, 'img', 'prc-tbl__str-logo')
        # NOTE(review): only this exact badge class string is looked up; rows
        # whose rating badge uses another class end up with rating=None.
        rating = self._find(item, 'span', 'rtng-bdg rtng-bdg--dark-grn')
        price = self._find(item, 'span', 'prc-tbl__cost-val')
        name = self._find(item, 'div', 'prc-tbl__slr-name')
        return dict(
            logo=logo.get('src') if logo is not None else None,
            rating=rating.text if rating is not None else None,
            price=price.text if price is not None else None,
            name=name.text if name is not None else None,
        )

    @property
    def products_html(self):
        """Return every ``div`` with class ``prc-tbl-row__inr clearfix``."""
        # find_all is the current spelling of the legacy findAll alias.
        return self.soup.find_all(
            'div', attrs={'class': 'prc-tbl-row__inr clearfix'})
50
|
|
|
|