|
1
|
|
|
from pysmartprice.abstract import ( |
|
2
|
|
|
BaseParser, |
|
3
|
|
|
ParserMixin |
|
4
|
|
|
) |
|
5
|
|
|
from pysmartprice.helpers import scrape |
|
6
|
|
|
from bs4 import BeautifulSoup |
|
7
|
|
|
from pysmartprice.results import SmartPriceSeller |
|
8
|
|
|
|
|
9
|
|
|
|
|
10
|
|
|
class PriceListParser(BaseParser, ParserMixin):
    """Parser whose paged URL gets a ``pages/`` segment before the mapper."""

    @property
    def get_paged_url(self):
        """Return ``self.url`` with ``pages/`` inserted before ``self.mapper``.

        ``self.url`` and ``self.mapper`` are not defined in this class; they
        are expected to be supplied by the base classes or the instance.

        Returns:
            The URL with ``pages/`` spliced in at the first occurrence of
            ``self.mapper``. If the mapper does not occur in the URL, the
            URL is returned unchanged.
        """
        url = self.url
        split_at = url.find(self.mapper)
        if split_at == -1:
            # str.find reports "not found" as -1. Slicing with -1 would put
            # 'pages/' in front of the URL's last character and produce a
            # corrupted address, so leave the URL untouched instead.
            return url
        return '{}pages/{}'.format(url[:split_at], url[split_at:])
|
16
|
|
|
|
|
17
|
|
|
|
|
18
|
|
|
class SearchParser(BaseParser, ParserMixin):
    """Parser whose paged URL is the URL it was given, with no rewriting."""

    @property
    def get_paged_url(self):
        """Return ``self.url`` as-is."""
        paged_url = self.url
        return paged_url
|
22
|
|
|
|
|
23
|
|
|
|
|
24
|
|
|
# SCRAPE SELLERS
|
25
|
|
|
class SellerParser(object):
    """Fetch a page and build one ``SmartPriceSeller`` per seller row.

    Attributes set by ``__init__``:
        url: the URL passed in.
        response: whatever ``scrape(url, **kwargs)`` returned.
        soup: ``BeautifulSoup`` tree parsed from ``response`` with ``lxml``.
        result: list of ``SmartPriceSeller`` objects, one per row found by
            ``products_html``.
    """

    def __init__(self, url, *args, **kwargs):
        """Scrape ``url`` and parse every seller row found on the page.

        Args:
            url: address handed to ``scrape``.
            *args: accepted for call compatibility; not used.
            **kwargs: forwarded unchanged to ``scrape``.
        """
        self.url = url
        self.response = scrape(self.url, **kwargs)
        self.soup = BeautifulSoup(self.response, 'lxml')
        self.result = [
            SmartPriceSeller(self.get_product_attrs(row))
            for row in self.products_html
        ]

    @staticmethod
    def _find_by_class(item, tag, css_class):
        """Return the first ``tag`` under ``item`` with that class, or None."""
        return item.find(tag, attrs={'class': css_class})

    @staticmethod
    def _text_or_none(node):
        """Return ``node.text``, or None when the element was not found."""
        return node.text if node is not None else None

    def get_product_attrs(self, item):
        """Extract the seller fields from one row element.

        Args:
            item: a row element exposing ``find(name, attrs=...)``.

        Returns:
            dict with keys ``logo`` (the image ``src``), ``rating``,
            ``price`` and ``name`` (element text). A field whose element is
            absent from the row is ``None`` rather than raising
            ``AttributeError``, so one incomplete row does not abort
            parsing of the whole page.
            NOTE(review): confirm SmartPriceSeller tolerates None values.
        """
        logo = self._find_by_class(item, 'img', 'prc-tbl__str-logo')
        # The rating selector is the exact class string below; rows whose
        # badge carries a different class string will not match it.
        rating = self._find_by_class(
            item, 'span', 'rtng-bdg rtng-bdg--dark-grn')
        price = self._find_by_class(item, 'span', 'prc-tbl__cost-val')
        name = self._find_by_class(item, 'div', 'prc-tbl__slr-name')
        return {
            'logo': logo.get('src') if logo is not None else None,
            'rating': self._text_or_none(rating),
            'price': self._text_or_none(price),
            'name': self._text_or_none(name),
        }

    @property
    def products_html(self):
        """Return all seller row ``div`` elements from the parsed page."""
        # find_all is the current name of the deprecated findAll alias.
        return self.soup.find_all(
            'div', attrs={'class': 'prc-tbl-row__inr clearfix'})
|
50
|
|
|
|