1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
namespace Lbc\Crawler; |
4
|
|
|
|
5
|
|
|
use Lbc\Parser\SearchResultUrlParser; |
6
|
|
|
|
7
|
|
|
/**
 * Class SearchResultCrawler
 *
 * Extracts information from a search result page: total number of ads,
 * number of pages, and the ads listed on the current page.
 *
 * @package Lbc\Crawler
 */
class SearchResultCrawler extends CrawlerAbstract
{
    /**
     * URL parser built by setUrlParser().
     *
     * @var SearchResultUrlParser
     */
    protected $url;

    /**
     * Wrap the given URL in a SearchResultUrlParser and store it in $this->url.
     *
     * @param $url value handed unchanged to the SearchResultUrlParser
     *             constructor (presumably the page URL as a string)
     * @return void
     */
    protected function setUrlParser($url)
    {
        $this->url = new SearchResultUrlParser($url);
    }

    /**
     * Return the total number of ads of the search
     *
     * Reads the text of the first element matching
     * "a.tabsSwitch span.tabsSwitchNumbers"; returns 0 when no such
     * element exists.
     *
     * @return int
     */
    public function getNbAds()
    {
        $counter = $this->node
            ->filter('a.tabsSwitch span.tabsSwitchNumbers')
            ->first();

        if (!$counter->count()) {
            return 0;
        }

        // Drop every non-digit rather than only ASCII whitespace: a
        // thousands separator such as a non-breaking or narrow space is not
        // matched by \s without the "u" modifier, and the (int) cast would
        // then stop at the first separator ("1<nbsp>234" => 1).
        return (int) preg_replace('/\D+/', '', $counter->text());
    }

    /**
     * Return the number of ads per page.
     *
     * Could be dynamically guessed in future, if Leboncoin change it frequently
     * Or if they add the ability for user to change it on result pages.
     *
     * @return int
     */
    public function getNbAdsPerPage()
    {
        return 35;
    }

    /**
     * Return the number of pages
     *
     * Computed as the total number of ads divided by the number of ads per
     * page, rounded up.
     *
     * @return int
     */
    public function getNbPages()
    {
        return (int) ceil($this->getNbAds() / $this->getNbAdsPerPage());
    }

    /**
     * Get an array containing the ads of the current result page
     *
     * Every node matching [itemtype="http://schema.org/Offer"] is handed to a
     * SearchResultAdCrawler together with the href of its first <a>; the
     * result of getAll() is stored under its 'id' entry, so a later ad with
     * the same id replaces an earlier one.
     *
     * @return array ads indexed by their id
     */
    public function getAds()
    {
        $ads = [];

        $this->node->filter('[itemtype="http://schema.org/Offer"]')
            ->each(function ($node) use (&$ads) {
                $ad = (new SearchResultAdCrawler(
                    $node,
                    $node->filter('a')->attr('href')
                ))->getAll();

                $ads[$ad['id']] = $ad;
            });

        return $ads;
    }

    /**
     * Return the Ads's ID only
     *
     * @return array keys of the array returned by getAds()
     */
    public function getAdsId()
    {
        return array_keys($this->getAds());
    }
}
102
|
|
|
|