1 | <?php |
||||
2 | |||||
3 | namespace Sesame\Crawler; |
||||
4 | |||||
5 | use Sesame\Model\Article; |
||||
6 | use \Symfony\Component\DomCrawler\Crawler as DomCrawler; |
||||
7 | |||||
/**
 * Class ArticleCrawler
 *
 * Builds an Article model from the HTML of a product detail page. Every value
 * is located with a CSS selector on a Symfony DomCrawler and converted to the
 * scalar type the matching extract*() method declares.
 *
 * The class names `propery-title`, `propery-des` and `rantings-num` used in the
 * selectors are reproduced exactly as written; presumably they mirror the
 * markup of the crawled site - verify before "correcting" their spelling.
 *
 * @package Sesame\Crawler
 */
class ArticleCrawler extends Crawler
{
    /**
     * ArticleCrawler constructor.
     *
     * Only forwards to the parent constructor.
     */
    public function __construct()
    {
        parent::__construct();
    }

    /**
     * Parses a product page and returns the populated Article.
     *
     * The setters are invoked in a fixed order: name, orders, stock, rating,
     * rating amount, seller data, properties, description, price data.
     *
     * @param string $responseBody    HTML of the product page
     * @param bool   $crawlVariations not evaluated by this method at present;
     *                                kept so existing callers stay compatible
     * @return Article
     * @throws \InvalidArgumentException when a selector matches no node
     *                                   (raised by DomCrawler::text()/attr())
     */
    public function crawlArticle(string $responseBody, bool $crawlVariations): Article
    {
        $articleCrawler = new DomCrawler($responseBody);
        $article = new Article();

        // name of the article
        $article->setName($this->nodeText($articleCrawler, 'h1.product-name'));

        // amount of total orders
        $article->setOrders($this->extractOrders($articleCrawler));

        // amount of articles in stock
        $article->setStock($this->extractStock($articleCrawler));

        // rating information
        // NOTE(review): extractRating() returns a float, while static analysis
        // reported that Article::setRating() expects an integer - confirm the
        // setter's parameter type.
        $article->setRating($this->extractRating($articleCrawler));
        $article->setRatingAmount($this->extractRatingAmount($articleCrawler));

        // seller information
        // extractSellerFeedback() exists but is deliberately not wired in here;
        // the corresponding setSellerFeedback() call was disabled.
        $article->setSeller($this->extractSeller($articleCrawler));
        $article->setSellerLocation($this->extractSellerLocation($articleCrawler));
        $article->setSellerSince($this->extractSellerSince($articleCrawler));

        // product properties
        $article->setProperties($this->extractProperties($articleCrawler));

        // description
        $article->setDescription($this->extractDescription($articleCrawler));

        // price information
        $article->setPrice($this->extractPrice($articleCrawler));
        $article->setPriceDiscount($this->extractPriceDiscount($articleCrawler));
        $article->setPriceCurrency($this->extractPriceCurrency($articleCrawler));

        return $article;
    }

    /**
     * Returns the text of the `#j-product-description` element.
     *
     * @param DomCrawler $articleCrawler
     * @return string
     */
    protected function extractDescription(DomCrawler $articleCrawler): string
    {
        return $this->nodeText($articleCrawler, '#j-product-description');
    }

    /**
     * Returns the integer value of the first space-separated token of
     * `#j-order-num`.
     *
     * @param DomCrawler $articleCrawler
     * @return int
     */
    protected function extractOrders(DomCrawler $articleCrawler): int
    {
        return (int) $this->leadingToken($articleCrawler, '#j-order-num');
    }

    /**
     * Returns the text of `#j-sku-price` cast to float.
     *
     * @param DomCrawler $articleCrawler
     * @return float
     */
    protected function extractPrice(DomCrawler $articleCrawler): float
    {
        return (float) $this->nodeText($articleCrawler, '#j-sku-price');
    }

    /**
     * Returns the `content` attribute of `.p-price-content .p-symbol`.
     *
     * @param DomCrawler $articleCrawler
     * @return string the attribute value, or an empty string when the element
     *                carries no `content` attribute
     */
    protected function extractPriceCurrency(DomCrawler $articleCrawler): string
    {
        $currency = $articleCrawler->filter('.p-price-content .p-symbol')->attr('content');

        // attr() yields null for a missing attribute; returning that null
        // would violate the declared string return type and raise a TypeError.
        return $currency ?? '';
    }

    /**
     * Returns the text of `#j-sku-discount-price` cast to float.
     *
     * @param DomCrawler $articleCrawler
     * @return float
     */
    protected function extractPriceDiscount(DomCrawler $articleCrawler): float
    {
        return (float) $this->nodeText($articleCrawler, '#j-sku-discount-price');
    }

    /**
     * Collects the product properties as a title => value map.
     *
     * When the same title occurs more than once, the values are joined with
     * '-' in document order.
     *
     * @param DomCrawler $articleCrawler
     * @return array map of property title to property value
     */
    protected function extractProperties(DomCrawler $articleCrawler): array
    {
        $properties = [];
        $propertyElements = $articleCrawler->filter('.product-property-list .property-item');

        foreach ($propertyElements as $propertyElement) {
            $propertyCrawler = new DomCrawler($propertyElement);

            // The last character of the title is dropped (presumably a trailing
            // colon - confirm against the markup).
            $propertyTitle = substr($this->nodeText($propertyCrawler, '.propery-title'), 0, -1);
            $propertyValue = $this->nodeText($propertyCrawler, '.propery-des');

            if (array_key_exists($propertyTitle, $properties)) {
                $properties[$propertyTitle] .= '-' . $propertyValue;
            } else {
                $properties[$propertyTitle] = $propertyValue;
            }
        }

        return $properties;
    }

    /**
     * Returns the float value of the first space-separated token of
     * `#j-customer-reviews-trigger .percent-num`.
     *
     * @param DomCrawler $articleCrawler
     * @return float
     */
    protected function extractRating(DomCrawler $articleCrawler): float
    {
        return (float) $this->leadingToken($articleCrawler, '#j-customer-reviews-trigger .percent-num');
    }

    /**
     * Returns the integer value of the first space-separated token of
     * `#j-customer-reviews-trigger .rantings-num`, with every '(' removed
     * before the cast.
     *
     * @param DomCrawler $articleCrawler
     * @return int
     */
    protected function extractRatingAmount(DomCrawler $articleCrawler): int
    {
        $token = $this->leadingToken($articleCrawler, '#j-customer-reviews-trigger .rantings-num');

        return (int) str_replace('(', '', $token);
    }

    /**
     * Returns the text of `#j-store-info-wrap .store-lnk`.
     *
     * @param DomCrawler $articleCrawler
     * @return string
     */
    protected function extractSeller(DomCrawler $articleCrawler): string
    {
        return $this->nodeText($articleCrawler, '#j-store-info-wrap .store-lnk');
    }

    /**
     * Returns the text of `.seller-score-feedback span a` as a float, with
     * every '%' removed before the cast.
     *
     * @param DomCrawler $articleCrawler
     * @return float
     */
    protected function extractSellerFeedback(DomCrawler $articleCrawler): float
    {
        $feedback = $this->nodeText($articleCrawler, '.seller-score-feedback span a');

        return (float) str_replace('%', '', $feedback);
    }

    /**
     * Returns the trimmed text of `#j-store-info-wrap .store-address`.
     *
     * @param DomCrawler $articleCrawler
     * @return string
     */
    protected function extractSellerLocation(DomCrawler $articleCrawler): string
    {
        return trim($this->nodeText($articleCrawler, '#j-store-info-wrap .store-address'));
    }

    /**
     * Returns the integer value of the first space-separated token of
     * `.store-open-time span`.
     *
     * @param DomCrawler $articleCrawler
     * @return int
     */
    protected function extractSellerSince(DomCrawler $articleCrawler): int
    {
        return (int) $this->leadingToken($articleCrawler, '.store-open-time span');
    }

    /**
     * Returns the integer value of the first space-separated token of
     * `#j-sell-stock-num`.
     *
     * @param DomCrawler $articleCrawler
     * @return int
     */
    protected function extractStock(DomCrawler $articleCrawler): int
    {
        return (int) $this->leadingToken($articleCrawler, '#j-sell-stock-num');
    }

    /**
     * Returns the text of the node(s) selected by $selector.
     *
     * @param DomCrawler $crawler
     * @param string     $selector CSS selector
     * @return string
     * @throws \InvalidArgumentException when the selector matches no node
     *                                   (raised by DomCrawler::text())
     */
    private function nodeText(DomCrawler $crawler, string $selector): string
    {
        return $crawler->filter($selector)->text();
    }

    /**
     * Returns the part of the selected text that precedes the first space
     * (the whole text when it contains no space).
     *
     * @param DomCrawler $crawler
     * @param string     $selector CSS selector
     * @return string
     */
    private function leadingToken(DomCrawler $crawler, string $selector): string
    {
        return explode(' ', $this->nodeText($crawler, $selector))[0];
    }
}
||||
241 |
This check looks for parameters that have been defined for a function or method, but which are not used in the method body.