nilsabegg /
sesame
| 1 | <?php |
||||
| 2 | |||||
| 3 | namespace Sesame\Crawler; |
||||
| 4 | |||||
| 5 | use Sesame\Model\Article; |
||||
| 6 | use \Symfony\Component\DomCrawler\Crawler as DomCrawler; |
||||
| 7 | |||||
/**
 * Class ArticleCrawler
 *
 * Parses the HTML of a single article page and copies the values found there
 * onto an Article model.
 *
 * Every extract*() method selects its nodes with a CSS selector through
 * Symfony's DomCrawler and reads them with text() or attr(). DomCrawler throws
 * \InvalidArgumentException when such a read is attempted on an empty node
 * list, so a page that lacks one of the expected elements makes the
 * corresponding extractor (and therefore crawlArticle()) throw.
 *
 * @package Sesame\Crawler
 */
class ArticleCrawler extends Crawler
{
    /**
     * ArticleCrawler constructor.
     */
    public function __construct()
    {
        parent::__construct();
    }

    /**
     * Builds an Article from the HTML body of an article page.
     *
     * @param string $responseBody    HTML document to parse
     * @param bool   $crawlVariations currently unused by this method; kept so
     *                                existing callers keep working
     * @return Article the populated article
     * @throws \InvalidArgumentException when an expected element or attribute
     *                                   is missing from the document
     */
    public function crawlArticle(string $responseBody, bool $crawlVariations): Article
    {
        $articleCrawler = new DomCrawler($responseBody);
        $article = new Article();

        // extract name of the article
        $article->setName($articleCrawler->filter('h1.product-name')->text());

        // extract amount of total orders
        $article->setOrders($this->extractOrders($articleCrawler));

        // extract amount of articles in stock
        $article->setStock($this->extractStock($articleCrawler));

        // extract rating information
        // NOTE(review): static analysis reports that Article::setRating()
        // expects an integer while extractRating() returns a float. The value
        // is passed through unchanged here; confirm which type Article should
        // accept before adding a cast.
        $article->setRating($this->extractRating($articleCrawler));
        $article->setRatingAmount($this->extractRatingAmount($articleCrawler));

        // extract seller information
        $article->setSeller($this->extractSeller($articleCrawler));
        $article->setSellerLocation($this->extractSellerLocation($articleCrawler));
        $article->setSellerSince($this->extractSellerSince($articleCrawler));
        // NOTE: the seller feedback is not populated. The call to
        // extractSellerFeedback() was already disabled in the original code
        // (reason not recorded).

        // extract product properties
        $article->setProperties($this->extractProperties($articleCrawler));

        // extract description
        $article->setDescription($this->extractDescription($articleCrawler));

        // extract price information
        $article->setPrice($this->extractPrice($articleCrawler));
        $article->setPriceDiscount($this->extractPriceDiscount($articleCrawler));
        $article->setPriceCurrency($this->extractPriceCurrency($articleCrawler));

        return $article;
    }

    /**
     * Reads the text of the "#j-product-description" element.
     *
     * @param DomCrawler $articleCrawler
     * @return string
     */
    protected function extractDescription(DomCrawler $articleCrawler): string
    {
        return $articleCrawler->filter('#j-product-description')->text();
    }

    /**
     * Reads the leading integer of the "#j-order-num" element text.
     *
     * @param DomCrawler $articleCrawler
     * @return int
     */
    protected function extractOrders(DomCrawler $articleCrawler): int
    {
        $ordersText = $articleCrawler->filter('#j-order-num')->text();

        return (int) $this->firstToken($ordersText);
    }

    /**
     * Reads the "#j-sku-price" element text as a float.
     *
     * @param DomCrawler $articleCrawler
     * @return float
     */
    protected function extractPrice(DomCrawler $articleCrawler): float
    {
        return (float) $articleCrawler->filter('#j-sku-price')->text();
    }

    /**
     * Reads the "content" attribute of the ".p-price-content .p-symbol" element.
     *
     * @param DomCrawler $articleCrawler
     * @return string
     * @throws \InvalidArgumentException when the element or its "content"
     *                                   attribute is missing
     */
    protected function extractPriceCurrency(DomCrawler $articleCrawler): string
    {
        $currency = $articleCrawler->filter('.p-price-content .p-symbol')->attr('content');

        // attr() yields null for a missing attribute, which would violate the
        // declared string return type; fail with the same exception type
        // DomCrawler uses for a missing node instead.
        if ($currency === null) {
            throw new \InvalidArgumentException(
                'The ".p-price-content .p-symbol" element has no "content" attribute.'
            );
        }

        return $currency;
    }

    /**
     * Reads the "#j-sku-discount-price" element text as a float.
     *
     * @param DomCrawler $articleCrawler
     * @return float
     */
    protected function extractPriceDiscount(DomCrawler $articleCrawler): float
    {
        return (float) $articleCrawler->filter('#j-sku-discount-price')->text();
    }

    /**
     * Collects the product properties as a title => value map.
     *
     * When the same title occurs more than once, the values are joined with
     * "-" in document order.
     *
     * @param DomCrawler $articleCrawler
     * @return array property values keyed by property title
     */
    protected function extractProperties(DomCrawler $articleCrawler): array
    {
        $properties = [];
        $propertyElements = $articleCrawler->filter('.product-property-list .property-item');
        foreach ($propertyElements as $propertyElement) {
            $propertyCrawler = new DomCrawler($propertyElement);

            // Drop the trailing colon of the title. Only colons are removed,
            // and only after trimming, so a title without a colon or with
            // trailing whitespace is no longer cut by one arbitrary byte.
            $propertyTitle = rtrim(trim($propertyCrawler->filter('.propery-title')->text()), ':');
            $propertyValue = $propertyCrawler->filter('.propery-des')->text();

            if (array_key_exists($propertyTitle, $properties)) {
                $properties[$propertyTitle] .= '-' . $propertyValue;
            } else {
                $properties[$propertyTitle] = $propertyValue;
            }
        }

        return $properties;
    }

    /**
     * Reads the leading number of the
     * "#j-customer-reviews-trigger .percent-num" element text.
     *
     * @param DomCrawler $articleCrawler
     * @return float
     */
    protected function extractRating(DomCrawler $articleCrawler): float
    {
        $ratingText = $articleCrawler->filter('#j-customer-reviews-trigger .percent-num')->text();

        return (float) $this->firstToken($ratingText);
    }

    /**
     * Reads the leading integer of the
     * "#j-customer-reviews-trigger .rantings-num" element text, ignoring any
     * opening parenthesis in front of it.
     *
     * @param DomCrawler $articleCrawler
     * @return int
     */
    protected function extractRatingAmount(DomCrawler $articleCrawler): int
    {
        $ratingAmountText = $articleCrawler->filter('#j-customer-reviews-trigger .rantings-num')->text();

        return (int) str_replace('(', '', $this->firstToken($ratingAmountText));
    }

    /**
     * Reads the text of the "#j-store-info-wrap .store-lnk" element.
     *
     * @param DomCrawler $articleCrawler
     * @return string
     */
    protected function extractSeller(DomCrawler $articleCrawler): string
    {
        return $articleCrawler->filter('#j-store-info-wrap .store-lnk')->text();
    }

    /**
     * Reads the ".seller-score-feedback span a" element text as a float after
     * removing any "%" characters.
     *
     * Not called by crawlArticle() at the moment.
     *
     * @param DomCrawler $articleCrawler
     * @return float
     */
    protected function extractSellerFeedback(DomCrawler $articleCrawler): float
    {
        $feedbackText = $articleCrawler->filter('.seller-score-feedback span a')->text();

        return (float) str_replace('%', '', $feedbackText);
    }

    /**
     * Reads the trimmed text of the "#j-store-info-wrap .store-address" element.
     *
     * @param DomCrawler $articleCrawler
     * @return string
     */
    protected function extractSellerLocation(DomCrawler $articleCrawler): string
    {
        return trim($articleCrawler->filter('#j-store-info-wrap .store-address')->text());
    }

    /**
     * Reads the leading integer of the ".store-open-time span" element text.
     *
     * @param DomCrawler $articleCrawler
     * @return int
     */
    protected function extractSellerSince(DomCrawler $articleCrawler): int
    {
        $sellerSinceText = $articleCrawler->filter('.store-open-time span')->text();

        return (int) $this->firstToken($sellerSinceText);
    }

    /**
     * Reads the leading integer of the "#j-sell-stock-num" element text.
     *
     * @param DomCrawler $articleCrawler
     * @return int
     */
    protected function extractStock(DomCrawler $articleCrawler): int
    {
        $stockText = $articleCrawler->filter('#j-sell-stock-num')->text();

        return (int) $this->firstToken($stockText);
    }

    /**
     * Returns the first whitespace-delimited token of a text.
     *
     * Surrounding whitespace is removed first, so text that starts with
     * spaces or line breaks does not produce an empty first token (which the
     * numeric casts in the extractors would silently turn into 0).
     *
     * @param string $text
     * @return string the first token, or an empty string for blank text
     */
    private function firstToken(string $text): string
    {
        $tokens = preg_split('/\s+/', trim($text), 2);

        return $tokens === false ? '' : $tokens[0];
    }
}
||||
| 241 |
This check looks for parameters that have been defined for a function or method, but which are not used in the method body.