Issues (6)

src/Sesame/Crawler/ArticleCrawler.php (3 issues)

1
<?php
2
3
namespace Sesame\Crawler;
4
5
use Sesame\Model\Article;
6
use \Symfony\Component\DomCrawler\Crawler as DomCrawler;
7
8
/**
9
 * Class ArticleCrawler
10
 *
11
 * @package Sesame\Crawler
12
 */
13
class ArticleCrawler extends Crawler
{
    /**
     * ArticleCrawler constructor.
     *
     * Only delegates to the parent constructor; no state of its own is set up.
     */
    public function __construct()
    {
        parent::__construct();
    }

    /**
     * Builds an Article from the markup of a single product page.
     *
     * Each field is read by its own extract*() method and stored through the
     * matching setter on the Article. The extraction order is fixed, so the
     * first selector that fails is the one whose error surfaces.
     *
     * NOTE(review): $crawlVariations is accepted but never read in this method.
     * It is kept so existing callers keep working.
     *
     * NOTE(review): extractRating() returns a float, while static analysis
     * reported that Article::setRating() expects an int. Confirm against the
     * Article model; the fractional part of the rating may be lost there.
     *
     * @param string $responseBody    markup passed to the DOM crawler
     * @param bool   $crawlVariations currently unused
     * @return Article
     */
    public function crawlArticle(string $responseBody, bool $crawlVariations): Article
    {
        $articleCrawler = new DomCrawler($responseBody);
        $article = new Article();

        // name of the article
        $article->setName($articleCrawler->filter('h1.product-name')->text());

        // amount of total orders
        $article->setOrders($this->extractOrders($articleCrawler));

        // amount of articles in stock
        $article->setStock($this->extractStock($articleCrawler));

        // rating information
        $article->setRating($this->extractRating($articleCrawler));
        $article->setRatingAmount($this->extractRatingAmount($articleCrawler));

        // seller information
        // Seller feedback is not populated: the setSellerFeedback() call is disabled.
        $article->setSeller($this->extractSeller($articleCrawler));
        $article->setSellerLocation($this->extractSellerLocation($articleCrawler));
        $article->setSellerSince($this->extractSellerSince($articleCrawler));

        // product properties
        $article->setProperties($this->extractProperties($articleCrawler));

        // description
        $article->setDescription($this->extractDescription($articleCrawler));

        // price information
        $article->setPrice($this->extractPrice($articleCrawler));
        $article->setPriceDiscount($this->extractPriceDiscount($articleCrawler));
        $article->setPriceCurrency($this->extractPriceCurrency($articleCrawler));

        return $article;
    }

    /**
     * Returns the text content of the `#j-product-description` element.
     *
     * @param DomCrawler $articleCrawler
     * @return string
     */
    protected function extractDescription(DomCrawler $articleCrawler): string
    {
        return $articleCrawler->filter('#j-product-description')->text();
    }

    /**
     * Reads the order count from `#j-order-num`.
     *
     * Only the part of the text before the first space is cast to int.
     *
     * @param DomCrawler $articleCrawler
     * @return int
     */
    protected function extractOrders(DomCrawler $articleCrawler): int
    {
        $text = $articleCrawler->filter('#j-order-num')->text();

        return (int) $this->firstSpaceDelimitedToken($text);
    }

    /**
     * Reads the price from `#j-sku-price` and casts the text to float.
     *
     * @param DomCrawler $articleCrawler
     * @return float
     */
    protected function extractPrice(DomCrawler $articleCrawler): float
    {
        return (float) $articleCrawler->filter('#j-sku-price')->text();
    }

    /**
     * Reads the currency from the `content` attribute of `.p-price-content .p-symbol`.
     *
     * The attribute lookup can yield null, which the `string` return type would
     * turn into a TypeError. That case is reported here with an explicit
     * exception naming the missing attribute.
     *
     * @param DomCrawler $articleCrawler
     * @return string
     * @throws \InvalidArgumentException when the `content` attribute is absent
     */
    protected function extractPriceCurrency(DomCrawler $articleCrawler): string
    {
        $currency = $articleCrawler->filter('.p-price-content .p-symbol')->attr('content');

        if ($currency === null) {
            throw new \InvalidArgumentException(
                'The price symbol element has no "content" attribute to read the currency from.'
            );
        }

        return $currency;
    }

    /**
     * Reads the discounted price from `#j-sku-discount-price` and casts it to float.
     *
     * @param DomCrawler $articleCrawler
     * @return float
     */
    protected function extractPriceDiscount(DomCrawler $articleCrawler): float
    {
        return (float) $articleCrawler->filter('#j-sku-discount-price')->text();
    }

    /**
     * Collects the product properties as a title => value map.
     *
     * The last byte of every title is dropped (presumably a trailing colon;
     * verify against the page markup). When a title occurs more than once,
     * its values are joined with '-'.
     *
     * @param DomCrawler $articleCrawler
     * @return array
     */
    protected function extractProperties(DomCrawler $articleCrawler): array
    {
        $properties = [];

        foreach ($articleCrawler->filter('.product-property-list .property-item') as $propertyElement) {
            $propertyCrawler = new DomCrawler($propertyElement);
            // The selector spellings ("propery") are runtime strings and are kept as they are.
            $title = substr($propertyCrawler->filter('.propery-title')->text(), 0, -1);
            $value = $propertyCrawler->filter('.propery-des')->text();

            $properties[$title] = array_key_exists($title, $properties)
                ? $properties[$title] . '-' . $value
                : $value;
        }

        return $properties;
    }

    /**
     * Reads the rating from `#j-customer-reviews-trigger .percent-num`.
     *
     * Only the part of the text before the first space is cast to float.
     *
     * @param DomCrawler $articleCrawler
     * @return float
     */
    protected function extractRating(DomCrawler $articleCrawler): float
    {
        $text = $articleCrawler->filter('#j-customer-reviews-trigger .percent-num')->text();

        return (float) $this->firstSpaceDelimitedToken($text);
    }

    /**
     * Reads the number of ratings from `#j-customer-reviews-trigger .rantings-num`.
     *
     * Takes the text before the first space, removes every '(' from it and
     * casts the remainder to int.
     *
     * @param DomCrawler $articleCrawler
     * @return int
     */
    protected function extractRatingAmount(DomCrawler $articleCrawler): int
    {
        $text = $articleCrawler->filter('#j-customer-reviews-trigger .rantings-num')->text();

        return (int) str_replace('(', '', $this->firstSpaceDelimitedToken($text));
    }

    /**
     * Returns the text of the store link inside `#j-store-info-wrap`.
     *
     * @param DomCrawler $articleCrawler
     * @return string
     */
    protected function extractSeller(DomCrawler $articleCrawler): string
    {
        return $articleCrawler->filter('#j-store-info-wrap .store-lnk')->text();
    }

    /**
     * Reads the seller feedback from `.seller-score-feedback span a`.
     *
     * Every '%' is removed from the text before it is cast to float.
     *
     * @param DomCrawler $articleCrawler
     * @return float
     */
    protected function extractSellerFeedback(DomCrawler $articleCrawler): float
    {
        $text = $articleCrawler->filter('.seller-score-feedback span a')->text();

        return (float) str_replace('%', '', $text);
    }

    /**
     * Returns the trimmed text of `#j-store-info-wrap .store-address`.
     *
     * @param DomCrawler $articleCrawler
     * @return string
     */
    protected function extractSellerLocation(DomCrawler $articleCrawler): string
    {
        return trim($articleCrawler->filter('#j-store-info-wrap .store-address')->text());
    }

    /**
     * Reads the seller's "open since" value from `.store-open-time span`.
     *
     * Only the part of the text before the first space is cast to int.
     *
     * @param DomCrawler $articleCrawler
     * @return int
     */
    protected function extractSellerSince(DomCrawler $articleCrawler): int
    {
        $text = $articleCrawler->filter('.store-open-time span')->text();

        return (int) $this->firstSpaceDelimitedToken($text);
    }

    /**
     * Reads the stock amount from `#j-sell-stock-num`.
     *
     * Only the part of the text before the first space is cast to int.
     *
     * @param DomCrawler $articleCrawler
     * @return int
     */
    protected function extractStock(DomCrawler $articleCrawler): int
    {
        $text = $articleCrawler->filter('#j-sell-stock-num')->text();

        return (int) $this->firstSpaceDelimitedToken($text);
    }

    /**
     * Returns the part of $text that precedes its first space.
     *
     * The whole text is returned when it contains no space, and an empty
     * string when it starts with one.
     *
     * @param string $text
     * @return string
     */
    private function firstSpaceDelimitedToken(string $text): string
    {
        return explode(' ', $text)[0];
    }
}
241