Passed
Branch develop (2fd4b5)
by Alexey
01:37
created

AppDetailScraper   B

Complexity

Total Complexity 52

Size/Duplication

Total Lines 365
Duplicated Lines 0 %

Test Coverage

Coverage 96.94%

Importance

Changes 0
Metric Value
eloc 188
dl 0
loc 365
ccs 190
cts 196
cp 0.9694
rs 7.44
c 0
b 0
f 0
wmc 52

17 Methods

Rating   Name   Duplication   Size   Complexity  
A extractTranslatedFromLocale() 0 8 2
A extractRecentChanges() 0 5 2
A extractDeveloper() 0 18 1
C getScriptData() 0 37 15
A extractReviews() 0 9 2
A extractPrice() 0 5 2
A extractDescription() 0 7 2
A extractSummary() 0 5 2
A extractHistogramRating() 0 8 1
A extractIcon() 0 5 2
B __invoke() 0 102 5
A extractReleaseDate() 0 6 2
A extractVideo() 0 13 4
A extractScreenshots() 0 5 2
A extractUpdatedDate() 0 6 2
A extractCover() 0 5 2
A extractCategory() 0 8 4

How to fix   Complexity   

Complex Class

Complex classes like AppDetailScraper often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

While breaking up the class, it is a good idea to analyze how other classes use AppDetailScraper, and based on these observations, apply Extract Interface, too.

1
<?php
2
/** @noinspection MultiAssignmentUsageInspection */
3
declare(strict_types=1);
4
5
/**
6
 * @author   Ne-Lexa
7
 * @license  MIT
8
 * @link     https://github.com/Ne-Lexa/google-play-scraper
9
 */
10
11
namespace Nelexa\GPlay\Scraper;
12
13
use Nelexa\GPlay\Exception\GooglePlayException;
14
use Nelexa\GPlay\GPlayApps;
15
use Nelexa\GPlay\Http\ResponseHandlerInterface;
16
use Nelexa\GPlay\Model\AppDetail;
17
use Nelexa\GPlay\Model\AppId;
18
use Nelexa\GPlay\Model\Category;
19
use Nelexa\GPlay\Model\Developer;
20
use Nelexa\GPlay\Model\GoogleImage;
21
use Nelexa\GPlay\Model\HistogramRating;
22
use Nelexa\GPlay\Model\Review;
23
use Nelexa\GPlay\Model\Video;
24
use Nelexa\GPlay\Scraper\Extractor\ReviewsExtractor;
25
use Nelexa\GPlay\Util\DateStringFormatter;
26
use Nelexa\GPlay\Util\LocaleHelper;
27
use Nelexa\GPlay\Util\ScraperUtil;
28
use Psr\Http\Message\RequestInterface;
29
use Psr\Http\Message\ResponseInterface;
30
use function GuzzleHttp\Psr7\parse_query;
31
32
/**
33
 * @internal
34
 */
35
class AppDetailScraper implements ResponseHandlerInterface
36
{
37
    /**
     * Builds an AppDetail model from the response of an application page.
     *
     * The application id, locale and country are taken from the query string
     * of the request URI; every other field is read from the script data
     * embedded in the response body.
     *
     * @param RequestInterface $request request whose URI query holds the id, locale and country
     * @param ResponseInterface $response response whose body contains the application page
     * @return AppDetail
     * @throws GooglePlayException if the required script data sets are not found in the page
     */
    public function __invoke(RequestInterface $request, ResponseInterface $response): AppDetail
    {
        $query = parse_query($request->getUri()->getQuery());

        $id = $query[GPlayApps::REQ_PARAM_ID];
        $locale = $query[GPlayApps::REQ_PARAM_LOCALE] ?? GPlayApps::DEFAULT_LOCALE;
        $country = $query[GPlayApps::REQ_PARAM_COUNTRY] ?? GPlayApps::DEFAULT_COUNTRY;

        [
            $scriptDataInfo,
            $scriptDataRating,
            $scriptDataPrice,
            $scriptDataVersion,
            $scriptDataReviews,
        ] = $this->getScriptData($request, $response);

        $name = $scriptDataInfo[0][0][0];
        $description = $this->extractDescription($scriptDataInfo);
        $translatedFromLocale = $this->extractTranslatedFromLocale($scriptDataInfo, $locale);
        $developer = $this->extractDeveloper($scriptDataInfo);
        $category = $this->extractCategory($scriptDataInfo[0][12][13][0]);
        $summary = $this->extractSummary($scriptDataInfo);
        $installs = $scriptDataInfo[0][12][9][2] ?? 0;
        $score = (float)($scriptDataRating[0][6][0][1] ?? 0);
        $numberVoters = (int)($scriptDataRating[0][6][2][1] ?? 0);
        $numberReviews = (int)($scriptDataRating[0][6][3][1] ?? 0);
        $histogramRating = $this->extractHistogramRating($scriptDataRating);
        $price = $this->extractPrice($scriptDataPrice);
        $currency = $scriptDataPrice[0][2][0][0][0][1][0][1];
        $priceText = $scriptDataPrice[0][2][0][0][0][1][0][2] ?: null;
        $offersIAPCost = $scriptDataInfo[0][12][12][0] ?? null;
        // A missing flag is treated as "no ads" instead of raising an undefined-index warning.
        $containsAds = (bool)($scriptDataInfo[0][12][14][0] ?? false);

        [$size, $appVersion, $androidVersion] = $scriptDataVersion;
        // NOTE(review): isDependOnDevice() presumably recognises the localized
        // "Varies with device" placeholder - confirm against LocaleHelper.
        if (LocaleHelper::isDependOnDevice($locale, $size)) {
            $size = null;
        }
        if (LocaleHelper::isDependOnDevice($locale, $appVersion)) {
            $appVersion = null;
        }
        if (LocaleHelper::isDependOnDevice($locale, $androidVersion)) {
            $androidVersion = null;
            $minAndroidVersion = null;
        } else {
            // Keep only the first dotted number found in the text.
            $minAndroidVersion = preg_replace('~.*?(\d+(\.\d+)*).*~', '$1', $androidVersion);
        }

        $editorsChoice = !empty($scriptDataInfo[0][12][15][1][1]);
        $privacyPoliceUrl = $scriptDataInfo[0][12][7][2] ?? '';
        $categoryFamily = $this->extractCategory($scriptDataInfo[0][12][13][1] ?? []);
        $icon = $this->extractIcon($scriptDataInfo);
        $cover = $this->extractCover($scriptDataInfo);
        $screenshots = $this->extractScreenshots($scriptDataInfo);
        $video = $this->extractVideo($scriptDataInfo);
        $contentRating = $scriptDataInfo[0][12][4][0] ?? '';
        $released = $this->extractReleaseDate($scriptDataInfo, $locale);
        $updated = $this->extractUpdatedDate($scriptDataInfo);
        $recentChanges = $this->extractRecentChanges($scriptDataInfo);
        $reviews = $this->extractReviews(new AppId($id, $locale, $country), $scriptDataReviews);

        // Each builder setter is called exactly once.
        return new AppDetail(
            AppDetail::newBuilder()
                ->setId($id)
                ->setLocale($locale)
                ->setCountry($country)
                ->setName($name)
                ->setDescription($description)
                ->setTranslatedFromLocale($translatedFromLocale)
                ->setSummary($summary)
                ->setIcon($icon)
                ->setCover($cover)
                ->setScreenshots($screenshots)
                ->setDeveloper($developer)
                ->setCategory($category)
                ->setCategoryFamily($categoryFamily)
                ->setVideo($video)
                ->setRecentChanges($recentChanges)
                ->setEditorsChoice($editorsChoice)
                ->setPrivacyPoliceUrl($privacyPoliceUrl)
                ->setInstalls($installs)
                ->setScore($score)
                ->setNumberVoters($numberVoters)
                ->setHistogramRating($histogramRating)
                ->setPrice($price)
                ->setCurrency($currency)
                ->setPriceText($priceText)
                ->setOffersIAPCost($offersIAPCost)
                ->setContainsAds($containsAds)
                ->setSize($size)
                ->setAppVersion($appVersion)
                ->setAndroidVersion($androidVersion)
                ->setMinAndroidVersion($minAndroidVersion)
                ->setContentRating($contentRating)
                ->setReleased($released)
                ->setUpdated($updated)
                ->setNumberReviews($numberReviews)
                ->setReviews($reviews)
        );
    }
147
148
    /**
     * Extracts the script data sets of the page and picks out the ones needed
     * to build the application details.
     *
     * Each data set is recognised by its structure rather than by its key.
     * The info, rating, price and version sets are mandatory; the reviews set
     * is optional and defaults to an empty array.
     *
     * @param RequestInterface $request request, used only to report the URL on failure
     * @param ResponseInterface $response response whose body contains the script data
     * @return array list of [info, rating, price, version, reviews] data sets
     * @throws GooglePlayException if any of the mandatory data sets is missing
     */
    private function getScriptData(RequestInterface $request, ResponseInterface $response): array
    {
        $scriptData = ScraperUtil::extractScriptData($response->getBody()->getContents());

        $scriptDataInfo = null;
        $scriptDataRating = null;
        $scriptDataPrice = null;
        $scriptDataVersion = null;
        $scriptDataReviews = [];

        foreach ($scriptData as $scriptValue) {
            // Only arrays can match one of the shapes below; skipping scalars
            // also keeps count() from being called on a non-countable value.
            if (!is_array($scriptValue)) {
                continue;
            }

            if (isset($scriptValue[0][12][5][5][4][2])) { // ds:5
                $scriptDataInfo = $scriptValue;
            } elseif (isset($scriptValue[0][2][0][0][0][1][0][0])) { // ds:3
                $scriptDataPrice = $scriptValue;
            } elseif (isset($scriptValue[0][0][0])
                && is_string($scriptValue[0][0][0])
                && strpos($scriptValue[0][0][0], 'gp:') === 0) { // ds:15
                $scriptDataReviews = $scriptValue;
            } elseif (isset($scriptValue[0][6][3][1])) { // ds:7
                $scriptDataRating = $scriptValue;
            } elseif (isset($scriptValue[0])
                && is_string($scriptValue[0])
                && count($scriptValue) === 3) { // ds:8
                $scriptDataVersion = $scriptValue;
            }
        }

        if (
            $scriptDataInfo === null ||
            $scriptDataRating === null ||
            $scriptDataPrice === null ||
            $scriptDataVersion === null
        ) {
            throw (new GooglePlayException('Unable to get data for this application.'))->setUrl($request->getUri()->__toString());
        }

        return [$scriptDataInfo, $scriptDataRating, $scriptDataPrice, $scriptDataVersion, $scriptDataReviews];
    }
192
193
    /**
     * Resolves the locale the description was translated from.
     *
     * @param array $scriptDataInfo info data set of the application page
     * @param string $locale locale the page was requested with
     * @return string|null result of LocaleHelper::findPreferredLanguage() for the
     *                     value at [0][19][1], or null when that value is absent
     */
    private function extractTranslatedFromLocale(array $scriptDataInfo, string $locale): ?string
    {
        if (!isset($scriptDataInfo[0][19][1])) {
            return null;
        }

        return LocaleHelper::findPreferredLanguage($locale, $scriptDataInfo[0][19][1]);
    }
207
208
    /**
     * Returns the application description converted from HTML to plain text.
     *
     * The value at [0][19][0][0][1] is preferred when it is set; otherwise
     * the value at [0][10][0][1] is used.
     *
     * @param array $scriptDataInfo info data set of the application page
     * @return string
     */
    private function extractDescription(array $scriptDataInfo): string
    {
        $descriptionHtml = $scriptDataInfo[0][19][0][0][1] ?? $scriptDataInfo[0][10][0][1];

        return ScraperUtil::html2text($descriptionHtml);
    }
220
221
    /**
     * Returns the short summary of the application as plain text.
     *
     * @param array $scriptDataInfo info data set of the application page
     * @return string|null plain-text summary, or null when the value at [0][10][1][1] is empty
     */
    private function extractSummary(array $scriptDataInfo): ?string
    {
        if (empty($scriptDataInfo[0][10][1][1])) {
            return null;
        }

        return ScraperUtil::html2text($scriptDataInfo[0][10][1][1]);
    }
231
232
    /**
     * Builds the Developer model from the developer section ([0][12][5]) of
     * the info data set.
     *
     * The developer id is the id query parameter of the developer page URL.
     *
     * @param array $scriptDataInfo info data set of the application page
     * @return Developer
     */
    private function extractDeveloper(array $scriptDataInfo): Developer
    {
        $developerData = $scriptDataInfo[0][12][5];

        $pageUrl = GPlayApps::GOOGLE_PLAY_URL . $developerData[5][4][2];
        $pageQuery = parse_query(parse_url($pageUrl, PHP_URL_QUERY));

        // NOTE(review): [0][12][5][0][0] appears to hold a numeric internal
        // developer ID; it is currently not used.
        $builder = Developer::newBuilder()
            ->setId($pageQuery[GPlayApps::REQ_PARAM_ID])
            ->setUrl($pageUrl)
            ->setName($developerData[1])
            ->setEmail($developerData[2][0])
            ->setAddress($developerData[4][0])
            ->setWebsite($developerData[3][5][2]);

        return new Developer($builder);
    }
256
257
    /**
     * Builds a Category from a raw category entry.
     *
     * The entry must provide the category name at index 0 and the category
     * id at index 2; both are checked for existence before being read.
     *
     * @param array $data raw category entry
     * @return Category|null the category, or null when the name or the id is missing
     */
    private function extractCategory(array $data): ?Category
    {
        if (!isset($data[0], $data[2])) {
            return null;
        }

        $genreId = (string)$data[2];
        $genreName = (string)$data[0];

        return new Category($genreId, $genreName);
    }
270
271
    /**
     * Builds the rating histogram from the rating data set.
     *
     * The counters are read from [0][6][1] at indexes 5 down to 1 (in that
     * order); a missing counter is replaced by 0.
     *
     * @param array $scriptDataRating rating data set of the application page
     * @return HistogramRating
     */
    private function extractHistogramRating(array $scriptDataRating): HistogramRating
    {
        $histogram = $scriptDataRating[0][6][1] ?? [];

        $counters = [];
        foreach ([5, 4, 3, 2, 1] as $index) {
            $counters[] = $histogram[$index][1] ?? 0;
        }

        return new HistogramRating(...$counters);
    }
285
286
    /**
     * Returns the price of the application.
     *
     * The raw value at [0][2][0][0][0][1][0][0] is divided by 1,000,000.
     *
     * @param array $scriptDataPrice price data set of the application page
     * @return float|null the price, or 0.0 when the raw value is not set
     */
    protected function extractPrice(array $scriptDataPrice): ?float
    {
        if (!isset($scriptDataPrice[0][2][0][0][0][1][0][0])) {
            return 0.0;
        }

        $rawPrice = $scriptDataPrice[0][2][0][0][0][1][0][0];

        return (float)($rawPrice / 1000000);
    }
296
297
    /**
     * Returns the application icon.
     *
     * @param array $scriptDataInfo info data set of the application page
     * @return GoogleImage|null image built from the value at [0][12][1][3][2],
     *                          or null when that value is empty
     */
    protected function extractIcon(array $scriptDataInfo): ?GoogleImage
    {
        if (empty($scriptDataInfo[0][12][1][3][2])) {
            return null;
        }

        return new GoogleImage($scriptDataInfo[0][12][1][3][2]);
    }
307
308
    /**
     * Returns the application cover image.
     *
     * @param array $scriptDataInfo info data set of the application page
     * @return GoogleImage|null image built from the value at [0][12][2][3][2],
     *                          or null when that value is empty
     */
    protected function extractCover(array $scriptDataInfo): ?GoogleImage
    {
        if (empty($scriptDataInfo[0][12][2][3][2])) {
            return null;
        }

        return new GoogleImage($scriptDataInfo[0][12][2][3][2]);
    }
318
319
    /**
     * Returns the screenshots of the application.
     *
     * Every entry of the list at [0][12][0] is turned into a GoogleImage
     * built from the entry's [3][2] value.
     *
     * @param array $scriptDataInfo info data set of the application page
     * @return GoogleImage[] screenshots, or an empty array when the list is empty
     */
    private function extractScreenshots(array $scriptDataInfo): array
    {
        if (empty($scriptDataInfo[0][12][0])) {
            return [];
        }

        $toImage = static function (array $entry) {
            return new GoogleImage($entry[3][2]);
        };

        return array_map($toImage, $scriptDataInfo[0][12][0]);
    }
329
330
    /**
     * Returns the promotional video of the application.
     *
     * Both the video URL ([0][12][3][0][3][2]) and its thumbnail
     * ([0][12][3][1][3][2]) must be set; each is checked for existence
     * before being read.
     *
     * @param array $scriptDataInfo info data set of the application page
     * @return Video|null the video, or null when the URL or the thumbnail is missing
     */
    private function extractVideo(array $scriptDataInfo): ?Video
    {
        if (!isset($scriptDataInfo[0][12][3][0][3][2], $scriptDataInfo[0][12][3][1][3][2])) {
            return null;
        }

        $videoThumb = (string)$scriptDataInfo[0][12][3][1][3][2];
        $videoUrl = (string)$scriptDataInfo[0][12][3][0][3][2];

        return new Video($videoThumb, $videoUrl);
    }
348
349
    /**
     * Returns the release date of the application.
     *
     * @param array $scriptDataInfo info data set of the application page
     * @param string $locale locale passed to DateStringFormatter::formatted()
     *                       together with the value at [0][12][36]
     * @return \DateTimeInterface|null the date, or null when the value at [0][12][36] is not set
     */
    private function extractReleaseDate(array $scriptDataInfo, string $locale): ?\DateTimeInterface
    {
        return isset($scriptDataInfo[0][12][36])
            ? DateStringFormatter::formatted($locale, $scriptDataInfo[0][12][36])
            : null;
    }
361
362
    /**
     * Returns the date of the last update of the application.
     *
     * @param array $scriptDataInfo info data set of the application page
     * @return \DateTimeInterface|null result of DateStringFormatter::unixTimeToDateTime()
     *                                 for the value at [0][12][8][0], or null when it is not set
     */
    private function extractUpdatedDate(array $scriptDataInfo): ?\DateTimeInterface
    {
        return isset($scriptDataInfo[0][12][8][0])
            ? DateStringFormatter::unixTimeToDateTime($scriptDataInfo[0][12][8][0])
            : null;
    }
373
374
    /**
     * Returns the "recent changes" text of the application as plain text.
     *
     * @param array $scriptDataInfo info data set of the application page
     * @return string|null plain-text changes, or null when the value at [0][12][6][1] is empty
     */
    protected function extractRecentChanges($scriptDataInfo): ?string
    {
        if (empty($scriptDataInfo[0][12][6][1])) {
            return null;
        }

        return ScraperUtil::html2text($scriptDataInfo[0][12][6][1]);
    }
384
385
    /**
     * Extracts the first reviews of the application.
     *
     * @param AppId $appId application the reviews belong to
     * @param array $scriptDataReviews reviews data set; the raw reviews are read from index 0
     * @param int $limit maximum number of raw reviews handed to the extractor
     * @return Review[] extracted reviews, or an empty array when there are no raw reviews
     */
    private function extractReviews(AppId $appId, array $scriptDataReviews, int $limit = 4): array
    {
        $rawReviews = $scriptDataReviews[0] ?? null;
        if (empty($rawReviews)) {
            return [];
        }

        $firstReviews = array_slice($rawReviews, 0, $limit);

        return ReviewsExtractor::extractReviews($appId, $firstReviews);
    }
402
}
403