Passed
Push — master ( 80ad96...45165d )
by Alexey
03:00
created

AppReviewScraper   A

Complexity

Total Complexity 19

Size/Duplication

Total Lines 144
Duplicated Lines 0 %

Importance

Changes 0
Metric Value
wmc 19
eloc 77
dl 0
loc 144
rs 10
c 0
b 0
f 0

1 Method

Rating   Name   Duplication   Size   Complexity  
F __invoke() 0 137 19
1
<?php
2
declare(strict_types=1);
3
4
namespace Nelexa\GPlay\Scraper;
5
6
use function GuzzleHttp\Psr7\parse_query;
7
use Nelexa\GPlay\GPlayApps;
8
use Nelexa\GPlay\Http\ResponseHandlerInterface;
9
use Nelexa\GPlay\Model\GoogleImage;
10
use Nelexa\GPlay\Model\ReplyReview;
11
use Nelexa\GPlay\Model\Review;
12
use Nelexa\GPlay\Util\DateStringFormatter;
13
use Nelexa\GPlay\Util\LocaleHelper;
14
use Psr\Http\Message\RequestInterface;
15
use Psr\Http\Message\ResponseInterface;
16
17
/**
 * Response handler that converts a reviews response into a list of Review models.
 *
 * The response body is expected to be a JSON document (after a 6-byte prefix)
 * whose element [0][2] holds an HTML fragment with one
 * `<div class="single-review">` per review.
 */
class AppReviewScraper implements ResponseHandlerInterface
{
    /**
     * Parses the HTML fragment embedded in the response and builds the reviews.
     *
     * Reviews that lack an id, permalink, author name, date or body are skipped.
     *
     * @param RequestInterface $request   request whose query string may carry the locale
     * @param ResponseInterface $response response holding the prefixed JSON payload
     *
     * @return Review[]
     *
     * @throws \RuntimeException when the embedded HTML cannot be loaded
     */
    public function __invoke(RequestInterface $request, ResponseInterface $response): array
    {
        // The first 6 bytes are dropped before JSON decoding.
        // NOTE(review): presumably an anti-hijacking prefix such as ")]}'" plus newlines - confirm.
        $contents = substr($response->getBody()->getContents(), 6);
        $json = \GuzzleHttp\json_decode($contents, true);
        $html = (string) ($json[0][2] ?? '');

        $doc = new \DOMDocument();
        $internalErrors = libxml_use_internal_errors(true);
        try {
            // The XML declaration forces libxml to read the fragment as UTF-8.
            $loaded = $doc->loadHTML('<?xml encoding="utf-8" ?>' . $html);
        } finally {
            // Always drop the buffered parser errors and restore the caller's setting,
            // including on the failure path.
            libxml_clear_errors();
            libxml_use_internal_errors($internalErrors);
        }
        if (!$loaded) {
            throw new \RuntimeException('error load html');
        }

        $query = parse_query($request->getUri()->getQuery());
        $locale = $query[GPlayApps::REQ_PARAM_LOCALE] ?? GPlayApps::DEFAULT_LOCALE;

        $reviews = [];
        $xpath = new \DOMXPath($doc);

        /**
         * @var \DOMElement $reviewNode
         */
        foreach ($xpath->query('//div[@class="single-review"]') as $reviewNode) {
            // review id
            $reviewIdNode = $xpath->query('.//div[@data-reviewid]', $reviewNode)->item(0);
            if ($reviewIdNode === null) {
                continue;
            }
            $reviewId = $reviewIdNode->attributes->getNamedItem('data-reviewid')->textContent;

            // url: query() always returns a node list, so the first item is what must be checked
            $reviewUrlAttr = $xpath->query('.//a[@class="reviews-permalink"]/@href', $reviewIdNode)->item(0);
            if ($reviewUrlAttr === null) {
                continue;
            }
            $reviewUrl = GPlayApps::GOOGLE_PLAY_URL . $reviewUrlAttr->textContent;

            // user name
            $userNameNode = $xpath->query('.//span[@class="author-name"]', $reviewIdNode)->item(0);
            if ($userNameNode === null) {
                continue;
            }
            $userName = trim($userNameNode->textContent);

            // review date
            $reviewDateNode = $xpath->query('.//span[@class="review-date"]', $reviewIdNode)->item(0);
            if ($reviewDateNode === null) {
                continue;
            }
            $reviewDateTime = DateStringFormatter::formatted($locale, trim($reviewDateNode->textContent));

            // text
            $reviewTextNode = $xpath->query(
                ".//div[@class and contains(concat(' ', normalize-space(@class), ' '), ' review-body ')]",
                $reviewNode
            )->item(0);
            if ($reviewTextNode === null) {
                continue;
            }
            // Strip the nested "review-link" block so it does not leak into the text.
            // It is detached through its own parent because the query matches any
            // descendant, not only direct children of the body node.
            $reviewLinkNode = $xpath->query(".//div[@class='review-link']", $reviewTextNode)->item(0);
            if ($reviewLinkNode !== null && $reviewLinkNode->parentNode !== null) {
                $reviewLinkNode->parentNode->removeChild($reviewLinkNode);
            }
            $reviewText = trim($reviewTextNode->textContent);

            $avatar = $this->extractAvatar($xpath, $reviewNode);
            $score = $this->extractScore($xpath, $reviewNode);
            $reply = $this->extractReply($xpath, $reviewNode, $locale);

            // NOTE(review): static analysis reports that $avatar may be null while
            // Review::__construct() appears to accept only GoogleImage - confirm the
            // constructor signature before relying on avatar-less reviews.
            $reviews[] = new Review(
                $reviewId,
                $reviewUrl,
                $userName,
                $reviewText,
                /** @scrutinizer ignore-type */ $avatar,
                $reviewDateTime,
                $score,
                0,
                $reply
            );
        }

        return $reviews;
    }

    /**
     * Reads the reviewer's avatar from the inline `url(...)` style of the image span.
     *
     * @param \DOMXPath $xpath      XPath bound to the parsed document
     * @param \DOMNode  $reviewNode the "single-review" node
     *
     * @return GoogleImage|null null when no style attribute or no url(...) is found
     */
    private function extractAvatar(\DOMXPath $xpath, \DOMNode $reviewNode)
    {
        $styleAttr = $xpath->query('.//span[@class="responsive-img-hdpi"]/span/@style', $reviewNode)->item(0);
        if ($styleAttr === null || !preg_match('~url\s*\((.*?)\)~', $styleAttr->textContent, $match)) {
            return null;
        }

        // The URL may be wrapped in single or double quotes inside url(...).
        return new GoogleImage(str_replace(['"', "'"], '', $match[1]));
    }

    /**
     * Converts the percentage in the "current-rating" style into a 0-5 star score.
     *
     * @param \DOMXPath $xpath      XPath bound to the parsed document
     * @param \DOMNode  $reviewNode the "single-review" node
     *
     * @return int star score, 0 when no percentage is present
     */
    private function extractScore(\DOMXPath $xpath, \DOMNode $reviewNode): int
    {
        $styleAttr = $xpath->query('.//div[@class="current-rating" and @style]/@style', $reviewNode)->item(0);
        if ($styleAttr !== null && preg_match('/([\d]+)%/', $styleAttr->textContent, $match)) {
            // percent * 5 stars / 100, done in integer arithmetic (20% per star)
            return intdiv((int) $match[1], 20);
        }

        return 0;
    }

    /**
     * Builds the developer reply attached to a review, if there is one.
     *
     * The reply is looked up at the second following sibling of the review node,
     * which must be an element with class "developer-reply".
     *
     * @param \DOMXPath $xpath      XPath bound to the parsed document
     * @param \DOMNode  $reviewNode the "single-review" node
     * @param mixed     $locale     locale value taken from the request query or the default
     *
     * @return ReplyReview|null null when there is no reply, or its date or text is missing
     */
    private function extractReply(\DOMXPath $xpath, \DOMNode $reviewNode, $locale)
    {
        $sibling = $reviewNode->nextSibling;
        $replyNode = $sibling !== null ? $sibling->nextSibling : null;
        // The second sibling may be absent or a text node; only elements have attributes.
        if (!$replyNode instanceof \DOMElement || $replyNode->getAttribute('class') !== 'developer-reply') {
            return null;
        }

        $replyDateNode = $xpath->query('.//span[@class="review-date"]', $replyNode)->item(0);
        // The reply text is read from the third child node of the reply element.
        $replyTextNode = $replyNode->childNodes->item(2);
        if ($replyDateNode === null || $replyTextNode === null) {
            return null;
        }

        $replyDateTime = DateStringFormatter::formatted($locale, trim($replyDateNode->textContent));
        if ($replyDateTime === null) {
            return null;
        }

        return new ReplyReview($replyDateTime, trim($replyTextNode->textContent));
    }
}
163