Passed
Branch develop (61f351)
by Alexey
01:52
created

CategoryAppsScraper::extractApps()   A

Complexity

Conditions 2
Paths 2

Size

Total Lines 37
Code Lines 23

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 2
eloc 23
nc 2
nop 4
dl 0
loc 37
rs 9.552
c 0
b 0
f 0
1
<?php
2
declare(strict_types=1);
3
4
namespace Nelexa\GPlay\Scraper;
5
6
use Nelexa\GPlay\Exception\GooglePlayException;
7
use Nelexa\GPlay\GPlayApps;
8
use Nelexa\GPlay\Http\ResponseHandlerInterface;
9
use Nelexa\GPlay\Model\App;
10
use Nelexa\GPlay\Model\Developer;
11
use Nelexa\GPlay\Model\GoogleImage;
12
use Nelexa\GPlay\Util\ScraperUtil;
13
use Psr\Http\Message\RequestInterface;
14
use Psr\Http\Message\ResponseInterface;
15
use function GuzzleHttp\Psr7\parse_query;
16
17
/**
 * Response handler that turns an HTML page of app "cards" into App models.
 *
 * Every `div` carrying the `card` class and a `data-docid` attribute is treated
 * as one application entry (presumably a Google Play category listing, judging
 * by the class name — confirm against the caller).
 */
class CategoryAppsScraper implements ResponseHandlerInterface
{
    /**
     * Parses the response body and builds one App per card node found in it.
     *
     * The locale is taken from the request query string and falls back to
     * GPlayApps::DEFAULT_LOCALE when the parameter is absent.
     *
     * @param RequestInterface $request request that produced the response
     * @param ResponseInterface $response response whose body contains the HTML
     * @return App[]
     * @throws GooglePlayException if a card lacks a required node
     */
    public function __invoke(RequestInterface $request, ResponseInterface $response)
    {
        $query = parse_query($request->getUri()->getQuery());
        $locale = $query[GPlayApps::REQ_PARAM_LOCALE] ?? GPlayApps::DEFAULT_LOCALE;

        $xpath = $this->getXPath($response);
        $cardNodes = $xpath->query("//div[@class and contains(concat(' ', normalize-space(@class), ' '), ' card ') and @data-docid]");

        $apps = [];
        foreach ($cardNodes as $cardNode) {
            $apps[] = $this->extractApps($request, $xpath, $cardNode, $locale);
        }

        return $apps;
    }

    /**
     * Loads the response body as an HTML document and wraps it in a DOMXPath.
     *
     * @param ResponseInterface $response
     * @return \DOMXPath
     * @throws \RuntimeException if the HTML cannot be loaded
     */
    private function getXPath(ResponseInterface $response): \DOMXPath
    {
        $doc = new \DOMDocument();
        // Collect libxml parse errors internally instead of emitting PHP warnings.
        $internalErrors = libxml_use_internal_errors(true);

        try {
            // Casting the stream to string reads it from the beginning, so a body
            // that has already been (partially) consumed is still parsed in full.
            $html = (string) $response->getBody();

            // The XML declaration prefix makes libxml decode the markup as UTF-8.
            if (!$doc->loadHTML('<?xml encoding="utf-8" ?>' . $html)) {
                throw new \RuntimeException('error load html');
            }
        } finally {
            // Drop the buffered parse errors and restore the caller's libxml
            // error mode on both the success and the failure path.
            libxml_clear_errors();
            libxml_use_internal_errors($internalErrors);
        }

        return new \DOMXPath($doc);
    }

    /**
     * Builds a single App model from one card node.
     *
     * @param RequestInterface $request used only to report the URL in exceptions
     * @param \DOMXPath $xpath XPath bound to the document that owns $cardNode
     * @param \DOMElement $cardNode card element carrying the `data-docid` attribute
     * @param string $locale locale assigned to the resulting App
     * @return App
     * @throws GooglePlayException if the title, developer or icon node is missing
     */
    private function extractApps(
        RequestInterface $request,
        \DOMXPath $xpath,
        \DOMElement $cardNode,
        string $locale
    ): App {
        $appId = $cardNode->getAttribute('data-docid');

        $nodeTitle = $xpath->query('.//a[@class="title"]', $cardNode)->item(0);
        if (!$nodeTitle instanceof \DOMElement) {
            throw (new GooglePlayException('Error parse app list'))
                ->setUrl($request->getUri()->__toString());
        }
        // getAttribute() yields an empty string for a missing attribute instead
        // of dereferencing a null attribute node.
        $url = GPlayApps::GOOGLE_PLAY_URL . $nodeTitle->getAttribute('href');
        $name = trim($nodeTitle->getAttribute('title'));

        $summary = $this->extractSummary($xpath, $cardNode);
        $developer = $this->extractDeveloper($request, $xpath, $cardNode);
        $icon = $this->extractIcon($request, $xpath, $cardNode);
        $price = $this->extractPrice($xpath, $cardNode);
        $score = $this->extractScore($xpath, $cardNode);

        return new App(
            App::newBuilder()
                ->setId($appId)
                ->setUrl($url)
                ->setLocale($locale)
                ->setName($name)
                ->setSummary($summary)
                ->setDeveloper($developer)
                ->setIcon($icon)
                ->setScore($score)
                ->setPriceText($price)
        );
    }

    /**
     * Reads the text of the card's description block.
     *
     * @param \DOMXPath $xpath
     * @param \DOMElement $cardNode
     * @return string|null converted description text, or null when the card has no description node
     */
    private function extractSummary(\DOMXPath $xpath, \DOMElement $cardNode): ?string
    {
        $descriptionNode = $xpath->query('.//div[@class="description"]', $cardNode)->item(0);
        if ($descriptionNode === null) {
            return null;
        }

        return ScraperUtil::html2text($descriptionNode->textContent);
    }

    /**
     * Builds the Developer model from the card's subtitle link.
     *
     * The developer id is read from the query string of the link's href.
     *
     * @param RequestInterface $request used only to report the URL in exceptions
     * @param \DOMXPath $xpath
     * @param \DOMElement $cardNode
     * @return Developer
     * @throws GooglePlayException if the subtitle link or the id inside its href is missing
     */
    private function extractDeveloper(RequestInterface $request, \DOMXPath $xpath, \DOMElement $cardNode): Developer
    {
        $developerNode = $xpath->query('.//a[@class="subtitle"]', $cardNode)->item(0);
        if (!$developerNode instanceof \DOMElement) {
            throw (new GooglePlayException('Error parse app list developer node'))
                ->setUrl($request->getUri()->__toString());
        }

        $developerName = trim($developerNode->textContent);
        $developerUrl = GPlayApps::GOOGLE_PLAY_URL . $developerNode->getAttribute('href');

        // parse_url() returns null (no query) or false (malformed URL); the cast
        // normalises both to an empty string before the query is parsed.
        $developerQuery = parse_query((string) parse_url($developerUrl, PHP_URL_QUERY));
        $developerId = $developerQuery[GPlayApps::REQ_PARAM_ID] ?? null;
        if (!is_string($developerId)) {
            // Fail with the documented exception rather than an undefined-index
            // notice followed by an invalid id being handed to the builder.
            throw (new GooglePlayException('Error parse app list developer id'))
                ->setUrl($request->getUri()->__toString());
        }

        return new Developer(
            Developer::newBuilder()
                ->setId($developerId)
                ->setUrl($developerUrl)
                ->setName($developerName)
        );
    }

    /**
     * Builds the icon image from the `src` of the card's cover image.
     *
     * @param RequestInterface $request used only to report the URL in exceptions
     * @param \DOMXPath $xpath
     * @param \DOMElement $cardNode
     * @return GoogleImage
     * @throws GooglePlayException if no `img[@data-cover-large]` with a `src` exists
     */
    private function extractIcon(RequestInterface $request, \DOMXPath $xpath, \DOMElement $cardNode): GoogleImage
    {
        $iconNode = $xpath->query('.//img[@data-cover-large]/@src', $cardNode)->item(0);
        if ($iconNode === null) {
            throw (new GooglePlayException('Error parse app list icon node'))
                ->setUrl($request->getUri()->__toString());
        }

        // NOTE(review): the scheme is prepended unconditionally, which assumes the
        // src is protocol-relative ("//host/...") — confirm against real markup.
        $icon = new GoogleImage('https:' . $iconNode->textContent);
        $icon->reset();

        return $icon;
    }

    /**
     * Reads the displayed price text of the card.
     *
     * @param \DOMXPath $xpath
     * @param \DOMElement $cardNode
     * @return string|null trimmed price text, or null when the node is missing or blank
     */
    private function extractPrice(\DOMXPath $xpath, \DOMElement $cardNode): ?string
    {
        $priceNode = $xpath->query('.//span[@class="display-price"]', $cardNode)->item(0);
        if ($priceNode === null) {
            return null;
        }

        $price = trim($priceNode->textContent);

        // Compare against '' explicitly: empty() would also discard the text "0".
        return $price !== '' ? $price : null;
    }

    /**
     * Derives the star rating from the percentage in the rating element's style.
     *
     * @param \DOMXPath $xpath
     * @param \DOMElement $cardNode
     * @return float rating rounded to one decimal, or 0.0 when no percentage is found
     */
    private function extractScore(\DOMXPath $xpath, \DOMElement $cardNode): float
    {
        $ratingStyleAttr = $xpath->query('.//div[@class="current-rating" and @style]/@style', $cardNode)->item(0);
        if ($ratingStyleAttr === null) {
            return 0.0;
        }

        // Capture only a well-formed decimal number so the arithmetic below never
        // receives a non-numeric string such as "." or "1.2.3".
        if (preg_match('/(\d+(?:\.\d+)?)%/', $ratingStyleAttr->textContent, $match)) {
            // percent * 5 stars / 100, rounded to one decimal
            return round((float) $match[1] * 0.05, 1);
        }

        return 0.0;
    }
}
196