Passed
Push — master ( 81593c...33129c )
by Alexey
10:12 queued 12s
created

ScraperUtil::getValue()   A

Complexity

Conditions 5
Paths 6

Size

Total Lines 17
Code Lines 9

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 9
CRAP Score 5

Importance

Changes 0
Metric Value
eloc 9
c 0
b 0
f 0
dl 0
loc 17
ccs 9
cts 9
cp 1
rs 9.6111
cc 5
nc 6
nop 3
crap 5
1
<?php
2
3
declare(strict_types=1);
4
5
/*
6
 * Copyright (c) Ne-Lexa
7
 *
8
 * For the full copyright and license information, please view
9
 * the LICENSE file that was distributed with this source code.
10
 *
11
 * @see https://github.com/Ne-Lexa/google-play-scraper
12
 */
13
14
namespace Nelexa\GPlay\Util;
15
16
/**
 * Static helpers for pulling embedded data and readable text out of HTML pages.
 *
 * @internal
 */
class ScraperUtil
{
    /**
     * Collects the `data:` payloads of every `AF_initDataCallback(...)` script
     * found in the page, indexed by the `ds:*` key declared in the same call.
     *
     * Scripts in which either the key or the data section cannot be located
     * are skipped.
     *
     * @param string $html raw page markup
     *
     * @return array decoded payloads keyed by their `ds:*` identifier;
     *               empty when no callback script is present
     */
    public static function extractScriptData(string $html): array
    {
        $scripts = [];

        // preg_match_all() yields 0 when nothing matched and false on a PCRE error;
        // in both cases there is nothing to decode.
        if (!preg_match_all('/>AF_initDataCallback\((.*?)\);<\/script/s', $html, $matches)) {
            return $scripts;
        }

        foreach ($matches[0] as $script) {
            if (
                preg_match("/(ds:.*?)'/", $script, $keyMatch)
                && preg_match('/data:([\s\S]*?)(, }\);<\/|, sideChannel:)/', $script, $valueMatch)
            ) {
                $scripts[$keyMatch[1]] = self::decodeJson($valueMatch[1]);
            }
        }

        return $scripts;
    }

    /**
     * Parses an HTML fragment into a DOM document, forcing UTF-8 decoding.
     *
     * libxml's internal error collection is switched on for the duration of the
     * parse so that malformed markup does not emit PHP warnings. The previous
     * setting is always restored, including when parsing fails.
     *
     * @param string $html
     *
     * @throws \RuntimeException when libxml reports that the markup could not be loaded
     *
     * @return \DOMDocument
     */
    public static function createDomDocument(string $html): \DOMDocument
    {
        $doc = new \DOMDocument();
        $previousSetting = libxml_use_internal_errors(true);

        try {
            // The XML declaration prefix makes libxml treat the input as UTF-8.
            if (!$doc->loadHTML('<?xml encoding="utf-8"?>' . $html)) {
                throw new \RuntimeException(
                    'error load html: ' . $html
                );
            }
        } finally {
            // Only drop the buffered errors when this method was the one that
            // enabled buffering; a caller that had already enabled it owns the buffer.
            if (!$previousSetting) {
                libxml_clear_errors();
            }
            libxml_use_internal_errors($previousSetting);
        }

        return $doc;
    }

    /**
     * Converts HTML markup to plain text.
     *
     * Block-level elements become paragraphs separated by a blank line, list
     * items become `- ` bullet lines and `<br>` becomes a line break. Runs of
     * three or more newlines are collapsed to a single blank line.
     *
     * @param string $html
     *
     * @return string
     */
    public static function html2text(string $html): string
    {
        $text = trim(self::convertDomNodeToText(self::createDomDocument($html)));

        // The input is already trimmed and the replacement only touches interior
        // newline runs, so no second trim is needed. preg_replace() returns null
        // on a PCRE error; fall back to the uncollapsed text in that case.
        return preg_replace('/\n{3,}/', "\n\n", $text) ?? $text;
    }

    /**
     * Recursively renders a DOM node and its descendants as plain text.
     *
     * @param \DOMNode $node
     *
     * @return string
     */
    private static function convertDomNodeToText(\DOMNode $node): string
    {
        if ($node instanceof \DOMText) {
            // Collapse any whitespace run to a single space; keep the raw text
            // if the regex engine fails.
            return preg_replace('/\s+/', ' ', $node->wholeText) ?? $node->wholeText;
        }

        $text = '';

        if ($node->childNodes !== null) {
            foreach ($node->childNodes as $childNode) {
                $text .= self::convertDomNodeToText($childNode);
            }
        }

        switch ($node->nodeName) {
            case 'h1':
            case 'h2':
            case 'h3':
            case 'h4':
            case 'h5':
            case 'h6':
            case 'p':
            case 'ul':
            case 'div':
                // Surround block content with blank lines.
                return "\n\n" . $text . "\n\n";

            case 'li':
                return '- ' . $text . "\n";

            case 'br':
                return $text . "\n";
        }

        return $text;
    }

    /**
     * Reads a nested value from an array by following a key path.
     *
     * The array is only read, never modified.
     *
     * @param array        $array source data
     * @param array|string $path  list of keys, or a single string of keys joined by $glue
     * @param string       $glue  separator used to split a string path
     *
     * @return mixed the value at the end of the path, or null as soon as a
     *               segment is missing or the current value is not an array
     */
    public static function getValue(array &$array, $path, string $glue = '.')
    {
        $segments = \is_array($path) ? $path : explode($glue, (string) $path);

        // Walk by value: PHP arrays are copy-on-write, so this does not duplicate
        // the data, whereas taking element references would force a separation
        // of a shared array.
        $current = $array;

        // explode() can return false for an empty glue on PHP 7; the cast keeps
        // the iteration well-defined in that case.
        foreach ((array) $segments as $segment) {
            if (!\is_array($current) || !\array_key_exists($segment, $current)) {
                return null;
            }

            $current = $current[$segment];
        }

        return $current;
    }

    /**
     * Decodes a JSON document into associative arrays using Guzzle's decoder.
     *
     * The function `\GuzzleHttp\json_decode()` is deprecated in favour of
     * `\GuzzleHttp\Utils::jsonDecode()`. The static method is used whenever the
     * installed Guzzle provides it; the function is kept only as a fallback for
     * installations that do not.
     *
     * @param string $json
     *
     * @return mixed
     */
    private static function decodeJson(string $json)
    {
        if (method_exists(\GuzzleHttp\Utils::class, 'jsonDecode')) {
            return \GuzzleHttp\Utils::jsonDecode($json, true);
        }

        return \GuzzleHttp\json_decode($json, true);
    }
}
155