Passed
Push — master ( 1a22f3...b63c45 )
by Sys
10:14
created

SchemaExtractor::getVersion()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 3
Code Lines 1

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
eloc 1
c 1
b 0
f 0
dl 0
loc 3
rs 10
cc 1
nc 1
nop 0
1
<?php
2
3
namespace TgScraper\Common;
4
5
use Composer\InstalledVersions;
6
use Exception;
7
use GuzzleHttp\Client;
8
use GuzzleHttp\Exception\GuzzleException;
9
use InvalidArgumentException;
10
use JetBrains\PhpStorm\ArrayShape;
11
use OutOfBoundsException;
12
use Psr\Log\LoggerInterface;
13
use RuntimeException;
14
use TgScraper\Parsers\Field;
15
use TgScraper\Parsers\ObjectDescription;
16
use TgScraper\Constants\Versions;
17
use voku\helper\HtmlDomParser;
1 ignored issue
show
Bug introduced by
The type voku\helper\HtmlDomParser was not found. Maybe you did not declare it correctly or list all dependencies?

The issue could also be caused by a filter entry in the build configuration. If the path has been excluded in your configuration, e.g. excluded_paths: ["lib/*"], you can move it to the dependency path list as follows:

filter:
    dependency_paths: ["lib/*"]

For further information see https://scrutinizer-ci.com/docs/tools/php/php-scrutinizer/#list-dependency-paths

Loading history...
18
use voku\helper\SimpleHtmlDomInterface;
19
use voku\helper\SimpleHtmlDomNode;
20
use voku\helper\SimpleHtmlDomNodeInterface;
21
22
/**
23
 * Class SchemaExtractor
24
 * @package TgScraper\Common
25
 */
26
class SchemaExtractor
27
{
28
    /**
29
     * @var string
30
     */
31
    private string $version;
32
33
    /**
     * Builds an extractor around an already-loaded documentation DOM.
     *
     * The Bot API version is parsed from the DOM right away, stored on the
     * instance and reported through the logger at info level.
     *
     * @param LoggerInterface $logger Logger that receives the detected version.
     * @param HtmlDomParser $dom Parsed HTML document the schema is read from.
     */
    public function __construct(
        private LoggerInterface $logger,
        private HtmlDomParser $dom
    ) {
        $detectedVersion = $this->parseVersion();
        $this->version = $detectedVersion;
        $this->logger->info('Bot API version: ' . $detectedVersion);
    }
43
44
45
    /**
     * Creates an extractor for the requested Bot API version.
     *
     * When the "sysbot/tgscraper-cache" package is installed and its
     * CacheLoader class exists, a cached copy of the version is tried first.
     * If that attempt throws OutOfBoundsException, or the cache package is
     * not available, the version is resolved to a URL and loaded from there.
     *
     * @param LoggerInterface $logger Logger used for progress messages.
     * @param string $version Version identifier; defaults to Versions::LATEST.
     * @return SchemaExtractor
     * @throws OutOfBoundsException
     * @throws Exception
     * @throws GuzzleException
     */
    public static function fromVersion(LoggerInterface $logger, string $version = Versions::LATEST): SchemaExtractor
    {
        $cacheAvailable = InstalledVersions::isInstalled('sysbot/tgscraper-cache')
            && class_exists('\TgScraper\Cache\CacheLoader');

        if ($cacheAvailable) {
            $logger->info('Cache package detected, searching for a cached version.');
            try {
                /** @noinspection PhpFullyQualifiedNameUsageInspection */
                /** @noinspection PhpUndefinedNamespaceInspection */
                /** @psalm-suppress UndefinedClass */
                $cachedPath = \TgScraper\Cache\CacheLoader::getCachedVersion($version);
                $logger->info('Cached version found.');

                return self::fromFile($logger, $cachedPath);
            } catch (OutOfBoundsException) {
                // No usable cache entry: fall through to the URL-based loader.
                $logger->info('Cached version not found, continuing with URL.');
            }
        }

        $resolvedUrl = Versions::getUrlFromText($version);
        $logger->info(sprintf('Using URL: %s', $resolvedUrl));

        return self::fromUrl($logger, $resolvedUrl);
    }
72
73
    /**
     * Creates an extractor from an HTML file on disk.
     *
     * The path must exist and must not be a directory. It is canonicalised
     * with realpath() before the file is handed to the HTML parser.
     *
     * @param LoggerInterface $logger Logger used for progress and failure messages.
     * @param string $path Path to the HTML file.
     * @return SchemaExtractor
     * @throws InvalidArgumentException If $path does not exist or is a directory.
     * @throws RuntimeException Re-thrown after being logged as critical.
     */
    public static function fromFile(LoggerInterface $logger, string $path): SchemaExtractor
    {
        $isUsableFile = file_exists($path) && !is_dir($path);
        if (!$isUsableFile) {
            throw new InvalidArgumentException('File not found');
        }

        $resolvedPath = realpath($path);

        try {
            $logger->info(sprintf('Loading data from file "%s".', $resolvedPath));
            $document = HtmlDomParser::file_get_html($resolvedPath);
            $logger->info('Data loaded.');
        } catch (RuntimeException $loadError) {
            $logger->critical(
                sprintf('Unable to load data from "%s": %s', $resolvedPath, $loadError->getMessage())
            );
            throw $loadError;
        }

        return new self($logger, $document);
    }
96
97
    /**
     * Creates an extractor from an HTML page fetched over HTTP.
     *
     * The page is downloaded with a fresh Guzzle client and its body is
     * parsed into a DOM. A Guzzle failure is logged as critical and re-thrown.
     *
     * @param LoggerInterface $logger Logger used for progress and failure messages.
     * @param string $url Address of the page to download.
     * @return SchemaExtractor
     * @throws GuzzleException
     */
    public static function fromUrl(LoggerInterface $logger, string $url): SchemaExtractor
    {
        $httpClient = new Client();

        try {
            $markup = (string)$httpClient->get($url)->getBody();
        } catch (GuzzleException $requestError) {
            $logger->critical(
                sprintf('Unable to load data from URL "%s": %s', $url, $requestError->getMessage())
            );
            throw $requestError;
        }

        $document = HtmlDomParser::str_get_html($markup);
        $logger->info(sprintf('Data loaded from "%s".', $url));

        return new self($logger, $document);
    }
116
117
    /**
     * Collects the documentation that follows a section heading.
     *
     * Walks the siblings after $node, in document order, and:
     *  - appends the inner HTML of every "p" sibling to the description,
     *    each one prefixed with PHP_EOL;
     *  - on the first "ul", stores the text of each "li" in "extended_by"
     *    and stops;
     *  - on the first "table", stores the "tr" rows of its "tbody" in
     *    "table" and stops.
     * The walk also stops at the first sibling whose tag name starts with
     * "h", or when there are no siblings left.
     *
     * If $node has empty text, nothing is collected.
     *
     * @param SimpleHtmlDomInterface $node Heading element to start from.
     * @return array{description: string, table: ?SimpleHtmlDomNodeInterface, extended_by: string[]}
     */
    private static function parseNode(SimpleHtmlDomInterface $node): array
    {
        $description = '';
        $table = null;
        $extendedBy = [];

        // The heading text never changes while walking, so check it once.
        if (empty($node->text())) {
            return ['description' => $description, 'table' => $table, 'extended_by' => $extendedBy];
        }

        // Ending the loop on a null sibling is essential: without it, a heading
        // that is not followed by a list, a table or another heading would
        // keep the loop spinning forever.
        for ($sibling = $node->nextSibling(); null !== $sibling; $sibling = $sibling->nextSibling()) {
            $tag = $sibling->tag;
            if (str_starts_with($tag ?? '', 'h')) {
                break;
            }
            if ('p' === $tag) {
                $description .= PHP_EOL . $sibling->innerHtml();
                continue;
            }
            if ('ul' === $tag) {
                foreach ($sibling->findMulti('li') as $item) {
                    $extendedBy[] = $item->text();
                }
                break;
            }
            if ('table' === $tag) {
                /** @var SimpleHtmlDomNodeInterface $table */
                $table = $sibling->findOne('tbody')->findMulti('tr');
                break;
            }
        }

        return ['description' => $description, 'table' => $table, 'extended_by' => $extendedBy];
    }
152
153
    /**
     * Reads the Bot API version from the loaded document.
     *
     * Starting at the first "h3" element, the following siblings are walked
     * until a "p" element is found. The "Bot API " prefix is removed from
     * that paragraph's text and the leading "major[.minor[.patch]]" number
     * is normalised to three parts; missing parts default to "0".
     *
     * "1.0.0" is returned when no such paragraph exists or when its text
     * does not start with a version number.
     *
     * @return string Version in "major.minor.patch" form.
     */
    private function parseVersion(): string
    {
        $element = $this->dom->findOne('h3');
        do {
            $element = $element->nextSibling();
        } while (null !== $element && 'p' !== $element->tag);

        if (null === $element) {
            return '1.0.0';
        }

        $text = trim(str_replace('Bot API ', '', $element->text()));

        // Validate the numeric prefix instead of blindly splitting on ".":
        // explode() always yields at least one element, so a plain split can
        // never fall back to a default major version and would turn empty or
        // unrelated paragraph text into a malformed version string.
        if (1 !== preg_match('/^(\d+)(?:\.(\d+))?(?:\.(\d+))?/', $text, $parts)) {
            return '1.0.0';
        }

        // Trailing optional groups that did not take part in the match are
        // absent from $parts, hence the null-coalescing defaults.
        return sprintf('%s.%s.%s', $parts[1], $parts[2] ?? '0', $parts[3] ?? '0');
    }
175
176
    /**
     * Returns the Bot API version that was detected when this extractor
     * was constructed.
     *
     * @return string
     */
    public function getVersion(): string
    {
        return $this->version;
    }
183
184
    /**
     * Extracts every method and type described in the document.
     *
     * Each "h4" heading whose text contains no space is treated as a schema
     * entry: a name starting with a lowercase character is recorded under
     * "methods", any other name under "types". Headings containing a space
     * are skipped.
     *
     * @return array{version: string, methods: array, types: array}
     * @throws Exception If the document contains no "h4" elements.
     */
    public function extract(): array
    {
        $headings = $this->dom->findMultiOrFalse('h4');
        if (false === $headings) {
            throw new Exception('Unable to fetch required DOM nodes');
        }

        $schema = ['version' => $this->version, 'methods' => [], 'types' => []];

        foreach ($headings as $heading) {
            $name = $heading->text();
            if (str_contains($name, ' ')) {
                continue;
            }

            $isMethod = lcfirst($name) === $name;
            $section = $isMethod ? 'methods' : 'types';
            $parsed = self::parseNode($heading);

            $schema[$section][] = self::generateElement(
                $name,
                trim($parsed['description']),
                $parsed['table'],
                $parsed['extended_by'],
                $isMethod
            );
        }

        return $schema;
    }
213
214
    /**
     * Assembles the array representation of one method or type.
     *
     * The result always carries "name", "description" (tags stripped and
     * HTML entities decoded) and "fields". Methods additionally get
     * "return_types", derived from the raw description; types get
     * "extended_by" instead.
     *
     * @param string $name Name of the method or type.
     * @param string $description Raw description, possibly containing HTML.
     * @param SimpleHtmlDomNodeInterface|null $unparsedFields Table rows describing the fields, if any.
     * @param array $extendedBy Entries stored under "extended_by" for types.
     * @param bool $isMethod Whether the element is a method rather than a type.
     * @return array
     */
    private static function generateElement(
        string $name,
        string $description,
        ?SimpleHtmlDomNodeInterface $unparsedFields,
        array $extendedBy,
        bool $isMethod
    ): array {
        $parsedFields = self::parseFields($unparsedFields, $isMethod);
        $plainDescription = htmlspecialchars_decode(strip_tags($description), ENT_QUOTES);

        $element = [
            'name' => $name,
            'description' => $plainDescription,
            'fields' => $parsedFields
        ];

        if (!$isMethod) {
            $element['extended_by'] = $extendedBy;

            return $element;
        }

        // Return types are derived from the raw (HTML) description.
        $element['return_types'] = (new ObjectDescription($description))->getTypes();

        return $element;
    }
244
245
    /**
     * Converts table rows into a list of field definitions.
     *
     * For every row the "td" cells are read positionally: cell 0 is the
     * field name and cell 1 its type text. For methods, a field is optional
     * unless cell 2 reads exactly "Yes", and cell 3 holds the description.
     * For types, cell 2 holds the description and the field is optional
     * when that text starts with "Optional.". Rows with an empty name are
     * skipped.
     *
     * @param SimpleHtmlDomNodeInterface|null $fields Rows to parse; null yields an empty list.
     * @param bool $isMethod Whether the rows belong to a method rather than a type.
     * @return array
     */
    private static function parseFields(?SimpleHtmlDomNodeInterface $fields, bool $isMethod): array
    {
        if (null === $fields) {
            return [];
        }

        $parsedFields = [];

        /** @var SimpleHtmlDomInterface $row */
        foreach ($fields as $row) {
            /** @var SimpleHtmlDomNode $cells */
            $cells = $row->findMulti('td');

            $name = $cells[0]->text();
            if (empty($name)) {
                continue;
            }

            $types = $cells[1]->text();

            if ($isMethod) {
                $optional = 'Yes' !== $cells[2]->text();
                $description = $cells[3]->innerHtml();
            } else {
                $description = $cells[2]->innerHtml();
                $optional = str_starts_with($cells[2]->text(), 'Optional.');
            }

            $parsedFields[] = (new Field($name, $types, $optional, $description))->toArray();
        }

        return $parsedFields;
    }
275
}
276