Passed
Push — master ( 3eef21...137960 )
by Sys
02:03
created

SchemaExtractor::extract()   A

Complexity

Conditions 5
Paths 5

Size

Total Lines 28
Code Lines 20

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
eloc 20
c 1
b 0
f 0
dl 0
loc 28
rs 9.2888
cc 5
nc 5
nop 0
1
<?php
2
3
4
namespace TgScraper\Common;
5
6
7
use JetBrains\PhpStorm\ArrayShape;
8
use PHPHtmlParser\Dom;
9
use PHPHtmlParser\Exceptions\ChildNotFoundException;
10
use PHPHtmlParser\Exceptions\CircularException;
11
use PHPHtmlParser\Exceptions\ContentLengthException;
12
use PHPHtmlParser\Exceptions\LogicalException;
13
use PHPHtmlParser\Exceptions\NotLoadedException;
14
use PHPHtmlParser\Exceptions\ParentNotFoundException;
15
use PHPHtmlParser\Exceptions\StrictException;
16
use Psr\Http\Client\ClientExceptionInterface;
17
use Psr\Log\LoggerInterface;
18
use TgScraper\Constants\Versions;
19
use Throwable;
20
21
/**
 * Class SchemaExtractor
 *
 * Loads an HTML documentation page into a DOM and turns its `<h4>` sections
 * into a schema array with the detected version, the methods and the types.
 *
 * @package TgScraper\Common
 */
class SchemaExtractor
{
    /**
     * Additional methods with boolean return value.
     *
     * Used as a fallback when no return type can be parsed from the
     * description of one of these methods.
     */
    private const BOOL_RETURNS = [
        'answerShippingQuery',
        'answerPreCheckoutQuery',
    ];

    private Dom $dom;

    private string $version;

    /**
     * SchemaExtractor constructor.
     *
     * Loads the page at $url, then detects and logs the version.
     * A load failure is logged as critical and rethrown unchanged.
     *
     * @param LoggerInterface $logger
     * @param string $url
     * @throws ChildNotFoundException
     * @throws CircularException
     * @throws ClientExceptionInterface
     * @throws ContentLengthException
     * @throws LogicalException
     * @throws StrictException
     * @throws Throwable
     */
    public function __construct(private LoggerInterface $logger, private string $url = Versions::LATEST)
    {
        $this->dom = new Dom();
        try {
            $this->dom->loadFromURL($this->url);
        } catch (Throwable $e) {
            $this->logger->critical(sprintf('Unable to load data from URL "%s": %s', $this->url, $e->getMessage()));
            throw $e;
        }
        $this->version = $this->parseVersion();
        $this->logger->info('Bot API version: ' . $this->version);
    }

    /**
     * Collects what follows a heading node: `<p>` siblings are appended to the
     * description, the first `<ul>` provides the "extended by" list and the
     * first `<table>` provides the field rows. Scanning stops at the first
     * `<ul>`/`<table>`, at the next sibling whose tag name starts with "h",
     * or when there are no more siblings.
     *
     * @param Dom\Node\AbstractNode $node Heading node that opens the section.
     * @return array|null
     * @throws ChildNotFoundException
     */
    #[ArrayShape(['description' => "string", 'table' => "mixed", 'extended_by' => "array"])]
    private static function parseNode(Dom\Node\AbstractNode $node): ?array
    {
        $description = '';
        $table = null;
        $extendedBy = [];
        // Loop-invariant: a heading without text never collects anything.
        $headingHasText = !empty($node->text());
        $sibling = $node;
        while (true) {
            try {
                $sibling = $sibling->nextSibling();
            } catch (ChildNotFoundException | ParentNotFoundException) {
                // Running out of siblings is a normal end of section, not an error.
                break;
            }
            $tag = $sibling->tag?->name() ?? '';
            if (str_starts_with($tag, 'h')) {
                break;
            }
            if (!$headingHasText || $tag === '' || $tag === 'text') {
                continue;
            }
            if ($tag === 'p') {
                $description .= PHP_EOL . $sibling->innerHtml();
            } elseif ($tag === 'ul') {
                /* @var Dom\Node\AbstractNode $item */
                foreach ($sibling->find('li') as $item) {
                    $extendedBy[] = $item->innerText;
                }
                break;
            } elseif ($tag === 'table') {
                $table = $sibling->find('tbody')->find('tr');
                break;
            }
        }
        return ['description' => $description, 'table' => $table, 'extended_by' => $extendedBy];
    }

    /**
     * Reads the version from the first `<p>` sibling that follows the first
     * `<h3>` of the page and normalizes it to "major.minor.patch".
     * Missing minor/patch parts default to "0".
     *
     * @return string
     * @throws \RuntimeException When the page has no `<h3>` or no `<p>` follows it.
     */
    private function parseVersion(): string
    {
        /** @var Dom\Node\AbstractNode|null $element */
        $element = $this->dom->find('h3')[0] ?? null;
        if ($element === null) {
            throw new \RuntimeException('Unable to detect the Bot API version: no <h3> heading found.');
        }
        do {
            try {
                $element = $element->nextSibling();
            } catch (ChildNotFoundException | ParentNotFoundException $e) {
                // Retrying on the same node can never succeed; fail instead of looping forever.
                throw new \RuntimeException(
                    'Unable to detect the Bot API version: no paragraph follows the first <h3> heading.',
                    0,
                    $e
                );
            }
        } while ($element->tag->name() !== 'p');
        $versionNumbers = explode('.', trim(str_replace('Bot API ', '', $element->innerText)));
        return sprintf(
            '%s.%s.%s',
            $versionNumbers[0] ?? '1',
            $versionNumbers[1] ?? '0',
            $versionNumbers[2] ?? '0'
        );
    }

    /**
     * Builds the schema from every `<h4>` heading whose text is a single word.
     * A heading starting with a lowercase letter is stored as a method,
     * any other one as a type.
     *
     * @return array Always contains the 'version', 'methods' and 'types' keys.
     * @throws ChildNotFoundException
     * @throws CircularException
     * @throws ContentLengthException
     * @throws LogicalException
     * @throws NotLoadedException
     * @throws ParentNotFoundException
     * @throws StrictException
     * @throws ClientExceptionInterface
     * @throws Throwable
     */
    #[ArrayShape(['version' => "string", 'methods' => "array", 'types' => "array"])]
    public function extract(): array
    {
        try {
            $elements = $this->dom->find('h4');
        } catch (Throwable $e) {
            $this->logger->critical(sprintf('Unable to load data from URL "%s": %s', $this->url, $e->getMessage()));
            throw $e;
        }
        $data = ['version' => $this->version];
        /* @var Dom\Node\AbstractNode $element */
        foreach ($elements as $element) {
            $name = (string) $element->text;
            // Multi-word headings are section titles; empty headings carry no definition.
            if ($name === '' || str_contains($name, ' ')) {
                continue;
            }
            $isMethod = lcfirst($name) === $name;
            $path = $isMethod ? 'methods' : 'types';
            ['description' => $description, 'table' => $table, 'extended_by' => $extendedBy] = self::parseNode(
                $element
            );
            $data[$path][] = self::generateElement(
                $name,
                trim($description),
                $table,
                $extendedBy,
                $isMethod
            );
        }
        // Guarantee the documented shape without changing the order of keys that already exist.
        $data += ['methods' => [], 'types' => []];
        return $data;
    }

    /**
     * Assembles the schema entry of one method or type.
     *
     * @param string $name
     * @param string $description Raw (HTML) description; tags are stripped in the result.
     * @param Dom\Node\Collection|null $unparsedFields Table rows of the element, if any.
     * @param array $extendedBy
     * @param bool $isMethod When true, a 'return_types' key is added to the result.
     * @return array
     * @throws ChildNotFoundException
     * @throws CircularException
     * @throws ContentLengthException
     * @throws LogicalException
     * @throws NotLoadedException
     * @throws StrictException
     */
    private static function generateElement(
        string $name,
        string $description,
        ?Dom\Node\Collection $unparsedFields,
        array $extendedBy,
        bool $isMethod
    ): array {
        $result = [
            'name' => $name,
            'description' => htmlspecialchars_decode(strip_tags($description), ENT_QUOTES),
            'fields' => self::parseFields($unparsedFields, $isMethod),
            'extended_by' => $extendedBy,
        ];
        if ($isMethod) {
            // Return types are parsed from the raw description because they rely on its markup.
            $returnTypes = self::parseReturnTypes($description);
            if ($returnTypes === [] && in_array($name, self::BOOL_RETURNS, true)) {
                $returnTypes[] = 'bool';
            }
            $result['return_types'] = $returnTypes;
        }
        return $result;
    }

    /**
     * Converts table rows into field definitions.
     *
     * Method rows are read as name / type / required / description,
     * type rows as name / type / description. Rows with fewer than three
     * cells or with an empty name are skipped.
     *
     * @param Dom\Node\Collection|null $fields
     * @param bool $isMethod
     * @return array
     * @throws ChildNotFoundException
     * @throws NotLoadedException
     */
    private static function parseFields(?Dom\Node\Collection $fields, bool $isMethod): array
    {
        $parsedFields = [];
        foreach ($fields ?? [] as $field) {
            /* @var Dom\Node\AbstractNode $field */
            $cells = $field->find('td');
            if (count($cells) < 3) {
                // Not a data row (e.g. header or malformed row).
                continue;
            }
            $name = (string) $cells[0]->text;
            if ($name === '') {
                continue;
            }
            $parsedData = [
                'name' => $name,
                'types' => self::parseFieldTypes(strip_tags((string) $cells[1]->innerHtml)),
            ];
            if ($isMethod) {
                $parsedData['required'] = $cells[2]->text === 'Yes';
                $parsedData['description'] = htmlspecialchars_decode(
                    strip_tags($cells[3]?->innerHtml ?? $cells[3]?->text ?? ''),
                    ENT_QUOTES
                );
            } else {
                $description = htmlspecialchars_decode(strip_tags((string) $cells[2]->innerHtml), ENT_QUOTES);
                $parsedData['optional'] = str_starts_with($description, 'Optional.');
                $parsedData['description'] = $description;
            }
            $parsedFields[] = $parsedData;
        }
        return $parsedFields;
    }

    /**
     * Normalizes a textual type such as "Array of Integer" or
     * "Integer or String" into a list of types (`Array<int>`, `int`, `string`).
     *
     * Filler words and primitive names are matched as whole words only, so
     * names that merely contain them (e.g. "InputProfilePhoto") stay intact.
     *
     * @param string $rawType
     * @return array
     */
    private static function parseFieldTypes(string $rawType): array
    {
        $candidates = [];
        foreach (explode(' or ', $rawType) as $alternative) {
            if (stripos($alternative, 'array') === 0) {
                // "Array of A, B and C" is a single array type listing several item types.
                $candidates[] = str_replace(' and', ',', $alternative);
                continue;
            }
            foreach (explode(' and ', $alternative) as $part) {
                $candidates[] = $part;
            }
        }
        $primitives = [
            'Integer' => 'int',
            'String' => 'string',
            'Boolean' => 'bool',
            'Float' => 'float',
            'True' => 'bool',
        ];
        $parsedTypes = [];
        foreach ($candidates as $candidate) {
            $candidate = trim((string) preg_replace('/\b(?:number|of)\b/', '', $candidate));
            $depth = (int) preg_match_all('/\barray\b/i', $candidate);
            $itemType = (string) preg_replace('/\barray\b/i', '', $candidate);
            $parsedType = trim(
                (string) preg_replace_callback(
                    '/\b(?:Integer|String|Boolean|Float|True)\b/',
                    static fn (array $match): string => $primitives[$match[0]],
                    $itemType
                )
            );
            // One wrapper per "Array" occurrence: "Array of Array of X" => Array<Array<X>>.
            for ($i = 0; $i < $depth; $i++) {
                $parsedType = sprintf('Array<%s>', $parsedType);
            }
            $parsedTypes[] = $parsedType;
        }
        return $parsedTypes;
    }

    /**
     * Extracts return types from the sentences of a method description that
     * contain "returns" or "is returned": link texts and emphasized words are
     * taken as types, links are wrapped in `Array<…>` once per "array"
     * occurrence in the sentence.
     *
     * @param string $description Raw (HTML) description of the method.
     * @return array
     * @throws ChildNotFoundException
     * @throws CircularException
     * @throws NotLoadedException
     * @throws StrictException
     * @throws ContentLengthException
     * @throws LogicalException
     * @noinspection PhpUndefinedFieldInspection
     */
    private static function parseReturnTypes(string $description): array
    {
        $returnTypes = [];
        $phrases = array_filter(
            explode('.', $description),
            static fn (string $phrase): bool => stripos($phrase, 'returns') !== false
                || stripos($phrase, 'is returned') !== false
        );
        foreach ($phrases as $phrase) {
            $dom = new Dom();
            $dom->loadStr($phrase);
            $links = $dom->find('a');
            $emphasized = $dom->find('em');
            // Depends on the sentence only, so it is computed once per phrase.
            $multiplesCount = substr_count(strtolower($phrase), 'array');
            foreach ($links as $element) {
                if ($element->text === 'Messages') {
                    $returnTypes[] = 'Array<Message>';
                    continue;
                }
                $returnType = $element->text;
                for ($i = 0; $i < $multiplesCount; $i++) {
                    $returnType = sprintf('Array<%s>', $returnType);
                }
                $returnTypes[] = $returnType;
            }
            foreach ($emphasized as $element) {
                if (in_array($element->text, ['False', 'force', 'Array'], true)) {
                    continue;
                }
                $returnTypes[] = str_replace(['True', 'Int', 'String'], ['bool', 'int', 'string'], $element->text);
            }
        }
        return $returnTypes;
    }
}