Passed
Push — master ( cad00a...5b38b0 )
by Sys
01:58
created

SchemaExtractor::extract()   B

Complexity

Conditions 6
Paths 6

Size

Total Lines 32
Code Lines 24

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
eloc 24
c 1
b 0
f 0
dl 0
loc 32
rs 8.9137
cc 6
nc 6
nop 0
1
<?php
2
3
4
namespace TgScraper\Common;
5
6
7
use JetBrains\PhpStorm\ArrayShape;
8
use PHPHtmlParser\Dom;
9
use PHPHtmlParser\Exceptions\ChildNotFoundException;
10
use PHPHtmlParser\Exceptions\CircularException;
11
use PHPHtmlParser\Exceptions\ContentLengthException;
12
use PHPHtmlParser\Exceptions\LogicalException;
13
use PHPHtmlParser\Exceptions\NotLoadedException;
14
use PHPHtmlParser\Exceptions\ParentNotFoundException;
15
use PHPHtmlParser\Exceptions\StrictException;
16
use Psr\Http\Client\ClientExceptionInterface;
17
use Psr\Log\LoggerInterface;
18
use TgScraper\Constants\Versions;
19
use Throwable;
20
21
/**
 * Class SchemaExtractor
 *
 * Builds a schema array out of an HTML documentation page. Every `h4` heading
 * whose text contains no space is treated as an entry: names starting with a
 * lowercase letter are collected under `methods`, all others under `types`.
 * The paragraphs, list and table following a heading supply the entry's
 * description, `extended_by` list and fields.
 *
 * @package TgScraper\Common
 */
class SchemaExtractor
{

    /**
     * Additional methods with boolean return value.
     *
     * Used as a fallback when no return type can be found in a method's
     * description.
     */
    private const BOOL_RETURNS = [
        'answerShippingQuery',
        'answerPreCheckoutQuery'
    ];

    /**
     * SchemaExtractor constructor.
     *
     * @param LoggerInterface $logger Receives a critical record when the page cannot be loaded or queried.
     * @param string $url URL of the page to extract the schema from.
     */
    public function __construct(private LoggerInterface $logger, private string $url = Versions::LATEST)
    {
    }

    /**
     * Collects the content that follows a heading node.
     *
     * Walks the next siblings of $node until a node whose tag name starts
     * with "h" is reached. `p` siblings are appended to the description;
     * the first `ul` (its `li` texts become `extended_by`) or `table` (its
     * `tbody tr` rows become `table`) ends the walk early.
     *
     * @param Dom\Node\AbstractNode $node Heading node to start from.
     * @return array|null Array with the keys `description`, `table` and `extended_by`.
     * @throws ParentNotFoundException
     * @throws ChildNotFoundException
     */
    #[ArrayShape(['description' => "string", 'table' => "mixed", 'extended_by' => "array"])]
    private function parseNode(Dom\Node\AbstractNode $node): ?array
    {
        $description = '';
        $table = null;
        $extendedBy = [];
        $tag = '';
        $sibling = $node;
        while (!str_starts_with($tag, 'h')) {
            $sibling = $sibling->nextSibling();
            // Cast so that a missing tag yields '' rather than null: passing
            // null to str_starts_with() is deprecated since PHP 8.1.
            $tag = (string) $sibling?->tag?->name();
            if (empty($node->text()) || $tag === '' || $tag === 'text') {
                continue;
            }
            if ($tag === 'p') {
                $description .= PHP_EOL . $sibling->innerHtml();
                continue;
            }
            if ($tag === 'ul') {
                /* @var Dom\Node\AbstractNode $item */
                foreach ($sibling->find('li') as $item) {
                    $extendedBy[] = $item->innerText;
                }
                break;
            }
            if ($tag === 'table') {
                $table = $sibling->find('tbody')->find('tr');
                break;
            }
        }
        return ['description' => $description, 'table' => $table, 'extended_by' => $extendedBy];
    }

    /**
     * Loads the configured URL and extracts every method and type found on it.
     *
     * Any failure while loading the page or locating its `h4` headings is
     * logged as critical and then rethrown unchanged.
     *
     * @return array Entries grouped under the keys `methods` and `types`
     *               (a key is absent when no entry of that kind was found).
     * @throws ChildNotFoundException
     * @throws CircularException
     * @throws ContentLengthException
     * @throws LogicalException
     * @throws NotLoadedException
     * @throws ParentNotFoundException
     * @throws StrictException
     * @throws ClientExceptionInterface
     * @throws Throwable
     */
    public function extract(): array
    {
        $dom = new Dom;
        try {
            $dom->loadFromURL($this->url);
            $elements = $dom->find('h4');
        } catch (Throwable $e) {
            $this->logger->critical(sprintf('Unable to load data from URL "%s": %s', $this->url, $e->getMessage()));
            throw $e;
        }
        $data = [];
        /* @var Dom\Node\AbstractNode $element */
        foreach ($elements as $element) {
            $name = $element->text;
            // Headings containing a space are section titles, not entries.
            if (str_contains($name, ' ')) {
                continue;
            }
            // Names that already start with a lowercase letter are methods.
            $isMethod = lcfirst($name) === $name;
            $path = $isMethod ? 'methods' : 'types';
            // parseNode() is an instance method, so it is called on $this.
            [
                'description' => $description,
                'table' => $table,
                'extended_by' => $extendedBy
            ] = $this->parseNode($element);
            $data[$path][] = self::generateElement(
                $name,
                trim($description),
                $table,
                $extendedBy,
                $isMethod
            );
        }
        return $data;
    }

    /**
     * Assembles the schema entry for a single method or type.
     *
     * @param string $name Entry name.
     * @param string $description Raw (HTML) description; tags are stripped in the result.
     * @param Dom\Node\Collection|null $unparsedFields Table rows describing the fields, if any.
     * @param array $extendedBy Names listed below the entry.
     * @param bool $isMethod Whether the entry is a method; methods also get `return_types`.
     * @return array
     * @throws ChildNotFoundException
     * @throws CircularException
     * @throws ContentLengthException
     * @throws LogicalException
     * @throws NotLoadedException
     * @throws StrictException
     */
    private static function generateElement(
        string $name,
        string $description,
        ?Dom\Node\Collection $unparsedFields,
        array $extendedBy,
        bool $isMethod
    ): array {
        $result = [
            'name' => $name,
            'description' => htmlspecialchars_decode(strip_tags($description), ENT_QUOTES),
            'fields' => self::parseFields($unparsedFields, $isMethod),
            'extended_by' => $extendedBy
        ];
        if ($isMethod) {
            $returnTypes = self::parseReturnTypes($description);
            // Fall back to bool for the methods whose description names no type.
            if (empty($returnTypes) && in_array($name, self::BOOL_RETURNS, true)) {
                $returnTypes[] = 'bool';
            }
            $result['return_types'] = $returnTypes;
        }
        return $result;
    }

    /**
     * Converts table rows into field definitions.
     *
     * Method rows are read as name / type / required / description, type
     * rows as name / type / description. Rows with an empty name are skipped.
     *
     * @param Dom\Node\Collection|null $fields Table rows, or null when the entry has no table.
     * @param bool $isMethod Selects the column layout described above.
     * @return array
     * @throws ChildNotFoundException
     * @throws NotLoadedException
     */
    private static function parseFields(?Dom\Node\Collection $fields, bool $isMethod): array
    {
        $parsedFields = [];
        foreach ($fields ?? [] as $field) {
            /* @var Dom $field */
            $cells = $field->find('td');
            // Nullsafe: a row without cells is skipped like one with an empty name.
            $name = $cells[0]?->text;
            if (empty($name)) {
                continue;
            }
            $parsedData = [
                'name' => $name,
                'types' => self::parseFieldTypes(strip_tags($cells[1]->innerHtml))
            ];
            if ($isMethod) {
                $parsedData['required'] = $cells[2]->text === 'Yes';
                $parsedData['description'] = htmlspecialchars_decode(
                    strip_tags($cells[3]->innerHtml ?? $cells[3]->text ?? ''),
                    ENT_QUOTES
                );
            } else {
                $description = htmlspecialchars_decode(strip_tags($cells[2]->innerHtml), ENT_QUOTES);
                $parsedData['optional'] = str_starts_with($description, 'Optional.');
                $parsedData['description'] = $description;
            }
            $parsedFields[] = $parsedData;
        }
        return $parsedFields;
    }

    /**
     * Normalises a human-readable type such as "Array of Integer" or
     * "Integer or String" into a list of type names.
     *
     * Alternatives are split on " or " and " and "; a part starting with
     * "Array" is kept whole, with its " and" turned into a comma. Scalar
     * names are mapped to int/string/bool/float and each "Array" occurrence
     * wraps the result in `Array<...>`.
     *
     * @param string $rawType Type text with HTML tags already removed.
     * @return array
     */
    private static function parseFieldTypes(string $rawType): array
    {
        $types = [];
        foreach (explode(' or ', $rawType) as $rawOrType) {
            if (stripos($rawOrType, 'array') === 0) {
                $types[] = str_replace(' and', ',', $rawOrType);
                continue;
            }
            foreach (explode(' and ', $rawOrType) as $unparsedType) {
                $types[] = $unparsedType;
            }
        }
        $parsedTypes = [];
        foreach ($types as $type) {
            // Drop the filler words "number" and "of" as whole words only, so
            // that type names merely containing them (e.g. "Profile") survive.
            $type = trim(preg_replace('/\b(?:number|of)\b/', '', $type) ?? $type);
            $multiplesCount = substr_count(strtolower($type), 'array');
            $parsedType = trim(
                str_replace(
                    ['Array', 'Integer', 'String', 'Boolean', 'Float', 'True'],
                    ['', 'int', 'string', 'bool', 'float', 'bool'],
                    $type
                )
            );
            for ($i = 0; $i < $multiplesCount; $i++) {
                $parsedType = sprintf('Array<%s>', $parsedType);
            }
            $parsedTypes[] = $parsedType;
        }
        return $parsedTypes;
    }

    /**
     * Extracts the return types mentioned in a method description.
     *
     * Only the sentences containing "returns" or "is returned" are examined.
     * Within them, every link text and every emphasised text (except
     * "False", "force" and "Array") is taken as a return type.
     *
     * @param string $description Raw (HTML) method description.
     * @return array
     * @throws ChildNotFoundException
     * @throws CircularException
     * @throws NotLoadedException
     * @throws StrictException
     * @throws ContentLengthException
     * @throws LogicalException
     * @noinspection PhpUndefinedFieldInspection
     */
    private static function parseReturnTypes(string $description): array
    {
        $returnTypes = [];
        $phrases = array_filter(
            explode('.', $description),
            static fn ($phrase): bool => stripos($phrase, 'returns') !== false
                || stripos($phrase, 'is returned') !== false
        );
        foreach ($phrases as $phrase) {
            $dom = new Dom;
            $dom->loadStr($phrase);
            $links = $dom->find('a');
            $emphasised = $dom->find('em');
            // The nesting depth depends on the phrase only, not on the link.
            $multiplesCount = substr_count(strtolower($phrase), 'array');
            foreach ($links as $element) {
                if ($element->text === 'Messages') {
                    $returnTypes[] = 'Array<Message>';
                    continue;
                }
                $returnType = $element->text;
                for ($i = 0; $i < $multiplesCount; $i++) {
                    $returnType = sprintf('Array<%s>', $returnType);
                }
                $returnTypes[] = $returnType;
            }
            foreach ($emphasised as $element) {
                if (in_array($element->text, ['False', 'force', 'Array'], true)) {
                    continue;
                }
                $returnTypes[] = str_replace(['True', 'Int', 'String'], ['bool', 'int', 'string'], $element->text);
            }
        }
        return $returnTypes;
    }

}