Passed
Push — master ( 4bf53d...ce4836 )
by Sys
09:47
created

SchemaExtractor   A

Complexity

Total Complexity 37

Size/Duplication

Total Lines 245
Duplicated Lines 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
wmc 37
eloc 125
c 1
b 0
f 0
dl 0
loc 245
rs 9.44

6 Methods

Rating   Name   Duplication   Size   Complexity  
B parseReturnTypes() 0 37 8
B parseFieldTypes() 0 29 6
A parseFields() 0 31 4
A generateElement() 0 21 4
A __construct() 0 2 1
C extract() 0 52 14
1
<?php
2
3
4
namespace TgScraper\Common;
5
6
7
use PHPHtmlParser\Dom;
8
use PHPHtmlParser\Exceptions\ChildNotFoundException;
9
use PHPHtmlParser\Exceptions\CircularException;
10
use PHPHtmlParser\Exceptions\ContentLengthException;
11
use PHPHtmlParser\Exceptions\LogicalException;
12
use PHPHtmlParser\Exceptions\NotLoadedException;
13
use PHPHtmlParser\Exceptions\ParentNotFoundException;
14
use PHPHtmlParser\Exceptions\StrictException;
15
use Psr\Http\Client\ClientExceptionInterface;
16
use Psr\Log\LoggerInterface;
17
use TgScraper\Constants\Versions;
18
use Throwable;
19
20
/**
 * Class SchemaExtractor
 *
 * Loads the HTML page at the configured URL and turns every single-word
 * <h4> heading (plus the paragraphs and table that follow it) into an entry
 * of the returned schema, grouped under "methods" or "types".
 *
 * @package TgScraper\Common
 */
class SchemaExtractor
{

    /**
     * Additional methods with boolean return value.
     *
     * Used as a fallback when no return type can be parsed from the
     * description of one of these methods.
     */
    private const BOOL_RETURNS = [
        'answerShippingQuery',
        'answerPreCheckoutQuery'
    ];

    /**
     * SchemaExtractor constructor.
     *
     * @param LoggerInterface $logger Receives a critical entry when loading or querying the page fails.
     * @param string $url Page to scrape; defaults to Versions::LATEST.
     */
    public function __construct(private LoggerInterface $logger, private string $url = Versions::LATEST)
    {
    }

    /**
     * Loads the page and builds the schema from its <h4> sections.
     *
     * Headings whose text contains a space are ignored. A heading whose first
     * character is lowercase is treated as a method, anything else as a type.
     *
     * @return array Entries produced by generateElement(), grouped under the 'methods' and 'types' keys
     *               (a key is absent when no entry of that kind was found).
     * @throws ChildNotFoundException
     * @throws CircularException
     * @throws ContentLengthException
     * @throws LogicalException
     * @throws NotLoadedException
     * @throws ParentNotFoundException
     * @throws StrictException
     * @throws ClientExceptionInterface
     * @throws Throwable Any failure while loading or querying the page is logged and rethrown unchanged.
     */
    public function extract(): array
    {
        $dom = new Dom;
        try {
            $dom->loadFromURL($this->url);
        } catch (Throwable $e) {
            $this->logger->critical(sprintf('Unable to load data from URL "%s": %s', $this->url, $e->getMessage()));
            throw $e;
        }
        try {
            $elements = $dom->find('h4');
        } catch (Throwable $e) {
            // The page was fetched; it is the heading lookup that failed.
            $this->logger->critical(sprintf('Unable to parse data from URL "%s": %s', $this->url, $e->getMessage()));
            throw $e;
        }
        $data = [];
        /* @var Dom\Node\AbstractNode $element */
        foreach ($elements as $element) {
            $name = $element->text;
            if (str_contains($name, ' ')) {
                continue;
            }
            $isMethod = lcfirst($name) === $name;
            $path = $isMethod ? 'methods' : 'types';
            $description = '';
            $table = null;
            // Walk the following siblings with a separate cursor so the
            // foreach variable is left untouched.
            $sibling = $element;
            while (true) {
                try {
                    $sibling = $sibling->nextSibling();
                } catch (ChildNotFoundException) {
                    // No more siblings: the section ends with the document.
                    break;
                }
                $tag = $sibling->tag->name();
                // NOTE(review): the first check looks at the heading text, not at the
                // sibling being visited; kept as in the original - confirm the intent.
                if (empty($element->text()) or empty($tag) or $tag === 'text') {
                    continue;
                }
                if (str_starts_with($tag, 'h')) {
                    // Any tag starting with "h" closes the current section.
                    break;
                }
                if ($tag === 'p') {
                    $description .= PHP_EOL . $sibling->innerHtml();
                    continue;
                }
                if ($tag === 'table') {
                    // The first table closes the section; its body rows are the fields.
                    $table = $sibling->find('tbody')->find('tr');
                    break;
                }
            }
            $data[$path][] = self::generateElement(
                $name,
                trim($description),
                $table,
                $isMethod
            );
        }
        return $data;
    }

    /**
     * Builds one schema entry from a section's name, description and table rows.
     *
     * @param string $name Heading text of the section.
     * @param string $description Raw HTML of the section's paragraphs.
     * @param Dom\Node\Collection|null $unparsedFields Table rows of the section, or null when it has no table.
     * @param bool $isMethod Whether the section describes a method (adds 'return_types').
     * @return array Entry with 'name', 'description' (tags stripped, entities decoded), 'fields'
     *               and, for methods only, 'return_types'.
     * @throws ChildNotFoundException
     * @throws CircularException
     * @throws ContentLengthException
     * @throws LogicalException
     * @throws NotLoadedException
     * @throws StrictException
     */
    private static function generateElement(
        string $name,
        string $description,
        ?Dom\Node\Collection $unparsedFields,
        bool $isMethod
    ): array {
        $result = [
            'name' => $name,
            'description' => htmlspecialchars_decode(strip_tags($description), ENT_QUOTES),
            'fields' => self::parseFields($unparsedFields, $isMethod)
        ];
        if ($isMethod) {
            // Return types are parsed from the raw HTML, since the parser relies on <a>/<em> tags.
            $returnTypes = self::parseReturnTypes($description);
            if (empty($returnTypes) and in_array($name, self::BOOL_RETURNS, true)) {
                $returnTypes[] = 'bool';
            }
            $result['return_types'] = $returnTypes;
        }
        return $result;
    }

    /**
     * Converts table rows into field descriptors.
     *
     * Method rows are read as name / type / required / description cells;
     * type rows as name / type / description cells. Rows whose first cell
     * has empty text are skipped.
     *
     * @param Dom\Node\Collection|null $fields Table rows, or null for a section without a table.
     * @param bool $isMethod Selects the method or the type row layout.
     * @return array List of fields with 'name', 'types', 'description' and either
     *               'required' (methods) or 'optional' (types).
     * @throws ChildNotFoundException
     * @throws NotLoadedException
     */
    private static function parseFields(?Dom\Node\Collection $fields, bool $isMethod): array
    {
        $parsedFields = [];
        foreach ($fields ?? [] as $field) {
            /* @var Dom $field */
            $fieldData = $field->find('td');
            $name = $fieldData[0]->text;
            if (empty($name)) {
                continue;
            }
            $parsedData = [
                'name' => $name,
                'types' => self::parseFieldTypes(strip_tags($fieldData[1]->innerHtml))
            ];
            if ($isMethod) {
                $parsedData['required'] = $fieldData[2]->text === 'Yes';
                $parsedData['description'] = htmlspecialchars_decode(
                    strip_tags($fieldData[3]->innerHtml ?? $fieldData[3]->text ?? ''),
                    ENT_QUOTES
                );
            } else {
                $description = htmlspecialchars_decode(strip_tags($fieldData[2]->innerHtml), ENT_QUOTES);
                // For types, optionality is signalled by the description's leading word.
                $parsedData['optional'] = str_starts_with($description, 'Optional.');
                $parsedData['description'] = $description;
            }
            $parsedFields[] = $parsedData;
        }
        return $parsedFields;
    }

    /**
     * Normalises a human-readable type cell into a list of type names.
     *
     * Alternatives are split on " or " and " and "; an alternative starting
     * with "array" is kept whole, with " and" turned into ",". Each "Array"
     * occurrence wraps the remaining type in one "Array<...>" level, and the
     * scalar names Integer, String, Boolean, Float and True are mapped to
     * int, string, bool, float and bool.
     *
     * @param string $rawType Plain-text type, e.g. "Integer or String" or "Array of Array of PhotoSize".
     * @return array List of normalised type names.
     */
    private static function parseFieldTypes(string $rawType): array
    {
        $types = [];
        foreach (explode(' or ', $rawType) as $rawOrType) {
            if (stripos($rawOrType, 'array') === 0) {
                $types[] = str_replace(' and', ',', $rawOrType);
                continue;
            }
            foreach (explode(' and ', $rawOrType) as $unparsedType) {
                $types[] = $unparsedType;
            }
        }
        $parsedTypes = [];
        foreach ($types as $type) {
            // Drop the filler words only when they stand alone: a plain substring
            // removal would also eat the "of" inside names such as "UserProfilePhotos".
            $type = trim(preg_replace('/\b(?:number|of)\b/', '', $type) ?? $type);
            $multiplesCount = substr_count(strtolower($type), 'array');
            $parsedType = trim(
                str_replace(
                    ['Array', 'Integer', 'String', 'Boolean', 'Float', 'True'],
                    ['', 'int', 'string', 'bool', 'float', 'bool'],
                    $type
                )
            );
            for ($i = 0; $i < $multiplesCount; $i++) {
                $parsedType = sprintf('Array<%s>', $parsedType);
            }
            $parsedTypes[] = $parsedType;
        }
        return $parsedTypes;
    }

    /**
     * Extracts return types from a method's HTML description.
     *
     * Only the dot-separated phrases containing "returns" or "is returned"
     * are inspected. Link texts in such a phrase become types (wrapped in
     * "Array<...>" once per "array" occurrence in the phrase, "Messages"
     * being mapped to "Array<Message>"); emphasised texts become types too,
     * except "False", "force" and "Array".
     *
     * @param string $description Raw HTML of the method description.
     * @return array List of return type names, possibly empty.
     * @throws ChildNotFoundException
     * @throws CircularException
     * @throws NotLoadedException
     * @throws StrictException
     * @throws ContentLengthException
     * @throws LogicalException
     * @noinspection PhpUndefinedFieldInspection
     */
    private static function parseReturnTypes(string $description): array
    {
        $returnTypes = [];
        $phrases = array_filter(
            explode('.', $description),
            static function ($phrase) {
                return (false !== stripos($phrase, 'returns') or false !== stripos($phrase, 'is returned'));
            }
        );
        foreach ($phrases as $phrase) {
            $dom = new Dom;
            $dom->loadStr($phrase);
            $a = $dom->find('a');
            $em = $dom->find('em');
            // The nesting depth depends on the phrase only, not on the individual link.
            $multiplesCount = substr_count(strtolower($phrase), 'array');
            foreach ($a as $element) {
                if ($element->text === 'Messages') {
                    $returnTypes[] = 'Array<Message>';
                    continue;
                }
                $returnType = $element->text;
                for ($i = 0; $i < $multiplesCount; $i++) {
                    $returnType = sprintf('Array<%s>', $returnType);
                }
                $returnTypes[] = $returnType;
            }
            foreach ($em as $element) {
                if (in_array($element->text, ['False', 'force', 'Array'], true)) {
                    continue;
                }
                $returnTypes[] = str_replace(['True', 'Int', 'String'], ['bool', 'int', 'string'], $element->text);
            }
        }
        return $returnTypes;
    }

}