SchemaExtractor::parseFields()   A
last analyzed

Complexity

Conditions 4
Paths 4

Size

Total Lines 24
Code Lines 17

Duplication

Lines 0
Ratio 0 %

Importance

Changes 2
Bugs 0 Features 0
Metric Value
eloc 17
c 2
b 0
f 0
dl 0
loc 24
rs 9.7
cc 4
nc 4
nop 2
1
<?php
2
3
namespace TgScraper\Common;
4
5
use Composer\InstalledVersions;
6
use Exception;
7
use GuzzleHttp\Client;
8
use GuzzleHttp\Exception\GuzzleException;
9
use InvalidArgumentException;
10
use OutOfBoundsException;
11
use Psr\Log\LoggerInterface;
12
use RuntimeException;
13
use TgScraper\Parsers\Field;
14
use TgScraper\Parsers\ObjectDescription;
15
use TgScraper\Constants\Versions;
16
use voku\helper\HtmlDomParser;
17
use voku\helper\SimpleHtmlDomInterface;
18
use voku\helper\SimpleHtmlDomNode;
19
use voku\helper\SimpleHtmlDomNodeInterface;
20
21
/**
 * Class SchemaExtractor
 *
 * Reads a Bot API documentation page (loaded from a file, a URL or the optional
 * cache package) and turns it into an array describing the API version, its
 * methods and its types.
 *
 * @package TgScraper\Common
 */
class SchemaExtractor
{
    /**
     * Version found in the loaded document, formatted as "major.minor.patch".
     *
     * @var string
     */
    private string $version;

    /**
     * SchemaExtractor constructor.
     *
     * Parses the version out of the supplied document right away and logs it.
     *
     * @param LoggerInterface $logger Logger receiving progress and error messages.
     * @param HtmlDomParser $dom Parsed documentation page.
     */
    public function __construct(private LoggerInterface $logger, private HtmlDomParser $dom)
    {
        $this->version = $this->parseVersion();
        $this->logger->info('Bot API version: ' . $this->version);
    }

    /**
     * Builds an extractor for the given version.
     *
     * When the "sysbot/tgscraper-cache" package is installed and its CacheLoader
     * class exists, a cached copy is tried first; if the cache has no entry for
     * the version (OutOfBoundsException), the page is downloaded from the URL
     * that Versions::getUrlFromText() returns for it.
     *
     * @param LoggerInterface $logger
     * @param string $version
     * @return SchemaExtractor
     * @throws OutOfBoundsException
     * @throws Exception
     * @throws GuzzleException
     */
    public static function fromVersion(LoggerInterface $logger, string $version = Versions::LATEST): SchemaExtractor
    {
        $cacheAvailable = InstalledVersions::isInstalled('sysbot/tgscraper-cache')
            && class_exists('\TgScraper\Cache\CacheLoader');
        if ($cacheAvailable) {
            $logger->info('Cache package detected, searching for a cached version.');
            try {
                /** @noinspection PhpFullyQualifiedNameUsageInspection */
                /** @noinspection PhpUndefinedNamespaceInspection */
                /** @psalm-suppress UndefinedClass */
                $path = \TgScraper\Cache\CacheLoader::getCachedVersion($version);
                $logger->info('Cached version found.');
                return self::fromFile($logger, $path);
            } catch (OutOfBoundsException) {
                // No cached copy for this version: fall through to the download below.
                $logger->info('Cached version not found, continuing with URL.');
            }
        }
        $url = Versions::getUrlFromText($version);
        $logger->info(sprintf('Using URL: %s', $url));
        return self::fromUrl($logger, $url);
    }

    /**
     * Builds an extractor from an HTML file on disk.
     *
     * @param LoggerInterface $logger
     * @param string $path Path to an existing file (directories are rejected).
     * @return SchemaExtractor
     * @throws InvalidArgumentException When the path does not exist or is a directory.
     * @throws RuntimeException Re-thrown after logging when the parser cannot load the file.
     */
    public static function fromFile(LoggerInterface $logger, string $path): SchemaExtractor
    {
        if (!file_exists($path) || is_dir($path)) {
            throw new InvalidArgumentException('File not found');
        }
        $path = realpath($path);
        try {
            $logger->info(sprintf('Loading data from file "%s".', $path));
            $dom = HtmlDomParser::file_get_html($path);
            $logger->info('Data loaded.');
        } catch (RuntimeException $e) {
            $logger->critical(sprintf('Unable to load data from "%s": %s', $path, $e->getMessage()));
            throw $e;
        }
        return new self($logger, $dom);
    }

    /**
     * Builds an extractor from a page downloaded over HTTP.
     *
     * @param LoggerInterface $logger
     * @param string $url
     * @return SchemaExtractor
     * @throws GuzzleException Re-thrown after logging when the request fails.
     */
    public static function fromUrl(LoggerInterface $logger, string $url): SchemaExtractor
    {
        $client = new Client();
        try {
            $html = $client->get($url)->getBody();
            $dom = HtmlDomParser::str_get_html((string)$html);
        } catch (GuzzleException $e) {
            $logger->critical(sprintf('Unable to load data from URL "%s": %s', $url, $e->getMessage()));
            throw $e;
        }
        $logger->info(sprintf('Data loaded from "%s".', $url));
        return new self($logger, $dom);
    }

    /**
     * Collects the content that follows a heading node.
     *
     * Walks the siblings after $node and gathers:
     *  - every <p> as part of the description (inner HTML, each prefixed with PHP_EOL);
     *  - the <li> texts of the first <ul> as "extended_by", which ends the walk;
     *  - the <tr> rows inside the <tbody> of the first <table>, which ends the walk.
     * The walk also ends at the next sibling whose tag starts with "h", or when
     * there are no siblings left.
     *
     * @param SimpleHtmlDomInterface $node
     * @return array{description: string, table: ?SimpleHtmlDomNodeInterface, extended_by: string[]}
     */
    private static function parseNode(SimpleHtmlDomInterface $node): array
    {
        $description = '';
        $table = null;
        $extendedBy = [];

        // A heading without text never contributes anything; bail out before walking.
        if (empty($node->text())) {
            return ['description' => $description, 'table' => $table, 'extended_by' => $extendedBy];
        }

        // nextSibling() yields null after the last sibling; stopping there prevents
        // an endless loop when the heading is the final block of its parent.
        for ($sibling = $node->nextSibling(); $sibling !== null; $sibling = $sibling->nextSibling()) {
            $tag = (string)$sibling->tag;
            if (str_starts_with($tag, 'h')) {
                break;
            }
            if ($tag === 'p') {
                $description .= PHP_EOL . $sibling->innerHtml();
                continue;
            }
            if ($tag === 'ul') {
                foreach ($sibling->findMulti('li') as $item) {
                    $extendedBy[] = $item->text();
                }
                break;
            }
            if ($tag === 'table') {
                /** @var SimpleHtmlDomNodeInterface $table */
                $table = $sibling->findOne('tbody')->findMulti('tr');
                break;
            }
        }

        return ['description' => $description, 'table' => $table, 'extended_by' => $extendedBy];
    }

    /**
     * Reads the version from the first <p> sibling that follows the first <h3>.
     *
     * The "Bot API " prefix is removed from the paragraph text and the remainder
     * is split on dots; missing components default to "1", "0" and "0".
     *
     * @return string Version as "major.minor.patch"; "1.0.0" when no paragraph is found.
     */
    private function parseVersion(): string
    {
        $element = $this->dom->findOne('h3');
        while ($element !== null) {
            $element = $element->nextSibling();
            if ($element?->tag === 'p') {
                break;
            }
        }
        if ($element === null) {
            return '1.0.0';
        }
        $versionNumbers = explode('.', str_replace('Bot API ', '', $element->text()));
        return sprintf(
            '%s.%s.%s',
            $versionNumbers[0] ?? '1',
            $versionNumbers[1] ?? '0',
            $versionNumbers[2] ?? '0'
        );
    }

    /**
     * @return string Version parsed from the document when the extractor was constructed.
     */
    public function getVersion(): string
    {
        return $this->version;
    }

    /**
     * Extracts every method and type described in the document.
     *
     * Each <h4> whose text contains no space is treated as a definition: names
     * starting with a lowercase character are stored under "methods", all others
     * under "types".
     *
     * @return array{version: string, methods: array, types: array}
     * @throws Exception When the document contains no <h4> elements.
     */
    public function extract(): array
    {
        $elements = $this->dom->findMultiOrFalse('h4');
        if (false === $elements) {
            throw new Exception('Unable to fetch required DOM nodes');
        }
        $data = ['version' => $this->version, 'methods' => [], 'types' => []];
        foreach ($elements as $element) {
            $name = $element->text();
            if (str_contains($name, ' ')) {
                // Headings made of several words are section titles, not definitions.
                continue;
            }
            $isMethod = lcfirst($name) === $name;
            $path = $isMethod ? 'methods' : 'types';
            [
                'description' => $description,
                'table' => $table,
                'extended_by' => $extendedBy,
            ] = self::parseNode($element);
            $data[$path][] = self::generateElement(
                $name,
                trim($description),
                $table,
                $extendedBy,
                $isMethod
            );
        }
        return $data;
    }

    /**
     * Assembles the array describing one method or type.
     *
     * @param string $name
     * @param string $description Description as HTML; tags are stripped and entities decoded in the result.
     * @param SimpleHtmlDomNodeInterface|null $unparsedFields Table rows to parse into fields, or null when there is no table.
     * @param array $extendedBy Only included in the result for types.
     * @param bool $isMethod
     * @return array Keys "name", "description", "fields", plus "return_types" for methods or "extended_by" for types.
     */
    private static function generateElement(
        string $name,
        string $description,
        ?SimpleHtmlDomNodeInterface $unparsedFields,
        array $extendedBy,
        bool $isMethod
    ): array {
        $result = [
            'name' => $name,
            'description' => htmlspecialchars_decode(strip_tags($description), ENT_QUOTES),
            'fields' => self::parseFields($unparsedFields, $isMethod)
        ];
        if ($isMethod) {
            // The return types are derived from the raw (HTML) description.
            $objectDescription = new ObjectDescription($description);
            $result['return_types'] = $objectDescription->getTypes();
            return $result;
        }
        $result['extended_by'] = $extendedBy;
        return $result;
    }

    /**
     * Converts table rows into field definitions.
     *
     * Rows are read by <td> position. For methods: name, type, required flag
     * ("Yes" means not optional), description. For types: name, type, description,
     * where a description text starting with "Optional." marks the field as optional.
     * Rows whose first cell has no text are skipped.
     *
     * @param SimpleHtmlDomNodeInterface|null $fields Rows to parse; null yields an empty list.
     * @param bool $isMethod
     * @return array List of Field::toArray() results.
     */
    private static function parseFields(?SimpleHtmlDomNodeInterface $fields, bool $isMethod): array
    {
        $parsedFields = [];
        /** @var SimpleHtmlDomInterface $row */
        foreach ($fields ?? [] as $row) {
            /** @var SimpleHtmlDomNode $cells */
            $cells = $row->findMulti('td');
            $name = $cells[0]->text();
            if (empty($name)) {
                continue;
            }
            $types = $cells[1]->text();
            if ($isMethod) {
                $optional = $cells[2]->text() !== 'Yes';
                $description = $cells[3]->innerHtml();
            } else {
                $description = $cells[2]->innerHtml();
                $optional = str_starts_with($cells[2]->text(), 'Optional.');
            }
            $parsedFields[] = (new Field($name, $types, $optional, $description))->toArray();
        }
        return $parsedFields;
    }
}
275