DOMSelector::getConfig()   A
last analyzed

Complexity

Conditions 1
Paths 1

Size

Total Lines 3
Code Lines 1

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 1
CRAP Score 1

Importance

Changes 1
Bugs 0 Features 0
Metric Value
cc 1
eloc 1
c 1
b 0
f 0
nc 1
nop 0
dl 0
loc 3
ccs 1
cts 1
cp 1
crap 1
rs 10
1
<?php
2
3
declare(strict_types=1);
4
5
namespace DOMSelector;
6
7
use DOMSelector\Contracts\FormatterInterface;
8
use DOMSelector\Providers\TypeProvider;
9
use Exception;
10
use PHPHtmlParser\Dom;
11
use Psr\Http\Client\ClientExceptionInterface;
12
use Psr\Http\Client\ClientInterface;
13
14
/**
 * Class DOMSelector.
 *
 * Extracts values from an HTML document according to a field configuration.
 * Every top-level configuration entry maps a field name to a selector
 * definition; the keys read by this class are `css`, `type`, `attribute`,
 * `format`, `multiple` and `children`.
 */
class DOMSelector
{
    /**
     * Field configuration, keyed by field name.
     *
     * @var array
     */
    private $config = [];

    /**
     * Registered formatters, keyed by the name each formatter reports.
     *
     * @var array
     */
    private $formatters = [];

    /**
     * Selector DOM class.
     *
     * Used by this instance whenever it loads HTML itself (string, file or URL).
     *
     * @var Dom
     */
    private $dom;

    /**
     * Type Handler Class.
     *
     * @var TypeProvider
     */
    protected $typeProvider;

    /**
     * DOMSelector constructor.
     *
     * Entries of $formatters that do not implement FormatterInterface are
     * ignored; the remaining ones are indexed by their getName() value.
     *
     * @param array $config
     * @param array $formatters
     */
    public function __construct(array $config, array $formatters = [])
    {
        $this->config = $config;
        $this->dom = new Dom();
        $this->typeProvider = new TypeProvider();

        foreach ($formatters as $formatter) {
            if ($formatter instanceof FormatterInterface) {
                $this->formatters[$formatter->getName()] = $formatter;
            }
        }
    }

    /**
     * Create Extractor object from yaml string.
     *
     * @param string $yaml_string
     * @param array  $formatters
     *
     * @throws Exception when the string does not parse to an array
     *
     * @return DOMSelector
     */
    public static function fromYamlString(string $yaml_string, array $formatters = []): DOMSelector
    {
        $config = self::requireConfigArray(\yaml_parse($yaml_string), 'YAML string');

        return new self($config, $formatters);
    }

    /**
     * Create Extractor object from yaml file.
     *
     * @param string $yaml_file
     * @param array  $formatters
     *
     * @throws Exception when the file does not parse to an array
     *
     * @return DOMSelector
     */
    public static function fromYamlFile(string $yaml_file, array $formatters = []): DOMSelector
    {
        $config = self::requireConfigArray(\yaml_parse_file($yaml_file), 'YAML file "' . $yaml_file . '"');

        return new self($config, $formatters);
    }

    /**
     * Ensure a parsed YAML value can be used as the constructor's config.
     *
     * Without this check a failed parse would be handed to the constructor
     * and rejected by its array type declaration with an unrelated-looking
     * TypeError.
     *
     * @param mixed  $config value returned by the YAML parser
     * @param string $source human readable origin, used in the error message
     *
     * @throws Exception when $config is not an array
     *
     * @return array
     */
    private static function requireConfigArray($config, string $source): array
    {
        if (!is_array($config)) {
            throw new Exception('Unable to read a configuration array from ' . $source . '.');
        }

        return $config;
    }

    /**
     * Get config.
     *
     * @return array
     */
    public function getConfig(): array
    {
        return $this->config;
    }

    /**
     * Get all formatters.
     *
     * @return array
     */
    public function getFormatters(): array
    {
        return $this->formatters;
    }

    /**
     * Get specific formatter.
     *
     * @param string $formatter name the formatter was registered under
     *
     * @return false|mixed|FormatterInterface the formatter, or false when none is registered under that name
     */
    public function getFormatter(string $formatter)
    {
        return $this->formatters[$formatter] ?? false;
    }

    /**
     * Get formatters from config.
     *
     * Names without a registered formatter are skipped. The misspelled
     * method name is kept on purpose: it is protected and may be overridden
     * or called by subclasses.
     *
     * @param string|array $items a single formatter name or a list of names
     *
     * @return array formatters keyed by name, in the order given
     */
    protected function getFormettersFromConfig($items): array
    {
        $formatters = [];

        if (!is_array($items)) {
            $items = [$items];
        }

        foreach ($items as $item) {
            $formatter = $this->getFormatter($item);
            if ($formatter) {
                $formatters[$item] = $formatter;
            }
        }

        return $formatters;
    }

    /**
     * Extract config items from HTML string.
     *
     * @param string|Dom $html HTML markup, or a Dom instance to query as-is
     *
     * @return array extracted values keyed by field name
     */
    public function extract($html): array
    {
        if ($html instanceof Dom) {
            // Query the document the caller handed in rather than the
            // internal one, which may hold different (or no) content.
            $dom = $html;
        } else {
            $this->dom->loadStr($html);
            $dom = $this->dom;
        }

        $fields_data = [];

        foreach ($this->config as $field_name => $field_config) {
            $fields_data[$field_name] = $this->extractSelector($field_config, $dom);
        }

        return $fields_data;
    }

    /**
     * Extract config items from HTML file.
     *
     * @param string $file
     *
     * @throws Exception when loading fails; the original error is available via getPrevious()
     *
     * @return array
     */
    public function extractFromFile(string $file): array
    {
        try {
            $this->dom->loadFromFile($file);
        } catch (Exception $e) {
            // Keep the original exception chained so its type and trace are not lost.
            throw new Exception($e->getMessage(), 0, $e);
        }

        return $this->extract($this->dom);
    }

    /**
     * Extract config items from url.
     *
     * @param string                     $url
     * @param ClientInterface|null|mixed $client
     *
     * @throws Exception|ClientExceptionInterface when loading fails; a caught error is available via getPrevious()
     *
     * @return array
     */
    public function extractFromUrl(string $url, $client = null): array
    {
        try {
            $this->dom->loadFromUrl($url, null, $client);
        } catch (Exception $e) {
            // Keep the original exception chained so its type and trace are not lost.
            throw new Exception($e->getMessage(), 0, $e);
        }

        return $this->extract($this->dom);
    }

    /**
     * Extract selector.
     *
     * Runs the `css` selector of $field_config against $dom. With
     * `multiple` set to true every match contributes one value; otherwise
     * the value of the first match is returned. When nothing matches (or
     * the lookup throws an Exception) the result is an empty array.
     *
     * @param array     $field_config
     * @param Dom|mixed $dom          object exposing find(), i.e. the document or a matched element
     *
     * @return array|string|bool
     */
    protected function extractSelector(array $field_config, $dom)
    {
        try {
            $elements = $dom->find($field_config['css']);
        } catch (Exception $e) {
            // Best effort: a failing lookup is treated as "no matches".
            $elements = [];
        }

        // Fall back to the Text type when the configured type is missing or unknown.
        $item_type = $this->typeProvider->getType($field_config['type'] ?? '') ? $field_config['type'] : 'Text';

        $has_children = isset($field_config['children']);
        $collect_all = isset($field_config['multiple']) && $field_config['multiple'] === true;
        $attribute = $field_config['attribute'] ?? null;

        // Resolved on first use only; the list is identical for every element.
        $formatters = null;

        $values = [];

        foreach ($elements as $element) {
            if ($has_children) {
                $value = $this->getChildItem($field_config, $element);
            } else {
                if ($formatters === null) {
                    $formatters = $this->getFormettersFromConfig($field_config['format'] ?? []);
                }

                $value = $this->extractField($element, $item_type, $attribute, $formatters);
            }

            if (!$collect_all) {
                return $value;
            }

            $values[] = $value;
        }

        return $values;
    }

    /**
     * Extract field.
     *
     * Asks the type handler registered for $item_type for the element's
     * content and then pipes that content through $formatters in order.
     *
     * @param mixed  $element
     * @param string $item_type
     * @param mixed  $attribute
     * @param array  $formatters
     *
     * @return false|mixed|string false when no handler exists for $item_type
     */
    protected function extractField($element, string $item_type, $attribute = null, array $formatters = [])
    {
        $content = false;

        $type = $this->typeProvider->getType($item_type);

        if ($type) {
            $content = $type->getContent($element, $attribute);
        }

        // The string '0' is falsy in PHP but is real content and must still
        // be formatted; other empty values are left untouched.
        if ($content || $content === '0') {
            /** @var FormatterInterface $formatter */
            foreach ($formatters as $formatter) {
                $content = $formatter->format($content);
            }
        }

        return $content;
    }

    /**
     * Get child item.
     *
     * Applies every selector definition under `children` to $element.
     *
     * @param array $field_config
     * @param mixed $element
     *
     * @return array extracted child values keyed by child field name
     */
    public function getChildItem(array $field_config, $element): array
    {
        $child_config = $field_config['children'];
        $child_item = [];

        foreach ($child_config as $config_name => $config_fields) {
            $child_item[$config_name] = $this->extractSelector($config_fields, $element);
        }

        return $child_item;
    }
}
292