Passed
Push — master ( 2d4116...8cb3c8 )
by Yahya
05:38
created

DOMSelector::extractSelector()   B

Complexity

Conditions 7
Paths 20

Size

Total Lines 29
Code Lines 17

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 8
CRAP Score 7

Importance

Changes 1
Bugs 1 Features 0
Metric Value
eloc 17
c 1
b 1
f 0
dl 0
loc 29
rs 8.8333
ccs 8
cts 8
cp 1
cc 7
nc 20
nop 2
crap 7
1
<?php
2
3
declare(strict_types=1);
4
5
namespace DOMSelector;
6
7
use DOMSelector\Contracts\FormatterInterface;
8
use DOMSelector\Providers\TypeProvider;
9
use Exception;
10
use PHPHtmlParser\Dom;
11
use Psr\Http\Client\ClientExceptionInterface;
12
use Psr\Http\Client\ClientInterface;
13
14
/**
15
 * Class DOMSelector.
16
 */
17
class DOMSelector
18
{
19
    /**
     * Extraction configuration, keyed by field name.
     *
     * @var array
     */
    private $config = [];

    /**
     * Registered formatters, keyed by the name each formatter reports.
     *
     * @var array
     */
    private $formatters = [];

    /**
     * DOM parser that HTML is loaded into and queried against.
     *
     * @var Dom
     */
    private $dom;

    /**
     * Resolves a type name to the handler that reads an element's content.
     *
     * @var TypeProvider
     */
    protected $typeProvider;
42 48
43
    /**
     * Build a selector from a field configuration and optional formatters.
     *
     * Entries of $formatters that do not implement FormatterInterface are
     * skipped; the others are indexed by the name they report.
     *
     * @param array $config     Field name => field configuration.
     * @param array $formatters Formatter instances to register.
     */
    public function __construct(array $config, array $formatters = [])
    {
        $this->config = $config;
        $this->dom = new Dom();
        $this->typeProvider = new TypeProvider();

        // Iterating an empty array is a no-op, so no emptiness guard is needed.
        foreach ($formatters as $candidate) {
            if (!$candidate instanceof FormatterInterface) {
                continue;
            }

            $this->formatters[$candidate->getName()] = $candidate;
        }
    }
63
64
    /**
     * Create a selector from a YAML configuration string.
     *
     * @param string $yaml_string YAML document describing the fields to extract.
     * @param array  $formatters  Formatter instances to register.
     *
     * @throws Exception When the YAML does not parse into an array.
     *
     * @return DOMSelector
     */
    public static function fromYamlString(string $yaml_string, array $formatters = []): DOMSelector
    {
        $config = \yaml_parse($yaml_string);

        // yaml_parse() gives false on failure and a scalar/null for documents
        // that are not a mapping or sequence. Fail with a clear message here
        // instead of letting the constructor's array type-hint raise an
        // opaque TypeError.
        if (!is_array($config)) {
            throw new Exception('Unable to parse the YAML string into a configuration array.');
        }

        return new DOMSelector($config, $formatters);
    }
73
74
    /**
     * Create a selector from a YAML configuration file.
     *
     * @param string $yaml_file  Path of the YAML file describing the fields to extract.
     * @param array  $formatters Formatter instances to register.
     *
     * @throws Exception When the file does not parse into an array.
     *
     * @return DOMSelector
     */
    public static function fromYamlFile(string $yaml_file, array $formatters = []): DOMSelector
    {
        $config = \yaml_parse_file($yaml_file);

        // yaml_parse_file() gives false on failure and a scalar/null for
        // documents that are not a mapping or sequence. Fail with a clear
        // message here instead of letting the constructor's array type-hint
        // raise an opaque TypeError.
        if (!is_array($config)) {
            throw new Exception(sprintf('Unable to parse YAML file "%s" into a configuration array.', $yaml_file));
        }

        return new DOMSelector($config, $formatters);
    }
83
84
    /**
     * Return the field configuration this selector was built with.
     *
     * @return array Field name => field configuration.
     */
    public function getConfig(): array
    {
        return $this->config;
    }
93 15
94
    /**
     * Return every registered formatter.
     *
     * @return array Formatter name => formatter instance.
     */
    public function getFormatters(): array
    {
        return $this->formatters;
    }
103
104
    /**
     * Look up a registered formatter by name.
     *
     * @param string $formatter Name the formatter was registered under.
     *
     * @return false|mixed|FormatterInterface The formatter, or false when none is registered under that name.
     */
    public function getFormatter(string $formatter)
    {
        if (isset($this->formatters[$formatter])) {
            return $this->formatters[$formatter];
        }

        return false;
    }
113 42
114 42
    /**
     * Resolve the formatter names listed in a field configuration.
     *
     * Names with no registered formatter are left out of the result. The
     * method name is misspelt ("Formetters") but is kept as-is because it is
     * protected API that subclasses may call or override.
     *
     * @param string|array $items A single formatter name or a list of names.
     *
     * @return array Formatter name => formatter instance.
     */
    protected function getFormettersFromConfig($items): array
    {
        // A single name is treated as a one-element list.
        $names = is_array($items) ? $items : [$items];
        $resolved = [];

        foreach ($names as $name) {
            $match = $this->getFormatter($name);
            if (!$match) {
                continue;
            }

            $resolved[$name] = $match;
        }

        return $resolved;
    }
137 42
138 3
    /**
     * Extract every configured field from an HTML string or a loaded Dom.
     *
     * A string is loaded into the internal Dom first. A Dom instance is
     * queried directly, so a document supplied by the caller is the one the
     * selectors run against.
     *
     * @param string|Dom $html HTML markup, or an already loaded Dom.
     *
     * @return array Field name => extracted value.
     */
    public function extract($html): array
    {
        if ($html instanceof Dom) {
            // Query the document the caller supplied instead of silently
            // falling back to the internal one.
            $dom = $html;
        } else {
            $this->dom->loadStr($html);
            $dom = $this->dom;
        }

        $fields_data = [];

        foreach ($this->config as $field_name => $field_config) {
            $fields_data[$field_name] = $this->extractSelector($field_config, $dom);
        }

        return $fields_data;
    }
159
160 12
    /**
     * Extract every configured field from an HTML file.
     *
     * @param string $file Path of the HTML file to load.
     *
     * @throws Exception When the file cannot be loaded; the original
     *                   exception is available through getPrevious().
     *
     * @return array Field name => extracted value.
     */
    public function extractFromFile(string $file): array
    {
        try {
            $this->dom->loadFromFile($file);
        } catch (Exception $e) {
            // Re-throw as a plain Exception with the same message, but chain
            // the original so its type and stack trace are not lost.
            throw new Exception($e->getMessage(), 0, $e);
        }

        return $this->extract($this->dom);
    }
179
180
    /**
     * Extract every configured field from the document found at a URL.
     *
     * @param string                     $url    Address of the HTML document.
     * @param ClientInterface|null|mixed $client Optional HTTP client handed to the Dom loader.
     *
     * @throws Exception|ClientExceptionInterface When the document cannot be
     *                   loaded; a caught exception is available through
     *                   getPrevious().
     *
     * @return array Field name => extracted value.
     */
    public function extractFromUrl(string $url, $client = null): array
    {
        try {
            $this->dom->loadFromUrl($url, null, $client);
        } catch (Exception $e) {
            // Re-throw as a plain Exception with the same message, but chain
            // the original so its type and stack trace are not lost.
            throw new Exception($e->getMessage(), 0, $e);
        }

        return $this->extract($this->dom);
    }
200 3
201 27
    /**
     * Run one field configuration against a Dom or element.
     *
     * The "css" entry selects the elements. Each element is turned into a
     * nested item when "children" is set, otherwise into a single value read
     * with the configured type (falling back to "Text"), attribute and
     * formatters. With "multiple" set to true every element's value is
     * collected; otherwise the first element's value is returned, and an
     * empty array is returned when nothing matched.
     *
     * @param array     $field_config Configuration of the field to extract.
     * @param Dom|mixed $dom          Dom or element to search in.
     *
     * @return array|string|bool
     */
    protected function extractSelector(array $field_config, $dom)
    {
        try {
            $elements = $dom->find($field_config['css']);
        } catch (Exception $e) {
            // A failing lookup is treated the same as "nothing matched".
            $elements = [];
        }

        $item_type = $this->typeProvider->getType($field_config['type'] ?? '') ? $field_config['type'] : 'Text';

        $has_children = isset($field_config['children']);
        $collect_all = ($field_config['multiple'] ?? null) === true;

        $values = [];

        foreach ($elements as $element) {
            if ($has_children) {
                $value = $this->getChildItem($field_config, $element);
            } else {
                $value = $this->extractField(
                    $element,
                    $item_type,
                    $field_config['attribute'] ?? null,
                    $this->getFormettersFromConfig($field_config['format'] ?? [])
                );
            }

            // Single-value fields stop at the first matching element.
            if (!$collect_all) {
                return $value;
            }

            $values[] = $value;
        }

        return $values;
    }
239
240
    /**
     * Read one value from an element and run it through the formatters.
     *
     * The content is read by the handler registered for $item_type; when no
     * such handler exists the result is false. Formatters are applied in
     * order, each receiving the previous one's output. They are skipped only
     * when nothing was extracted (false, null, empty string or empty array),
     * so values such as "0" are still formatted.
     *
     * @param mixed  $element    Element to read from.
     * @param string $item_type  Name of the type handler to use.
     * @param mixed  $attribute  Attribute argument forwarded to the type handler.
     * @param array  $formatters Formatters to apply to the extracted content.
     *
     * @return false|mixed|string
     */
    protected function extractField($element, string $item_type, $attribute = null, array $formatters = [])
    {
        $type = $this->typeProvider->getType($item_type);
        $content = $type ? $type->getContent($element, $attribute) : false;

        // A plain truthiness test would also skip legitimate values such as
        // "0" or 0, so only the genuinely empty results are excluded.
        $is_blank = $content === false || $content === null || $content === '' || $content === [];

        if (!$is_blank) {
            /** @var FormatterInterface $formatter */
            foreach ($formatters as $formatter) {
                $content = $formatter->format($content);
            }
        }

        return $content;
    }
269
270
    /**
     * Build a nested item by running every child configuration on an element.
     *
     * @param array $field_config Field configuration whose "children" entry maps child names to their configuration.
     * @param mixed $element      Element the child selectors are evaluated against.
     *
     * @return array Child name => extracted value.
     */
    public function getChildItem(array $field_config, $element): array
    {
        $item = [];

        foreach ($field_config['children'] as $child_name => $child_fields) {
            $item[$child_name] = $this->extractSelector($child_fields, $element);
        }

        return $item;
    }
290
}
291