Passed
Push — master ( 142ace...2d4116 )
by Yahya
10:50 queued 04:36
created

DOMSelector::__construct()   A

Complexity

Conditions 4
Paths 4

Size

Total Lines 9
Code Lines 6

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 2
CRAP Score 4

Importance

Changes 1
Bugs 0 Features 0
Metric Value
cc 4
eloc 6
c 1
b 0
f 0
nc 4
nop 2
dl 0
loc 9
ccs 2
cts 2
cp 1
crap 4
rs 10
1
<?php
2
3
declare(strict_types=1);
4
5
namespace DOMSelector;
6
7
use DOMSelector\Contracts\FormatterInterface;
8
use Exception;
9
use PHPHtmlParser\Dom;
10
use Psr\Http\Client\ClientExceptionInterface;
11
use Psr\Http\Client\ClientInterface;
12
13
/**
14
 * Class DOMSelector.
15
 */
16
class DOMSelector
17
{
18
    /**
19
     * @var array Extraction rules; extract() iterates them as field name => field config.
20
     */
21
    private $config = [];
22
23
    /**
24
     * @var array Registered formatters, keyed by the name each one reports via getName().
25
     */
26
    private $formatters = [];
27
28
    /**
29
     * @var Dom Parser instance created in the constructor and (re)loaded by the extract* methods.
30
     */
31 48
    private $dom;
32
33 48
    /**
     * Build a selector from an extraction config and an optional set of formatters.
     *
     * Entries of $formatters that do not implement FormatterInterface are
     * skipped. The remaining ones are registered under the name returned by
     * getName(), so a later formatter with the same name replaces an earlier one.
     *
     * @param array $config     Extraction rules, later iterated as field name => field config.
     * @param array $formatters Candidate formatter instances.
     */
    public function __construct(array $config, array $formatters = [])
    {
        $this->dom = new Dom();
        $this->config = $config;

        foreach ($formatters as $candidate) {
            if (!$candidate instanceof FormatterInterface) {
                continue;
            }

            $name = $candidate->getName();
            $this->formatters[$name] = $candidate;
        }
    }
52
53
    /**
     * Create a DOMSelector from a YAML string.
     *
     * @param string $yaml_string YAML document describing the fields to extract.
     * @param array  $formatters  Formatter instances passed through to the constructor.
     *
     * @throws \InvalidArgumentException When the YAML does not parse into an array.
     *
     * @return DOMSelector
     */
    public static function fromYamlString(string $yaml_string, array $formatters = []): DOMSelector
    {
        $config = \yaml_parse($yaml_string);

        // yaml_parse() yields false on failure and a scalar for scalar
        // documents. Neither is a usable config, and both would otherwise
        // surface as an opaque TypeError from the array-typed constructor.
        if (!\is_array($config)) {
            throw new \InvalidArgumentException('Unable to parse the YAML string into a configuration array.');
        }

        return new self($config, $formatters);
    }
62
63
    /**
     * Create a DOMSelector from a YAML file.
     *
     * @param string $yaml_file  Path of the YAML file describing the fields to extract.
     * @param array  $formatters Formatter instances passed through to the constructor.
     *
     * @throws \InvalidArgumentException When the file does not parse into an array.
     *
     * @return DOMSelector
     */
    public static function fromYamlFile(string $yaml_file, array $formatters = []): DOMSelector
    {
        $config = \yaml_parse_file($yaml_file);

        // yaml_parse_file() yields false on failure and a scalar for scalar
        // documents. Neither is a usable config, and both would otherwise
        // surface as an opaque TypeError from the array-typed constructor.
        if (!\is_array($config)) {
            throw new \InvalidArgumentException(
                \sprintf('Unable to parse YAML file "%s" into a configuration array.', $yaml_file)
            );
        }

        return new self($config, $formatters);
    }
72
73
    /**
     * Return the extraction config this selector was constructed with.
     *
     * @return array
     */
    public function getConfig(): array
    {
        $config = $this->config;

        return $config;
    }
82
83
    /**
     * Return every registered formatter, keyed by formatter name.
     *
     * @return array
     */
    public function getFormatters(): array
    {
        $registered = $this->formatters;

        return $registered;
    }
92
93 15
    /**
     * Look up a registered formatter by name.
     *
     * @param string $formatter Name the formatter was registered under.
     *
     * @return false|mixed|FormatterInterface The formatter, or false when none is registered under that name.
     */
    public function getFormatter(string $formatter)
    {
        if (!isset($this->formatters[$formatter])) {
            return false;
        }

        return $this->formatters[$formatter];
    }
102
103
    /**
     * Extract every configured field from an HTML string or an already loaded Dom.
     *
     * A string is loaded into this selector's own Dom instance before
     * querying. A Dom instance is queried directly, so a document supplied by
     * the caller is the one that is actually read.
     *
     * @param string|Dom $html HTML markup, or a Dom that has already been loaded.
     *
     * @return array Extracted values keyed by the field names of the config.
     */
    public function extract($html): array
    {
        if ($html instanceof Dom) {
            // Use the document that was handed in rather than assuming it is
            // the internal one, which may not have been loaded at all.
            $dom = $html;
        } else {
            $this->dom->loadStr($html);
            $dom = $this->dom;
        }

        $fields_data = [];

        foreach ($this->config as $field_name => $field_config) {
            $fields_data[$field_name] = $this->extractSelector($field_config, $dom);
        }

        return $fields_data;
    }
124
125
    /**
     * Extract every configured field from an HTML file.
     *
     * @param string $file Path of the HTML file to load.
     *
     * @throws Exception When the file cannot be loaded; the original failure
     *                   is available through getPrevious().
     *
     * @return array Extracted values keyed by the field names of the config.
     */
    public function extractFromFile(string $file): array
    {
        try {
            $this->dom->loadFromFile($file);
        } catch (Exception $e) {
            // Same type, message and code (0) as before, but chained so the
            // original exception class and stack trace are not lost.
            throw new Exception($e->getMessage(), 0, $e);
        }

        return $this->extract($this->dom);
    }
144 33
145
    /**
     * Extract every configured field from the document at a URL.
     *
     * @param string                     $url    Address of the document to load.
     * @param ClientInterface|null|mixed $client Forwarded unchanged to Dom::loadFromUrl() as its third argument.
     *
     * @throws Exception|ClientExceptionInterface When the document cannot be loaded; a wrapped
     *                                            failure is available through getPrevious().
     *
     * @return array Extracted values keyed by the field names of the config.
     */
    public function extractFromUrl(string $url, $client = null): array
    {
        try {
            $this->dom->loadFromUrl($url, null, $client);
        } catch (Exception $e) {
            // Same type, message and code (0) as before, but chained so the
            // original exception class and stack trace are not lost.
            throw new Exception($e->getMessage(), 0, $e);
        }

        return $this->extract($this->dom);
    }
165
166
    /**
     * Extract the value(s) of one configured field from a document or element.
     *
     * Keys of $field_config read here:
     *  - css:       selector passed to $dom->find().
     *  - type:      one of Attribute, Html, Image, Link, Text; anything else falls back to Text.
     *  - attribute: attribute name forwarded to extractField().
     *  - format:    formatter name, or list of names, resolved through getFormatter();
     *               names with no registered formatter are ignored.
     *  - children:  nested field configs; when present each match is resolved with
     *               getChildItem() instead of extractField().
     *  - multiple:  when exactly true, a list with one value per match is returned;
     *               otherwise only the first match is used.
     *
     * @param array     $field_config Configuration of the field.
     * @param Dom|mixed $dom          Object exposing find(); a Dom or a matched element.
     *
     * @return array|string|bool False when nothing matches or find() throws an Exception.
     */
    public function extractSelector(array $field_config, $dom)
    {
        try {
            $elements = $dom->find($field_config['css']);
        } catch (Exception $e) {
            // A selector the parser rejects is treated the same as "no match".
            $elements = [];
        }

        if (count($elements) < 1) {
            return false;
        }

        $types = [
            'Attribute', 'Html', 'Image', 'Link', 'Text',
        ];

        // Strict comparison: with loose comparison a value such as true
        // matches the list and is then rejected by extractField()'s string
        // parameter with a TypeError instead of falling back to Text.
        $item_type = 'Text';
        if (isset($field_config['type']) && in_array($field_config['type'], $types, true)) {
            $item_type = $field_config['type'];
        }

        $has_children = isset($field_config['children']);
        $multiple = isset($field_config['multiple']) && $field_config['multiple'] === true;
        $attribute = $field_config['attribute'] ?? false;

        // The formatter list does not depend on the matched element, so it is
        // resolved once instead of once per match.
        $formatters = [];
        if (!$has_children && isset($field_config['format'])) {
            $names = is_array($field_config['format'])
                ? $field_config['format']
                : [$field_config['format']];

            foreach ($names as $name) {
                if ($formatter = $this->getFormatter($name)) {
                    $formatters[$name] = $formatter;
                }
            }
        }

        $values = [];

        foreach ($elements as $element) {
            $value = $has_children
                ? $this->getChildItem($field_config, $element)
                : $this->extractField($element, $item_type, $attribute, $formatters);

            if (!$multiple) {
                // Single-value fields use the first match only.
                return $value;
            }

            $values[] = $value;
        }

        return $values;
    }
228 6
229 6
    /**
     * Read one value from a matched element and run it through the formatters.
     *
     * The value read depends on $item_type:
     *  - Attribute: $element->getAttribute($attribute)
     *  - Html:      $element->innerHtml
     *  - Image:     the "src" attribute
     *  - Link:      the "href" attribute
     *  - Text (and any other value): innerHtml with tags stripped and trimmed
     *
     * Formatters are applied in order, each receiving the previous result.
     * They are skipped only when there is no content at all (null, false or
     * an empty string), so a legitimate value such as "0" is still formatted.
     *
     * @param mixed  $element    Matched node exposing getAttribute() and innerHtml.
     * @param string $item_type  Kind of value to read.
     * @param mixed  $attribute  Attribute name, used only for the Attribute type.
     * @param array  $formatters Objects exposing format(), applied in iteration order.
     *
     * @return false|mixed|string
     */
    public function extractField($element, string $item_type, $attribute = false, array $formatters = [])
    {
        switch ($item_type) {
            case 'Attribute':
                $content = $element->getAttribute($attribute);
                break;
            case 'Html':
                $content = $element->innerHtml;
                break;
            case 'Image':
                $content = $element->getAttribute('src');
                break;
            case 'Link':
                $content = $element->getAttribute('href');
                break;
            case 'Text':
            default:
                $content = trim(strip_tags($element->innerHtml));
                break;
        }

        // A plain truthiness test would also skip the string "0".
        $has_content = $content !== null && $content !== false && $content !== '';

        if ($has_content) {
            /** @var FormatterInterface $formatter */
            foreach ($formatters as $formatter) {
                $content = $formatter->format($content);
            }
        }

        return $content;
    }
269
270
    /**
     * Resolve the nested "children" fields of a config against one matched element.
     *
     * Each child config is run through extractSelector() with the element as
     * the search root.
     *
     * @param array $field_config Field config whose "children" key holds name => child config pairs.
     * @param mixed $element      Matched element the child selectors are run against.
     *
     * @return array Child values keyed by child field name.
     */
    public function getChildItem(array $field_config, $element): array
    {
        $item = [];

        foreach ($field_config['children'] as $name => $child_field) {
            $item[$name] = $this->extractSelector($child_field, $element);
        }

        return $item;
    }
290
}
291