Passed
Pull Request — master (#170)
by
unknown
18:50 queued 13:42
created

HtmlSplitter::__construct()   A

Complexity

Conditions 2
Paths 2

Size

Total Lines 4
Code Lines 2

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 3
CRAP Score 2

Importance

Changes 0
Metric Value
cc 2
eloc 2
nc 2
nop 1
dl 0
loc 4
ccs 3
cts 3
cp 1
crap 2
rs 10
c 0
b 0
f 0
1
<?php
2
3
declare(strict_types=1);
4
5
/**
6
 * This file is part of Scout Extended.
7
 *
8
 * (c) Algolia Team <[email protected]>
9
 *
10
 *  For the full copyright and license information, please view the LICENSE
11
 *  file that was distributed with this source code.
12
 */
13
14
namespace Algolia\ScoutExtended\Splitters;
15
16
use DOMXPath;
17
use DOMDocument;
18
use Algolia\ScoutExtended\Contracts\SplitterContract;
19
use Algolia\ScoutExtended\Splitters\HtmlSplitter\Node;
20
use Algolia\ScoutExtended\Splitters\HtmlSplitter\NodeCollection;
21
22
final class HtmlSplitter implements SplitterContract
{
    /**
     * The list of html tags whose nodes are extracted by split().
     *
     * @var string[]
     */
    private $tags = [
        'h1',
        'h2',
        'h3',
        'h4',
        'h5',
        'h6',
        'p',
    ];

    /**
     * Creates a new instance of the class.
     *
     * @param string[]|null $tags Tags to extract; when null the default
     *                            heading/paragraph list above is kept.
     *
     * @return void
     */
    public function __construct(?array $tags = null)
    {
        if ($tags !== null) {
            $this->tags = $tags;
        }
    }

    /**
     * Acts as a static factory.
     *
     * @param string|array<string> $tags A single tag name or a list of tag names;
     *                                   the value is cast to an array.
     *
     * @return static
     */
    public static function by($tags)
    {
        return new static((array) $tags);
    }

    /**
     * Splits the given value.
     *
     * Parses $value as HTML, selects every element whose tag is in the
     * configured list and returns the resulting node collection as an array.
     *
     * @param object $searchable Not used by this splitter.
     * @param string $value      The HTML markup to split.
     *
     * @return array
     */
    public function split($searchable, $value): array
    {
        $dom = new DOMDocument();

        // DOMDocument is only for HTML4, so HTML5 markup produces libxml
        // warnings. Collect them internally instead of letting them surface
        // as PHP warnings (or as ErrorException where warnings are converted).
        $previousErrorMode = libxml_use_internal_errors(true);

        try {
            // loadHTML() rejects an empty string (warning on PHP 7, ValueError
            // on PHP 8); an empty document yields no nodes below anyway.
            if ($value !== '') {
                $dom->loadHTML($value);
            }
        } catch (\ErrorException $exception) {
            // Intentionally ignored: parsing is best-effort and whatever was
            // loaded into the document before the error is still queried.
        } finally {
            // Drop the collected errors and restore the caller's libxml mode.
            libxml_clear_errors();
            libxml_use_internal_errors($previousErrorMode);
        }

        $nodes = [];

        // With no tags the expression would be the invalid XPath '//',
        // so only query when there is at least one tag to look for.
        if ($this->tags !== []) {
            $xpath = new DOMXPath($dom);
            $xpathQuery = '//'.implode(' | //', $this->tags);
            $nodes = $xpath->query($xpathQuery);
        }

        $nodeCollection = new NodeCollection($this->tags);

        foreach ($nodes as $node) {
            $nodeCollection->push(new Node($node->nodeName, $node->textContent));
        }

        return $nodeCollection->toArray();
    }
}
94