Passed
Push — analysis-qBJygA ( 146b42 )
by Nuno
09:11 queued 41s
created

HtmlSplitter::cleanRecords()   A

Complexity

Conditions 5
Paths 5

Size

Total Lines 16
Code Lines 9

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 10
CRAP Score 5

Importance

Changes 0
Metric Value
cc 5
eloc 9
nc 5
nop 1
dl 0
loc 16
ccs 10
cts 10
cp 1
crap 5
rs 9.6111
c 0
b 0
f 0
1
<?php
2
3
declare(strict_types=1);
4
5
/**
6
 * This file is part of Scout Extended.
7
 *
8
 * (c) Algolia Team <contact@algolia.com>
9
 *
10
 *  For the full copyright and license information, please view the LICENSE
11
 *  file that was distributed with this source code.
12
 */
13
14
namespace Algolia\ScoutExtended\Splitters;
15
16
use DOMXPath;
17
use DOMDocument;
18
use Algolia\ScoutExtended\Contracts\SplitterContract;
19
use Algolia\ScoutExtended\Splitters\HtmlSplitterComponent\Queue;
20
use Algolia\ScoutExtended\Splitters\HtmlSplitterComponent\ObjectQueue;
21
22
final class HtmlSplitter implements SplitterContract
{
    /**
     * The list of html tags whose nodes are collected by split().
     *
     * @var string[]
     */
    protected $tags = [
        'h1',
        'h2',
        'h3',
        'h4',
        'h5',
        'h6',
        'p',
    ];

    /**
     * Creates a new instance of the class.
     *
     * @param string[]|null $tags Tag names to collect; the default list is
     *                            kept when null is given.
     */
    public function __construct(array $tags = null)
    {
        if ($tags !== null) {
            $this->tags = $tags;
        }
    }

    /**
     * Acts as a static factory.
     *
     * @param string|array<string> $tags A single tag name or a list of them.
     *
     * @return static
     */
    public static function by($tags)
    {
        // A lone string is wrapped into a one-element array by the cast.
        return new static((array) $tags);
    }

    /**
     * Splits the given value.
     *
     * Parses the value as HTML, selects every node matching one of the
     * configured tags (in the order returned by the XPath query) and pushes
     * its tag name and text content onto a queue. The result is whatever
     * Queue::sanitizeQueue() produces for that queue.
     *
     * @param object $searchable
     * @param string $value The HTML to split; an empty or null value yields
     *                      an empty queue.
     *
     * @return array
     */
    public function split($searchable, $value): array
    {
        $dom = new DOMDocument();
        $queue = new Queue();

        // loadHTML() rejects an empty string (warning on PHP 7, ValueError on
        // PHP 8), so there is nothing to parse in that case.
        if ($value !== null && $value !== '') {
            // DOMDocument only knows HTML4 and reports HTML5 elements as parse
            // errors. Collect those errors inside libxml instead of letting
            // them surface as PHP warnings (which a framework error handler
            // may turn into ErrorException), then restore the caller's setting.
            $previousSetting = libxml_use_internal_errors(true);

            try {
                $dom->loadHTML($value);
            } finally {
                libxml_clear_errors();
                libxml_use_internal_errors($previousSetting);
            }
        }

        // With no tags the expression would be the invalid XPath "//".
        if ($this->tags !== []) {
            $xpath = new DOMXPath($dom);
            $nodes = $xpath->query('//'.implode(' | //', $this->tags));

            // query() returns false when the expression is malformed.
            if ($nodes !== false) {
                foreach ($nodes as $node) {
                    $queue->addObjectQueue(new ObjectQueue($node->nodeName, $node->textContent));
                }
            }
        }

        return $queue->sanitizeQueue();
    }
}
94