Completed
Pull Request — master (#170)
by
unknown
11:18 queued 06:29
created

HtmlSplitter   A

Complexity

Total Complexity 17

Size/Duplication

Total Lines 161
Duplicated Lines 0 %

Test Coverage

Coverage 97.96%

Importance

Changes 0
Metric Value
eloc 52
dl 0
loc 161
ccs 48
cts 49
cp 0.9796
rs 10
c 0
b 0
f 0
wmc 17

7 Methods

Rating   Name   Duplication   Size   Complexity  
A importanceWeight() 0 11 3
A __construct() 0 4 2
A addObjectToQueue() 0 16 3
A findValue() 0 3 1
A by() 0 3 1
A split() 0 21 2
A cleanRecords() 0 16 5
1
<?php
2
3
declare(strict_types=1);
4
5
/**
6
 * This file is part of Scout Extended.
7
 *
8
 * (c) Algolia Team <[email protected]>
9
 *
10
 *  For the full copyright and license information, please view the LICENSE
11
 *  file that was distributed with this source code.
12
 */
13
14
namespace Algolia\ScoutExtended\Splitters;
15
16
use DOMDocument;
17
use DOMXPath;
18
use Algolia\ScoutExtended\Contracts\SplitterContract;
19
20
class HtmlSplitter implements SplitterContract
21
{
22
    /**
     * The HTML tag names this splitter extracts from the markup.
     *
     * A tag's position in this list is the rank returned by findValue()
     * and used by importanceWeight().
     *
     * @var string[]
     */
    protected $acceptedNodes = ['h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'p'];
36
37
    /**
     * Creates a new instance of the class.
     *
     * When no list is given (null), the default accepted nodes are kept.
     *
     * @param array|null $acceptedNodes Tag names that replace the default list.
     */
    public function __construct(array $acceptedNodes = null)
    {
        // Fall back to the current (default) list when nothing was provided.
        $this->acceptedNodes = $acceptedNodes ?? $this->acceptedNodes;
    }
50
51
    /**
     * Finds the rank of the given object's tag, i.e. the position of its
     * tag name inside $acceptedNodes.
     *
     * The tag name is read from the first key of $object, so a one-entry
     * array such as ['h2' => 'Some title'] is expected.
     *
     * @param array $object
     *
     * @return int
     *
     * @throws \InvalidArgumentException When the tag is not an accepted node.
     */
    public function findValue($object): int
    {
        // key() reads the array's internal pointer; rewind the local copy so
        // the first key is always the one inspected.
        reset($object);
        $tag = key($object);

        $position = array_search($tag, $this->acceptedNodes, true);

        // array_search() yields false on a miss (or a string for string-keyed
        // lists); neither satisfies the int return type.
        if (! is_int($position)) {
            throw new \InvalidArgumentException(
                sprintf('The tag "%s" is not one of the accepted nodes.', (string) $tag)
            );
        }

        return $position;
    }
62
63
    /**
     * Adds the object to the end of the queue.
     *
     * Before appending, trailing queue entries whose rank (see findValue())
     * is greater than or equal to the object's rank are removed, so the
     * object always ends up behind an entry of strictly lower rank, or
     * alone in the queue.
     *
     * @param array $object
     * @param array $queue
     *
     * @return array The updated queue.
     */
    public function addObjectToQueue($object, $queue): array
    {
        // Drop trailing entries until the last one ranks strictly lower.
        while (count($queue) > 0 && ! ($this->findValue($object) > $this->findValue(end($queue)))) {
            array_pop($queue);
        }

        $queue[] = $object;

        return $queue;
    }
90
91
    /**
     * Importance formula: derives an integer weight from the tags ranking.
     *
     * - For a "p" node: 0 when the queue has no usable last entry, otherwise
     *   (number of accepted nodes - 1) plus the rank of the last queue
     *   entry's tag.
     * - For any other node: the position of its tag name in $acceptedNodes.
     *
     * NOTE(review): split() computes the weight before it updates the queue,
     * so when the last queue entry is itself a "p" its rank is added too and
     * a paragraph following another paragraph weighs more than the first
     * one. Confirm this is intended.
     *
     * @param \DOMElement $node
     * @param array $queue
     *
     * @return int
     *
     * @throws \InvalidArgumentException When a non-"p" node is not an accepted node.
     */
    public function importanceWeight($node, $queue): int
    {
        if ($node->nodeName === 'p') {
            $last = end($queue);

            if (empty($last)) {
                return 0;
            }

            $lastRank = array_search(key($last), $this->acceptedNodes, true);

            // A tag missing from the list contributes nothing, as before.
            return (count($this->acceptedNodes) - 1) + (is_int($lastRank) ? $lastRank : 0);
        }

        $rank = array_search($node->nodeName, $this->acceptedNodes, true);

        // array_search() returns false on a miss, which cannot be returned
        // from an int-typed method; fail with an explicit message instead.
        if (! is_int($rank)) {
            throw new \InvalidArgumentException(
                sprintf('The tag "%s" is not one of the accepted nodes.', (string) $node->nodeName)
            );
        }

        return $rank;
    }
112
113
    /**
     * Cleans the records to give them a flat format.
     *
     * Each input record is a list of single-entry arrays (the queue entries
     * followed by an ['importance' => int] entry). The entries are merged
     * into one associative array, and a new output record is started every
     * time an "importance" key has been consumed.
     *
     * @param array $records
     *
     * @return array
     */
    public function cleanRecords($records): array
    {
        $cleaned = [];
        $current = [];

        foreach ($records as $record) {
            foreach ($record as $fragment) {
                foreach ($fragment as $key => $value) {
                    $current[$key] = $value;

                    // Strict check: a loose comparison treats the integer
                    // key 0 as equal to 'importance' on PHP < 8.
                    if ($key === 'importance') {
                        $cleaned[] = $current;
                        $current = [];
                    }
                }
            }
        }

        return $cleaned;
    }
138
139
    /**
     * Acts as a static factory.
     *
     * The given tags are cast to an array, so a single tag name may be
     * passed as a plain string.
     *
     * @param  string|array $tags
     *
     * @return static
     */
    public static function by($tags)
    {
        $acceptedNodes = (array) $tags;

        return new static($acceptedNodes);
    }
150
151
    /**
     * Splits the given HTML value into records.
     *
     * Every accepted node found in the markup yields one record made of the
     * current queue of tags (tag name => text content) plus an "importance"
     * weight. An empty value, an empty list of accepted nodes, or markup
     * without any accepted node yields an empty array.
     *
     * NOTE(review): DOMDocument::loadHTML() assumes ISO-8859-1 unless the
     * markup declares a charset. Confirm how non-ASCII input is expected to
     * be handled.
     *
     * @param  object $searchable Not used by this splitter.
     * @param  string $value
     *
     * @return array
     */
    public function split($searchable, $value): array
    {
        // Nothing to parse, or no tag to look for ("//" alone is not a valid
        // XPath expression).
        if ($value === null || $value === '' || count($this->acceptedNodes) === 0) {
            return [];
        }

        // Collect libxml parse errors internally so imperfect markup does
        // not surface as PHP warnings; the previous setting is restored.
        $previous = libxml_use_internal_errors(true);

        try {
            $dom = new DOMDocument();
            $dom->loadHTML($value);
        } finally {
            if (! $previous) {
                // Only discard the buffer when this method enabled it.
                libxml_clear_errors();
            }

            libxml_use_internal_errors($previous);
        }

        $xpath = new DOMXPath($dom);
        $nodes = $xpath->query('//'.implode(' | //', $this->acceptedNodes));

        if ($nodes === false) {
            return [];
        }

        $queue = [];
        $records = [];

        foreach ($nodes as $node) {
            $object = [$node->nodeName => $node->textContent];

            // The weight must be computed before the queue is updated.
            $importance = $this->importanceWeight($node, $queue);
            $queue = $this->addObjectToQueue($object, $queue);

            $record = $queue;
            $record[] = ['importance' => $importance];
            $records[] = $record;
        }

        return $this->cleanRecords($records);
    }
182
}
183