Completed
Pull Request — feat/html-splitter (#174)
by Nuno
32:55 queued 28:46
created

HtmlSplitter::addObjectToQueue()   A

Complexity

Conditions 3
Paths 3

Size

Total Lines 15
Code Lines 10

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 9
CRAP Score 3

Importance

Changes 0
Metric Value
cc 3
eloc 10
nc 3
nop 2
dl 0
loc 15
ccs 9
cts 9
cp 1
crap 3
rs 9.9332
c 0
b 0
f 0
1
<?php
2
3
declare(strict_types=1);
4
5
/**
6
 * This file is part of Scout Extended.
7
 *
8
 * (c) Algolia Team <contact@algolia.com>
9
 *
10
 *  For the full copyright and license information, please view the LICENSE
11
 *  file that was distributed with this source code.
12
 */
13
14
namespace Algolia\ScoutExtended\Splitters;
15
16
use DOMXPath;
17
use DOMDocument;
18
use Algolia\ScoutExtended\Contracts\SplitterContract;
19
20
/**
 * Splits an HTML string into flat records, one per matched node.
 *
 * Each record holds the text of the matched node, the text of the nodes
 * still queued above it, and an integer `importance` value.
 */
class HtmlSplitter implements SplitterContract
{
    /**
     * The list of html tags.
     *
     * The position of a tag in this list is its rank: a node is queued
     * under every previously queued node whose rank is strictly lower.
     *
     * @var string[]
     */
    protected $acceptedNodes = [
        'h1',
        'h2',
        'h3',
        'h4',
        'h5',
        'h6',
        'p',
    ];

    /**
     * Creates a new instance of the class.
     *
     * @param string[]|null $acceptedNodes Replaces the default tag list when not null.
     *
     * @return void
     */
    public function __construct(?array $acceptedNodes = null)
    {
        if ($acceptedNodes !== null) {
            $this->acceptedNodes = $acceptedNodes;
        }
    }

    /**
     * Find the rank of an object, i.e. the position of its tag in $acceptedNodes.
     *
     * @param array $object Single-entry array keyed by tag name.
     *
     * @return int
     *
     * @throws \InvalidArgumentException When the tag has no integer position in $acceptedNodes.
     */
    public function findValue($object): int
    {
        return $this->rankOf((string) key($object));
    }

    /**
     * Add object to queue.
     *
     * Queued entries whose rank is greater than or equal to the rank of
     * $object are removed from the end of the queue first, then $object
     * is appended. An empty queue receives $object without any lookup.
     *
     * @param array $object Single-entry array keyed by tag name.
     * @param array $queue  List of such single-entry arrays.
     *
     * @return array The updated queue.
     *
     * @throws \InvalidArgumentException When a compared tag has no integer position in $acceptedNodes.
     */
    public function addObjectToQueue($object, $queue): array
    {
        // Iterative form of the former recursion: drop trailing entries
        // until the tail ranks strictly lower than the new object.
        while (count($queue) > 0) {
            if ($this->findValue($object) > $this->findValue(end($queue))) {
                break;
            }

            array_pop($queue);
        }

        $queue[] = $object;

        return $queue;
    }

    /**
     * Importance formula.
     * Give integer from tags ranking.
     *
     * Any node other than `p` gets the rank of its own tag. A `p` node gets
     * 0 when the queue is empty, otherwise (number of accepted tags - 1)
     * plus the rank of the last queued entry.
     *
     * @param \DOMElement $node
     * @param array $queue
     *
     * @return int
     *
     * @throws \InvalidArgumentException When the looked-up tag has no integer position in $acceptedNodes.
     */
    public function importanceWeight($node, $queue): int
    {
        if ($node->nodeName !== 'p') {
            return $this->rankOf((string) $node->nodeName);
        }

        $last = end($queue);

        if (empty($last)) {
            return 0;
        }

        return (count($this->acceptedNodes) - 1) + $this->rankOf((string) key($last));
    }

    /**
     * Clean Records to have a correct format.
     *
     * Flattens every record (a list of single-entry arrays) into one
     * associative array. A flattened record is emitted each time an
     * `importance` key is met.
     *
     * @param array $records
     *
     * @return array
     */
    public function cleanRecords($records): array
    {
        $newRecords = [];
        // Initialised explicitly instead of relying on implicit creation
        // by the first assignment inside the loop.
        $newRecord = [];

        foreach ($records as $record) {
            foreach ($record as $entry) {
                foreach ($entry as $key => $value) {
                    $newRecord[$key] = $value;

                    // Strict comparison: with `==` an integer key 0 equals
                    // any non-numeric string on PHP 7.
                    if ($key === 'importance') {
                        $newRecords[] = $newRecord;
                        $newRecord = [];
                    }
                }
            }
        }

        return $newRecords;
    }

    /**
     * Acts as a static factory.
     *
     * @param  string|array $tags
     *
     * @return static
     */
    public static function by($tags)
    {
        return new static((array) $tags);
    }

    /**
     * Splits the given value.
     *
     * Returns an empty array when the value is blank, when no tag is
     * accepted, or when no accepted node is found.
     *
     * @param  object $searchable
     * @param  string $value
     *
     * @return array
     *
     * @throws \InvalidArgumentException When a matched node's tag has no integer position in $acceptedNodes.
     */
    public function split($searchable, $value): array
    {
        $html = (string) $value;

        // loadHTML() rejects an empty string, and an empty tag list would
        // build the invalid XPath expression "//".
        if (trim($html) === '' || count($this->acceptedNodes) === 0) {
            return [];
        }

        $dom = new DOMDocument();

        // Collect libxml parse errors internally instead of letting them
        // surface as PHP warnings; the caller's setting is restored after.
        // NOTE(review): loadHTML() may assume a non-UTF-8 charset when the
        // markup declares none - confirm how callers encode $value.
        $previous = libxml_use_internal_errors(true);

        try {
            $loaded = $dom->loadHTML($html);
        } finally {
            libxml_clear_errors();
            libxml_use_internal_errors($previous);
        }

        if ($loaded === false) {
            return [];
        }

        $xpath = new DOMXPath($dom);
        $nodes = $xpath->query('//'.implode(' | //', $this->acceptedNodes));

        // query() yields false for an expression it cannot evaluate.
        if ($nodes === false) {
            return [];
        }

        $queue = [];
        $records = [];

        foreach ($nodes as $node) {
            // Importance is computed against the queue as it was before
            // the current node is added to it.
            $importance = $this->importanceWeight($node, $queue);
            $queue = $this->addObjectToQueue([$node->nodeName => $node->textContent], $queue);

            $record = $queue;
            $record[] = ['importance' => $importance];
            $records[] = $record;
        }

        return $this->cleanRecords($records);
    }

    /**
     * Resolve the integer position of a tag in $acceptedNodes.
     *
     * @param string $tag
     *
     * @return int
     *
     * @throws \InvalidArgumentException When the tag is missing or its key is not an integer.
     */
    private function rankOf(string $tag): int
    {
        $rank = array_search($tag, $this->acceptedNodes, true);

        // array_search() returns false on a miss and a string for a string
        // key; neither may be returned from an int-typed method.
        if (! is_int($rank)) {
            throw new \InvalidArgumentException(
                sprintf('Tag "%s" has no integer rank in the accepted nodes list.', $tag)
            );
        }

        return $rank;
    }
}
181