Completed
Pull Request — feat/html-splitter (#178)
by Nuno
24:27 queued 20:28
created

HtmlSplitter::findValue()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 3
Code Lines 1

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 2
CRAP Score 1

Importance

Changes 0
Metric Value
cc 1
eloc 1
nc 1
nop 1
dl 0
loc 3
ccs 2
cts 2
cp 1
crap 1
rs 10
c 0
b 0
f 0
1
<?php
2
3
declare(strict_types=1);
4
5
/**
6
 * This file is part of Scout Extended.
7
 *
8
 * (c) Algolia Team <[email protected]>
9
 *
10
 *  For the full copyright and license information, please view the LICENSE
11
 *  file that was distributed with this source code.
12
 */
13
14
namespace Algolia\ScoutExtended\Splitters;
15
16
use DOMXPath;
17
use DOMDocument;
18
use Algolia\ScoutExtended\Contracts\SplitterContract;
19
20
/**
 * Splits an HTML string into a flat list of records, one per matched tag.
 *
 * Each record carries the text of the matched tag, the text of the
 * "parent" tags that precede it in the hierarchy (as tracked by the
 * queue), and an integer importance under the self::IMPORTANCE key.
 */
class HtmlSplitter implements SplitterContract
{
    /**
     * The list of html tags.
     *
     * The position of a tag in this list is used as its weight: the
     * earlier the tag appears, the lower (more important) its weight.
     *
     * @var string[]
     */
    protected $nodes = [
        'h1',
        'h2',
        'h3',
        'h4',
        'h5',
        'h6',
        'p',
    ];

    /**
     * String for key check purpose.
     *
     * It is the key under which the importance is stored, and it is also
     * what cleanRecords() uses to detect the end of a record.
     *
     * @const string IMPORTANCE
     */
    const IMPORTANCE = 'importance';

    /**
     * Creates a new instance of the class.
     *
     * @param array|null $nodes Custom list of tags; the default list is
     *                          kept when null is given.
     *
     * @return void
     */
    public function __construct(?array $nodes = null)
    {
        if ($nodes !== null) {
            $this->nodes = $nodes;
        }
    }

    /**
     * Find weight of current nodes.
     *
     * The weight is the index, within the list of tags, of the first key
     * of the given object. A key that is not in the list yields 0,
     * because the "not found" result of array_search() is cast to int.
     *
     * @param array $object Single-entry array shaped as [tag => text].
     *
     * @return int
     */
    public function findWeight(array $object): int
    {
        return (int) array_search(key($object), $this->nodes, true);
    }

    /**
     * Add object to queue.
     *
     * Entries at the end of the queue whose weight is greater than or
     * equal to the weight of the given object are removed first, so the
     * returned queue always has strictly increasing weights.
     *
     * @param array $object Single-entry array shaped as [tag => text].
     * @param array $queue  Current list of objects.
     *
     * @return array The updated queue, ending with the given object.
     */
    public function addObjectToQueue(array $object, array $queue): array
    {
        $weight = $this->findWeight($object);

        // Unwind the queue until its last entry outranks the new object.
        while ($queue !== [] && $weight <= $this->findWeight(end($queue))) {
            array_pop($queue);
        }

        $queue[] = $object;

        return $queue;
    }

    /**
     * Importance formula.
     * Give integer from tags ranking.
     *
     * Any tag other than "p" gets its index in the list of tags. A "p" tag
     * gets the index of the last tag of the list plus the index of the tag
     * of the last entry of the queue.
     *
     * NOTE(review): a "p" tag met while the queue is empty gets 0, which
     * is also the importance of the first tag of the list. This is kept
     * as is; confirm that it is the intended ranking.
     *
     * @param \DOMElement $node  Element being ranked.
     * @param array       $queue Queue as it was before adding the element.
     *
     * @return int
     */
    public function importanceWeight(\DOMElement $node, array $queue): int
    {
        if ($node->nodeName !== 'p') {
            return (int) array_search($node->nodeName, $this->nodes, true);
        }

        $last = end($queue);

        if (empty($last)) {
            return 0;
        }

        return (count($this->nodes) - 1) + (int) array_search(key($last), $this->nodes, true);
    }

    /**
     * Clean Records to have a correct format.
     *
     * Flattens each list of single-entry arrays into one associative
     * record. A record is emitted every time the self::IMPORTANCE key
     * is met.
     *
     * @param array $objects List of queues, each one ending with an
     *                       [self::IMPORTANCE => int] entry.
     *
     * @return array
     */
    public function cleanRecords(array $objects): array
    {
        $records = [];
        $record = [];

        foreach ($objects as $object) {
            foreach ($object as $data) {
                foreach ($data as $key => $value) {
                    $record[$key] = $value;

                    // The importance entry is always last: the record is complete.
                    if ($key === self::IMPORTANCE) {
                        $records[] = $record;
                        $record = [];
                    }
                }
            }
        }

        return $records;
    }

    /**
     * Acts a static factory.
     *
     * @param  string|array $tags A single tag or a list of tags.
     *
     * @return static
     */
    public static function by($tags)
    {
        return new static((array) $tags);
    }

    /**
     * Splits the given value.
     *
     * The value is parsed as HTML, every element matching one of the
     * configured tags is visited, and one record is produced per element.
     *
     * @param  object $searchable
     * @param  string $value
     *
     * @return array An empty array when the value is blank, when no tag
     *               is configured, or when the tags do not form a valid
     *               XPath expression.
     */
    public function split($searchable, $value): array
    {
        $html = (string) $value;

        // DOMDocument::loadHTML() rejects an empty source, and an empty
        // list of tags would produce an invalid XPath expression.
        if (trim($html) === '' || count($this->nodes) === 0) {
            return [];
        }

        $dom = new DOMDocument();

        // Collect libxml errors instead of emitting warnings: real-world
        // markup (HTML5 tags, unclosed tags, ...) is rarely pristine.
        $previous = libxml_use_internal_errors(true);

        try {
            // Without an explicit charset the parser does not assume UTF-8
            // and multi-byte characters end up garbled in the records.
            $dom->loadHTML('<meta http-equiv="Content-Type" content="text/html; charset=utf-8">'.$html);
        } finally {
            libxml_clear_errors();
            libxml_use_internal_errors($previous);
        }

        $xpath = new DOMXPath($dom);
        $matches = $xpath->query('//'.implode(' | //', $this->nodes));

        // query() returns false when the expression is malformed.
        if ($matches === false) {
            return [];
        }

        $queue = [];
        $objects = [];

        foreach ($matches as $node) {
            $object = [$node->nodeName => $node->textContent];

            // The importance depends on the queue as it was before the node is added.
            $importance = $this->importanceWeight($node, $queue);
            $queue = $this->addObjectToQueue($object, $queue);

            $objects[] = array_merge($queue, [[self::IMPORTANCE => $importance]]);
        }

        return $this->cleanRecords($objects);
    }
}
189