Completed
Branch master (c84c45)
by Gawain
11:49 queued 05:43
created

Excerpt::__construct()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 5
Code Lines 3

Duplication

Lines 0
Ratio 0 %

Importance

Changes 3
Bugs 0 Features 0
Metric Value
c 3
b 0
f 0
dl 0
loc 5
rs 9.4285
cc 1
eloc 3
nc 1
nop 2
1
<?php
2
3
namespace Bolt\Helpers;
4
5
use Bolt\Legacy\Content;
6
7
/**
 * Builds a short plain-text excerpt from a content record, an array of field
 * values or a string, optionally centred on (and highlighting) search terms.
 */
class Excerpt
{
    /** @var Content|array|string */
    protected $body;
    /** @var string */
    protected $title;

    /**
     * Constructor.
     *
     * @param Content|array|string $body  The source the excerpt is taken from.
     * @param string|null          $title Optional title; stored trimmed, '' when omitted.
     */
    public function __construct($body, $title = null)
    {
        $this->body = $body;
        // Cast first: trim(null) is deprecated as of PHP 8.1. A missing title
        // ends up as '' and getExcerpt() treats '' as "no title".
        $this->title = trim((string) $title);
    }

    /**
     * Get the excerpt of a given piece of text.
     *
     * @param int               $length       Maximum excerpt length; the title's length is subtracted
     *                                        from it when the title is included.
     * @param bool              $includeTitle Prefix the excerpt with the title wrapped in <b>…</b>.
     * @param array|string|null $focus        Search term(s) to centre on and highlight. When empty
     *                                        the text is simply trimmed to $length.
     *
     * @return string|null
     */
    public function getExcerpt($length = 200, $includeTitle = false, $focus = null)
    {
        $title = null;
        // Only a non-empty title is rendered; otherwise an empty "<b></b> "
        // prefix would be emitted for records without a title.
        if ($includeTitle && $this->title !== null && $this->title !== '') {
            $title = Html::trimText(strip_tags($this->title), $length);
            $length = $length - strlen($title);
        }

        // Make sure adjacent tags leave a space behind once they are stripped.
        $excerpt = str_replace('>', '> ', $this->bodyToText());

        if (empty($focus)) {
            $excerpt = Html::trimText(strip_tags($excerpt), $length);
        } else {
            $excerpt = $this->extractRelevant($focus, strip_tags($excerpt), $length);
        }

        if ($title !== null) {
            $excerpt = '<b>' . $title . '</b> ' . $excerpt;
        }

        return $excerpt;
    }

    /**
     * Flatten the body into a single string without modifying $this->body.
     *
     * @return string
     */
    private function bodyToText()
    {
        $body = $this->body;
        if ($body instanceof Content) {
            $body = $body->getValues();
        }

        if (is_array($body)) {
            // Strip some common fields that we don't need, implode the rest.
            $stripKeys = [
                'id',
                'slug',
                'datecreated',
                'datechanged',
                'username',
                'ownerid',
                'title',
                'contenttype',
                'status',
                'taxonomy',
                'templatefields',
            ];
            $body = array_diff_key($body, array_flip($stripKeys));

            $parts = [];
            foreach ($body as $value) {
                // Nested arrays and objects without __toString() cannot be
                // converted to text; skip them instead of triggering an
                // "Array to string conversion" notice.
                if (is_array($value) || (is_object($value) && !method_exists($value, '__toString'))) {
                    continue;
                }
                $parts[] = (string) $value;
            }

            return implode(' ', $parts);
        }

        if (is_string($body) || (is_object($body) && method_exists($body, '__toString'))) {
            // Otherwise we just use the string.
            return (string) $body;
        }

        // Nope, got nothing.
        return '';
    }

    /**
     * Find the byte offsets of every (case-insensitive) occurrence of each word.
     *
     * @param array  $words
     * @param string $fulltext
     *
     * @return array Unique offsets, sorted ascending.
     */
    private function extractLocations(array $words, $fulltext)
    {
        $locations = [];
        foreach ($words as $word) {
            $word = (string) $word;
            $wordLen = strlen($word);
            if ($wordLen === 0) {
                // An empty needle matches at every offset on PHP 8, which
                // would make the loop below never advance.
                continue;
            }
            $loc = stripos($fulltext, $word);
            while ($loc !== false) {
                $locations[] = $loc;
                $loc = stripos($fulltext, $word, $loc + $wordLen);
            }
        }
        // Different words can match at the same offset.
        $locations = array_unique($locations);
        sort($locations);

        return $locations;
    }

    /**
     * Work out which is the most relevant portion to display.
     *
     * This is done by looping over each match and finding the smallest distance between two found
     * strings. The idea being that the closer the terms are the better match the snippet would be.
     * When checking for matches we only change the location if there is a better match.
     * With one or two matches the first one is used.
     *
     * @param array $locations Sorted match offsets, as returned by extractLocations().
     * @param int   $prevCount Number of bytes of leading context to keep before the match.
     *
     * @return int Offset at which the snippet should start (never negative).
     */
    private function determineSnipLocation(array $locations, $prevCount)
    {
        // No match at all: start at the beginning of the text.
        if (empty($locations)) {
            return 0;
        }

        // If we only have 1 match we don't actually do the for loop so set to the first
        $startPos = $locations[0];
        $loccount = count($locations);
        $smallestDiff = PHP_INT_MAX;

        // If we only have 2, skip as it's probably equally relevant
        if ($loccount > 2) {
            // skip the first as we check 1 behind
            for ($i = 1; $i < $loccount; $i++) {
                if ($i === $loccount - 1) { // at the end
                    $diff = $locations[$i] - $locations[$i - 1];
                } else {
                    $diff = $locations[$i + 1] - $locations[$i];
                }

                if ($smallestDiff > $diff) {
                    $smallestDiff = $diff;
                    $startPos = $locations[$i];
                }
            }
        }

        return $startPos > $prevCount ? $startPos - $prevCount : 0;
    }

    /**
     * Center on, and highlight search terms in excerpts.
     *
     * Text that already fits within $relLength is returned unchanged (and
     * without highlighting).
     *
     * @see: http://www.boyter.org/2013/04/building-a-search-result-extract-generator-in-php/
     *
     * @param string|array $words     Search terms; a string is split on spaces.
     * @param string       $fulltext  Plain text to take the snippet from.
     * @param int          $relLength Maximum snippet length in bytes.
     *
     * @return string
     */
    private function extractRelevant($words, $fulltext, $relLength = 300)
    {
        if (!is_array($words)) {
            $words = explode(' ', $words);
        }

        // Repeated spaces produce empty entries; drop those (and anything
        // that is not a scalar) so they can neither stall the location
        // search nor turn the highlight pattern into a match-everything regex.
        $terms = [];
        foreach ($words as $word) {
            if (!is_scalar($word)) {
                continue;
            }
            $word = trim((string) $word);
            if ($word !== '') {
                $terms[] = $word;
            }
        }

        $textlength = strlen($fulltext);
        if ($textlength <= $relLength) {
            return $fulltext;
        }

        // Nothing sensible can be cut out of a zero or negative budget
        // (possible when the title already used up the whole length).
        if ($relLength <= 0) {
            return '';
        }

        // 1/6 ratio on prevcount tends to work pretty well and puts the terms
        // in the middle of the extract
        $prevCount = (int) floor($relLength / 6);

        $indicator = '…';

        $locations = $this->extractLocations($terms, $fulltext);
        $startPos  = $this->determineSnipLocation($locations, $prevCount);

        // if we are going to snip too much...
        if ($textlength - $startPos < $relLength) {
            // Keep the offset a non-negative integer: a negative offset
            // makes substr() count from the END of the text and lose the match.
            $startPos = max(0, (int) ($startPos - ($textlength - $startPos) / 2));
        }

        $relText = substr($fulltext, $startPos, $relLength);

        // check to ensure we dont snip the last word if thats the match
        if ($startPos + $relLength < $textlength) {
            $lastSpace = strrpos($relText, ' ');
            if ($lastSpace !== false) {
                $relText = substr($relText, 0, $lastSpace); // remove last word
            } else {
                // No word boundary in the window: keep the text, but drop a
                // UTF-8 sequence that the byte-wise cut left incomplete.
                $relText = preg_replace(
                    '/(?:[\xC2-\xDF]|[\xE0-\xEF][\x80-\xBF]?|[\xF0-\xF4][\x80-\xBF]{0,2})\z/',
                    '',
                    $relText
                );
            }
            $relText .= $indicator;
        }

        // If we trimmed from the front add '…'
        if ($startPos > 0) {
            $firstSpace = strpos($relText, ' ');
            if ($firstSpace !== false) {
                $relText = substr($relText, $firstSpace + 1); // remove first word
            } else {
                // No word boundary: only drop continuation bytes whose lead
                // byte was cut off.
                $relText = preg_replace('/^[\x80-\xBF]+/', '', $relText);
            }
            $relText = $indicator . $relText;
        }

        // Highlight the words, using the `<mark>` tag. All terms go into one
        // escaped alternation so regex metacharacters in a term are matched
        // literally and a term can never match inside an inserted <mark> tag.
        if (!empty($terms)) {
            $quoted = [];
            foreach ($terms as $term) {
                $quoted[] = preg_quote($term, '/');
            }
            $highlighted = preg_replace(
                '/\b(' . implode('|', $quoted) . ')\b/i',
                '<mark>$1</mark>',
                $relText
            );
            if ($highlighted !== null) {
                $relText = $highlighted;
            }
        }

        return $relText;
    }
}
215