Excerpt::extractLocations() - Code Metrics - Inspection of "Merge pull request #4839 from GawainLynch/tests/re..." - bolt/bolt - Measure and Improve Code Quality continuously with Scrutinizer

Completed

Branch — master (c84c45)

by Gawain

created 2016-02-14 12:25 UTC

Excerpt::extractLocations() A

↳ Parent: Excerpt

Complexity

Conditions	3
Paths	3

Size

Total Lines	16
Code Lines	11

Duplication

Lines	0
Ratio	0 %

Importance

Changes	2
Bugs	0	Features	0

Metric	Value
c	2
b	0
f	0
dl	0
loc	16
rs	9.4285
cc	3
eloc	11
nc	3
nop	2

<?php

namespace Bolt\Helpers;

use Bolt\Legacy\Content;

class Excerpt
{
    /** @var Content|array|string */
    protected $body;
    /** @var string|null */
    protected $title;

    /**
     * Constructor.
     *
     * @param Content|array|string $body  Source the excerpt is built from.
     * @param string|null          $title Optional title; surrounding whitespace is trimmed.
     */
    public function __construct($body, $title = null)
    {
        $this->body = $body;
        // Keep "no title" as null. trim(null) would yield '' (and is deprecated
        // on PHP 8.1+), which defeats the `!== null` check in getExcerpt().
        $this->title = $title === null ? null : trim($title);
    }

    /**
     * Get the excerpt of a given piece of text.
     *
     * The body is flattened to a string (arrays are imploded after a set of
     * common meta fields is removed), tags are stripped, and the result is
     * either trimmed to length or, when $focus is given, cut around the focus
     * terms with those terms wrapped in `<mark>`.
     *
     * @param int               $length       Maximum length, title included.
     * @param bool              $includeTitle Prefix the excerpt with the title in `<b>` tags.
     * @param array|string|null $focus        Search term(s) to center on and highlight.
     *
     * @return string|null
     */
    public function getExcerpt($length = 200, $includeTitle = false, $focus = null)
    {
        $title = null;
        if ($includeTitle && $this->title !== null) {
            $title = Html::trimText(strip_tags($this->title), $length);
            // The title consumes part of the length budget.
            $length = $length - strlen($title);
        }

        if ($this->body instanceof Content) {
            $this->body = $this->body->getValues();
        }

        if (is_array($this->body)) {
            // Assume it's an array, strip some common fields that we don't need, implode the rest.
            $stripKeys = [
                'id',
                'slug',
                'datecreated',
                'datechanged',
                'username',
                'ownerid',
                'title',
                'contenttype',
                'status',
                'taxonomy',
                'templatefields',
            ];

            foreach ($stripKeys as $key) {
                unset($this->body[$key]);
            }
            $excerpt = implode(' ', $this->body);
        } elseif (is_string($this->body) || (is_object($this->body) && method_exists($this->body, '__toString'))) {
            // Otherwise we just use the string.
            $excerpt = (string) $this->body;
        } else {
            // Nope, got nothing.
            $excerpt = '';
        }

        // Put a space after every tag so words do not run together once tags are stripped.
        $excerpt = str_replace('>', '> ', $excerpt);

        if (empty($focus)) {
            $excerpt = Html::trimText(strip_tags($excerpt), $length);
        } else {
            $excerpt = $this->extractRelevant($focus, strip_tags($excerpt), $length);
        }

        if ($title !== null) {
            $excerpt = '<b>' . $title . '</b> ' . $excerpt;
        }

        return $excerpt;
    }

    /**
     * Find the (case-insensitive) byte offsets of every occurrence of each word.
     *
     * The array_unique is required, unless the words are made unique before
     * being passed in. Empty words are skipped.
     *
     * @param array  $words
     * @param string $fulltext
     *
     * @return array Sorted, de-duplicated list of offsets.
     */
    private function extractLocations(array $words, $fulltext)
    {
        $locations = [];
        foreach ($words as $word) {
            $wordLen = strlen($word);
            if ($wordLen === 0) {
                // An empty needle never advances the search offset on PHP 8
                // (endless loop) and raises a warning on older versions.
                continue;
            }

            $loc = stripos($fulltext, $word);
            while ($loc !== false) {
                $locations[] = $loc;
                $loc = stripos($fulltext, $word, $loc + $wordLen);
            }
        }
        $locations = array_unique($locations);
        sort($locations);

        return $locations;
    }

    /**
     * Work out which is the most relevant portion to display.
     *
     * This is done by looping over each match and finding the smallest distance between two found
     * strings. The idea being that the closer the terms are the better match the snippet would be.
     * When checking for matches we only change the location if there is a better match.
     * The only exception is where we have only two matches, in which case we just take the
     * first as they will be equally distant.
     *
     * @param array   $locations Sorted match offsets, possibly empty.
     * @param integer $prevCount Number of characters to keep in front of the chosen match.
     *
     * @return int Start offset of the snippet; 0 when there are no matches.
     */
    private function determineSnipLocation(array $locations, $prevCount)
    {
        $loccount = count($locations);
        if ($loccount === 0) {
            // Nothing matched: start at the beginning of the text.
            return 0;
        }

        // If we only have 1 match we don't actually do the for loop so set to the first
        $startPos = $locations[0];
        $smallestDiff = PHP_INT_MAX;

        // If we only have 2, skip as it's probably equally relevant
        if ($loccount > 2) {
            // skip the first as we check 1 behind
            for ($i = 1; $i < $loccount; $i++) {
                if ($i === $loccount - 1) { // at the end
                    $diff = $locations[$i] - $locations[$i - 1];
                } else {
                    $diff = $locations[$i + 1] - $locations[$i];
                }

                if ($smallestDiff > $diff) {
                    $smallestDiff = $diff;
                    $startPos = $locations[$i];
                }
            }
        }

        return $startPos > $prevCount ? $startPos - $prevCount : 0;
    }

    /**
     * Center on, and highlight search terms in excerpts.
     *
     * Text that already fits in $relLength is returned unchanged (and
     * unhighlighted). Otherwise a window of $relLength bytes is cut around the
     * densest group of matches, partial words at the cut edges are dropped,
     * an ellipsis marks each cut side, and every term is wrapped in `<mark>`.
     *
     * @see: http://www.boyter.org/2013/04/building-a-search-result-extract-generator-in-php/
     *
     * @param string|array $words     Search terms; a string is split on spaces.
     * @param string       $fulltext  Plain text to extract from.
     * @param integer      $relLength Maximum length of the extract.
     *
     * @return string
     */
    private function extractRelevant($words, $fulltext, $relLength = 300)
    {
        if (!is_array($words)) {
            $words = explode(' ', $words);
        }

        // Drop empty terms (e.g. from consecutive spaces) and duplicates: they
        // cannot be located and would otherwise match every word boundary.
        $terms = [];
        foreach ($words as $word) {
            if (is_scalar($word) && (string) $word !== '') {
                $terms[] = (string) $word;
            }
        }
        $terms = array_values(array_unique($terms));

        // 1/6 ratio on prevcount tends to work pretty well and puts the terms
        // in the middle of the extract
        $prevCount = (int) floor($relLength / 6);

        $indicator = '…';

        $textlength = strlen($fulltext);
        if ($textlength <= $relLength) {
            return $fulltext;
        }

        $locations = $this->extractLocations($terms, $fulltext);
        $startPos  = $this->determineSnipLocation($locations, $prevCount);

        // If we are going to snip too much, pull the window back. Clamp to an
        // integer >= 0: a negative offset would make substr() count from the
        // end of the text and miss the match entirely.
        if ($textlength - $startPos < $relLength) {
            $startPos = max(0, (int) ($startPos - ($textlength - $startPos) / 2));
        }

        $relText = substr($fulltext, $startPos, $relLength);

        // Check to ensure we don't snip the last word if that's the match
        if ($startPos + $relLength < $textlength) {
            $lastSpace = strrpos($relText, ' ');
            if ($lastSpace !== false) {
                $relText = substr($relText, 0, $lastSpace); // remove last word
            }
            $relText .= $indicator;
        }

        // If we trimmed from the front add '…'
        if ($startPos > 0) {
            $firstSpace = strpos($relText, ' ');
            if ($firstSpace !== false) {
                $relText = substr($relText, $firstSpace + 1); // remove first word
            }
            $relText = $indicator . $relText;
        }

        // Highlight the words, using the `<mark>` tag. All terms go into one
        // escaped alternation so that special characters cannot break the
        // pattern and a later term cannot match inside an inserted tag.
        if (!empty($terms)) {
            $quoted = [];
            foreach ($terms as $term) {
                $quoted[] = preg_quote($term, '/');
            }

            $highlighted = preg_replace('/\b(' . implode('|', $quoted) . ')\b/i', '<mark>$1</mark>', $relText);
            if ($highlighted !== null) {
                $relText = $highlighted;
            }
        }

        return $relText;
    }
}


1			<?php
2
3			namespace Bolt\Helpers;
4
5			use Bolt\Legacy\Content;
6
7			class Excerpt
8			{
9			/** @var Content|array|string */
10			protected $body;
11			/** @var string */
12			protected $title;
13
14			/**
15			* Constructor.
16			*
17			* @param Content|array|string $body
18			* @param string|null $title
19			*/
20			public function __construct($body, $title = null)
21			{
22			$this->body = $body;
23			$this->title = trim($title);
24			}
25
26			/**
27			* Get the excerpt of a given piece of text.
28			*
29			* @param int $length
30			* @param bool $includeTitle
31			* @param array|string|null $focus
32			*
33			* @return string|null
34			*/
35			public function getExcerpt($length = 200, $includeTitle = false, $focus = null)
36			{
37			$title = null;
38			if ($includeTitle && $this->title !== null) {
39			$title = Html::trimText(strip_tags($this->title), $length);
40			$length = $length - strlen($title);
41			}
42
43			if ($this->body instanceof Content) {
44			$this->body = $this->body->getValues();
45			}
46
47			if (is_array($this->body)) {
48			// Assume it's an array, strip some common fields that we don't need, implode the rest.
49			$stripKeys = [
50			'id',
51			'slug',
52			'datecreated',
53			'datechanged',
54			'username',
55			'ownerid',
56			'title',
57			'contenttype',
58			'status',
59			'taxonomy',
60			'templatefields',
61			];
62
63			foreach ($stripKeys as $key) {
64			unset($this->body[$key]);
65			}
66			$excerpt = implode(' ', $this->body);
67			} elseif (is_string($this->body) || (is_object($this->body) && method_exists($this->body, '__toString'))) {
68			// otherwise we just use the string.
69			$excerpt = (string) $this->body;
70			} else {
71			// Nope, got nothing.
72			$excerpt = '';
73			}
74
75			$excerpt = str_replace('>', '> ', $excerpt);
76
77			if (empty($focus)) {
78			$excerpt = Html::trimText(strip_tags($excerpt), $length);
79			} else {
80			$excerpt = $this->extractRelevant($focus, strip_tags($excerpt), $length);
81			}
82
83			if ($title !== null) {
84			$excerpt = '<b>' . $title . '</b> ' . $excerpt;
85			}
86
87			return $excerpt;
88			}
89
90			/**
91			* Find the locations of each of the words.
92			* Nothing exciting here. The array_unique is required, unless you decide
93			* to make the words unique before passing in.
94			*
95			* @param array $words
96			* @param string $fulltext
97			*
98			* @return array
99			*/
100			private function extractLocations(array $words, $fulltext)
101			{
102			$locations = [];
103			foreach ($words as $word) {
104			$wordLen = strlen($word);
105			$loc = stripos($fulltext, $word);
106			while ($loc !== false) {
107			$locations[] = $loc;
108			$loc = stripos($fulltext, $word, $loc + $wordLen);
109			}
110			}
111			$locations = array_unique($locations);
112			sort($locations);
113
114			return $locations;
115			}
116
117			/**
118			* Work out which is the most relevant portion to display
119			* This is done by looping over each match and finding the smallest distance between two found
120			* strings. The idea being that the closer the terms are the better match the snippet would be.
121			* When checking for matches we only change the location if there is a better match.
122			* The only exception is where we have only two matches in which case we just take the
123			* first as will be equally distant.
124			*
125			* @param array $locations
126			* @param integer $prevCount
127			*
128			* @return int
129			*/
130			private function determineSnipLocation(array $locations, $prevCount)
131			{
132			// If we only have 1 match we don't actually do the for loop so set to the first
133			$startPos = $locations[0];
134			$loccount = count($locations);
135			$smallestDiff = PHP_INT_MAX;
136
137			// If we only have 2, skip as it's probably equally relevant
138			if (count($locations) > 2) {
139			// skip the first as we check 1 behind
140			for ($i = 1; $i < $loccount; $i++) {
141			if ($i === $loccount - 1) { // at the end
142			$diff = $locations[$i] - $locations[$i - 1];
143			} else {
144			$diff = $locations[$i + 1] - $locations[$i];
145			}
146
147			if ($smallestDiff > $diff) {
148			$smallestDiff = $diff;
149			$startPos = $locations[$i];
150			}
151			}
152			}
153
154			$startPos = $startPos > $prevCount ? $startPos - $prevCount : 0;
155
156			return $startPos;
157			}
158
159			/**
160			* Center on, and highlight search terms in excerpts.
161			*
162			* @see: http://www.boyter.org/2013/04/building-a-search-result-extract-generator-in-php/
163			*
164			* @param string|array $words
165			* @param string $fulltext
166			* @param integer $relLength
167			*
168			* @return mixed|string
169			*/
170			private function extractRelevant($words, $fulltext, $relLength = 300)
171			{
172			if (!is_array($words)) {
173			$words = explode(' ', $words);
174			}
175
176			// 1/6 ratio on prevcount tends to work pretty well and puts the terms
177			// in the middle of the extract
178			$prevCount = floor($relLength / 6);
179
180			$indicator = '…';
181
182			$textlength = strlen($fulltext);
183			if ($textlength <= $relLength) {
184			return $fulltext;
185			}
186
187			$locations = $this->extractLocations($words, $fulltext);
188			$startPos = $this->determineSnipLocation($locations, $prevCount);
189
190			// if we are going to snip too much...
191			if ($textlength - $startPos < $relLength) {
192			$startPos = $startPos - ($textlength - $startPos) / 2;
193			}
194
195			$relText = substr($fulltext, $startPos, $relLength);
196
197			// check to ensure we dont snip the last word if thats the match
198			if ($startPos + $relLength < $textlength) {
199			$relText = substr($relText, 0, strrpos($relText, ' ')) . $indicator; // remove last word
200			}
201
202			// If we trimmed from the front add '…'
203			if ($startPos != 0) {
204			$relText = $indicator . substr($relText, strpos($relText, ' ') + 1); // remove first word
205			}
206
207			// Highlight the words, using the `<mark>` tag.
208			foreach ($words as $word) {
209			$relText = preg_replace('/\b(' . $word . ')\b/i', '<mark>$1</mark>', $relText);
210			}
211
212			return $relText;
213			}
214			}
215

bolt / bolt

Branch — master (c84c45)

Excerpt::extractLocations() A

Complexity

Size

Duplication

Importance

Duplication Side-by-Side

Filter issues like