Highlighter::__construct() - Code Metrics - Inspection of "Merge pull request #11567 from mrclay/11563_routes" - Elgg/Elgg - Measure and Improve Code Quality continuously with Scrutinizer

Passed

Push — master ( c0a3a7...3b84a4 )

by Jeroen

created 2018-01-08 10:08 UTC

Highlighter::__construct() A

↳ Parent: Highlighter

Complexity

Conditions	1
Paths	1

Size

Total Lines	2
Code Lines	1

Duplication

Lines	0
Ratio	0 %

Code Coverage

Tests	2
CRAP Score	1

Importance

Changes

Metric	Value
cc	1
eloc	1
nc	1
nop	1
dl	0
loc	2
ccs	2
cts	2
cp	1
crap	1
rs	10
c	0
b	0
f	0

<?php

namespace Elgg\Search;

/**
 * Highlights relevant substrings in search results
 *
 * @access private
 */
class Highlighter {

	/**
	 * @var array Search params; the 'query_parts' entry is read as the list of terms to highlight
	 */
	protected $params = [];

	/**
	 * Constructor
	 *
	 * @param array $params Search params
	 *
	 * @access private
	 */
	public function __construct(array $params = []) {
		$this->params = $params;
	}

	/**
	 * Safely highlights search query words found in $text avoiding recursion
	 *
	 * Every match of a query part is wrapped in
	 * <span class="search-highlight search-highlight-colorN"> where N is the
	 * 1-based position of the part in the 'query_parts' param.
	 *
	 * NOTE: the markup is inserted via numeric placeholders, so a query part
	 * made of digits can still match inside a placeholder.
	 *
	 * @param string $text Text to highlight
	 *
	 * @return string
	 *
	 * @access public
	 */
	public function highlightWords($text) {

		$text = _elgg_get_display_query($text);

		// Placeholders stand in for the html while the terms are replaced, in case
		// one of the search terms is part of the html string itself.
		// Later the placeholders are swapped for the actual html.
		$replace_html = [];
		foreach (['span', 'class', 'search-highlight', 'search-highlight-color'] as $token) {
			$replace_html[$token] = $this->generatePlaceholder((string) $text, $replace_html);
		}

		$span = $replace_html['span'];
		$class = $replace_html['class'];
		$highlight = $replace_html['search-highlight'];
		$color = $replace_html['search-highlight-color'];

		$i = 0;
		foreach ($this->getQueryParts() as $part) {
			// the color index follows the position of the part in the query,
			// even if the part itself ends up being skipped
			$i++;

			// remove any boolean mode operators
			$part = (string) preg_replace("/([\-\+~])([\w]+)/i", '$2', $part);
			if ($part === '') {
				// an empty pattern would match between every character
				continue;
			}

			// escape the delimiter and any other regexp special chars
			$part = preg_quote($part, '/');

			$search = "/($part)/i";
			$replace = "<$span $class=\"$highlight $color{$i}\">$1</$span>";

			$text = preg_replace($search, $replace, $text);
		}

		foreach ($replace_html as $replace => $search) {
			$text = str_replace($search, $replace, $text);
		}

		return $text;
	}

	/**
	 * Return a string with highlighted matched queries and relevant context
	 * Determines context based upon occurrence and distance of words with each other.
	 *
	 * Texts that are not longer than $max_length are highlighted as a whole.
	 * Longer texts are reduced to excerpts around the matched terms, joined by '...'.
	 * If none of the terms occurs in the text, the first $max_length characters are used.
	 *
	 * @todo   This also highlights partials even if partial search is not allowed.
	 *
	 * @param string $text              Text to highlight
	 * @param int    $min_match_context Minimum number of characters of context kept on each side of a match (default: 30)
	 * @param int    $max_length        Maximum length of the truncated and highlighted text (default: 300)
	 *
	 * @return string
	 *
	 * @access public
	 */
	public function highlight($text, $min_match_context = 30, $max_length = 300) {

		$text = strip_tags($text);

		$haystack_length = elgg_strlen($text);

		// if haystack <= $max_length return the entire haystack w/formatting immediately
		if ($haystack_length <= $max_length) {
			return $this->highlightWords($text);
		}

		$haystack_lc = elgg_strtolower($text);
		$haystack_lc_length = elgg_strlen($haystack_lc);

		// get the starting positions and lengths for all matching words
		$starts = [];
		$lengths = [];
		foreach ($this->getQueryParts() as $part) {
			$part = elgg_strtolower($part);
			if ($part === '') {
				// nothing to look for
				continue;
			}

			$word_len = elgg_strlen($part);

			// walk over every occurrence; parts that do not occur add nothing
			$offset = 0;
			while ($offset < $haystack_lc_length && false !== ($pos = elgg_strpos($haystack_lc, $part, $offset))) {
				$start = max(0, $pos - $min_match_context);
				$stop = $pos + $word_len + $min_match_context;

				$starts[] = $start;
				$lengths[] = $stop - $start;

				// continue searching right after this occurrence
				$offset = $pos + $word_len;
			}
		}

		if (empty($starts)) {
			// no term was found, fall back to the beginning of the text
			$starts[] = 0;
			$lengths[] = $max_length;
		}

		$offsets = $this->consolidateSubstrings($starts, $lengths);

		// figure out if we can adjust the offsets and lengths
		// in order to return more context
		$total_length = array_sum($offsets);

		if ($total_length < $max_length && !empty($offsets)) {

			$add_length = (int) floor((($max_length - $total_length) / count($offsets)) / 2);

			$starts = [];
			$lengths = [];
			foreach ($offsets as $offset => $length) {
				// the excerpt starts earlier and grows by the same amount,
				// so the extra context is added in front of the excerpt
				$starts[] = max(0, $offset - $add_length);
				$lengths[] = $length + $add_length;
			}

			$offsets = $this->consolidateSubstrings($starts, $lengths);
		}

		// sort by order of string size descending (which is roughly
		// the proximity of matched terms) so we can keep the
		// substrings with terms closest together and discard
		// the others as needed to fit within $max_length.
		arsort($offsets);

		$return_strs = [];
		$total_length = 0;
		foreach ($offsets as $start => $length) {
			// continue past if adding this substring exceeds max length
			if ($total_length + $length > $max_length) {
				continue;
			}

			$total_length += $length;
			$return_strs[$start] = trim(elgg_substr($text, $start, $length));
		}

		if (empty($return_strs)) {
			// not a single substring fits, keep the largest one cut down to $max_length
			reset($offsets);
			$start = key($offsets);
			$return_strs[$start] = trim(elgg_substr($text, $start, $max_length));
		}

		// put the strings in order of occurrence
		ksort($return_strs);

		// add ...s where needed
		$return = implode('...', $return_strs);
		if (!array_key_exists(0, $return_strs)) {
			$return = "...$return";
		}

		// add to end of string if last substring doesn't hit the end.
		$starts = array_keys($return_strs);
		$last_pos = end($starts);
		if ($last_pos + elgg_strlen($return_strs[$last_pos]) < $haystack_length) {
			$return .= '...';
		}

		return $this->highlightWords($return);
	}

	/**
	 * Takes an array of offsets and lengths and consolidates any
	 * overlapping entries, returning an array of new offsets and lengths
	 *
	 * Offsets and lengths are specified in separate arrays because of possible
	 * index collisions with the offsets. Entries with the same index belong together.
	 *
	 * @param array $offsets offsets
	 * @param array $lengths lengths
	 *
	 * @return array consolidated lengths indexed by their offset, ordered by offset
	 */
	protected function consolidateSubstrings($offsets, $lengths) {
		// pair every offset with its end position before sorting,
		// so the offsets don't lose the association with their lengths
		$spans = [];
		foreach ($offsets as $i => $offset) {
			$spans[] = [$offset, $offset + $lengths[$i]];
		}

		// sort by occurrence
		usort($spans, function ($a, $b) {
			if ($a[0] < $b[0]) {
				return -1;
			}

			if ($a[0] > $b[0]) {
				return 1;
			}

			return 0;
		});

		$return = [];
		$current_start = null;
		$current_end = null;
		foreach ($spans as $span) {
			list($start, $end) = $span;

			if ($current_start !== null && $current_end > $start) {
				// overlaps the span being built, extend it
				// (never shrink it if the overlapping span ends earlier)
				$current_end = max($current_end, $end);
				continue;
			}

			if ($current_start !== null) {
				// will never have a colliding offset, so can return as a single array
				$return[$current_start] = $current_end - $current_start;
			}

			$current_start = $start;
			$current_end = $end;
		}

		if ($current_start !== null) {
			$return[$current_start] = $current_end - $current_start;
		}

		return $return;
	}

	/**
	 * Returns the search terms from the 'query_parts' param
	 *
	 * @return array empty if the param is not set
	 */
	private function getQueryParts() {
		return (array) elgg_extract('query_parts', $this->params, []);
	}

	/**
	 * Generates a 5 digit placeholder that is not in use yet and,
	 * if possible, is not part of the text
	 *
	 * @param string $text  Text the placeholder is going to be inserted into
	 * @param array  $taken Placeholders that are already in use
	 *
	 * @return string
	 */
	private function generatePlaceholder($text, array $taken) {
		$attempts = 0;
		do {
			$placeholder = (string) rand(10000, 99999);
			$attempts++;

			// give up on the text check after a while to guarantee termination
			$in_text = $attempts <= 100 && strpos($text, $placeholder) !== false;
		} while ($in_text || in_array($placeholder, $taken, true));

		return $placeholder;
	}

}


1		<?php
2
3		namespace Elgg\Search;
4
5		/**
6		* Highlights relevant substrings in search results
7		*
8		* @access private
9		*/
10		class Highlighter {
11
12		/**
13		* @var array
14		*/
15		protected $params = [];
16
17		/**
18		* Constructor
19		*
20		* @param array $params Search params
21		*
22		* @access private
23		*/
24	3	public function __construct(array $params = []) {
25	3	$this->params = $params;
26	3	}
27
28		/**
29		* Safely highlights search query words found in $string avoiding recursion
30		*
31		* @param string $text Text to highlight
32		*
33		* @return string
34		*
35		* @access public
36		*/
37	3	public function highlightWords($text) {
38
39	3	$text = _elgg_get_display_query($text);
40
41	3	$i = 1;
42		$replace_html = [
43	3	'span' => rand(10000, 99999),
44	3	'class' => rand(10000, 99999),
45	3	'search-highlight' => rand(10000, 99999),
46	3	'search-highlight-color' => rand(10000, 99999)
47		];
48
49	3	$parts = elgg_extract('query_parts', $this->params);
50
51	3	foreach ($parts as $part) {
52		// remove any boolean mode operators
53	3	$part = preg_replace("/([\-\+~])([\w]+)/i", '$2', $part);
54
55		// escape the delimiter and any other regexp special chars
56	3	$part = preg_quote($part, '/');
57
58	3	$search = "/($part)/i";
59
60		// Must replace with placeholders in case one of the search terms is in the html string.
61		// Later will replace the placeholders with the actual html.
62	3	$span = $replace_html['span'];
63	3	$class = $replace_html['class'];
64	3	$highlight = $replace_html['search-highlight'];
65	3	$color = $replace_html['search-highlight-color'];
66
67	3	$replace = "<$span $class=\"$highlight $color{$i}\">$1</$span>";
68	3	$text = preg_replace($search, $replace, $text);
69	3	$i++;
70		}
71
72	3	foreach ($replace_html as $replace => $search) {
73	3	$text = str_replace($search, $replace, $text);
74		}
75
76	3	return $text;
77		}
78
79		/**
80		* Return a string with highlighted matched queries and relevant context
81		* Determines context based upon occurrence and distance of words with each other.
82		*
83		* @todo This also highlights partials even if partial search is not allowed.
84		*
85		* @param string $text Text to highlight
86		* @param int $min_match_context Minimum length of the text to initiate highlighting (default: 30)
87		* @param int $max_length Maximum length of the truncated and highlighted text (default: 300)
88		*
89		* @return string
90		*
91		* @access public
92		*/
93	2	public function highlight($text, $min_match_context = 30, $max_length = 300) {
94
95	2	$text = strip_tags($text);
96
97	2	$haystack_length = elgg_strlen($text);
98	2	$haystack_lc = elgg_strtolower($text);
99
100	2	$parts = elgg_extract('query_parts', $this->params);
101
102		// if haystack < $max_length return the entire haystack w/formatting immediately
103	2	if ($haystack_length <= $max_length) {
104	2	return $this->highlightWords($text);
105		}
106
107		// get the starting positions and lengths for all matching words
108	1	$starts = [];
109	1	$lengths = [];
110	1	foreach ($parts as $part) {
111	1	$part = elgg_strtolower($part);
112	1	$count = elgg_substr_count($haystack_lc, $part);
113	1	$word_len = elgg_strlen($part);
114	1	$haystack_len = elgg_strlen($haystack_lc);
115
116		// find the start positions for the words
117	1	if ($count > 1) {
118	1	$offset = 0;
119	1	while (false !== $pos = elgg_strpos($haystack_lc, $part, $offset)) {
120	1	$start = ($pos - $min_match_context > 0) ? $pos - $min_match_context : 0;
121	1	$starts[] = $start;
122	1	$stop = $pos + $word_len + $min_match_context;
123	1	$lengths[] = $stop - $start;
124	1	$offset += $pos + $word_len;
125
126	1	if ($offset >= $haystack_len) {
127	1	break;
128		}
129		}
130		} else {
131		$pos = elgg_strpos($haystack_lc, $part);
132		$start = ($pos - $min_match_context > 0) ? $pos - $min_match_context : 0;
133		$starts[] = $start;
134		$stop = $pos + $word_len + $min_match_context;
135	1	$lengths[] = $stop - $start;
136		}
137		}
138
139	1	$offsets = $this->consolidateSubstrings($starts, $lengths);
140
141		// figure out if we can adjust the offsets and lengths
142		// in order to return more context
143	1	$total_length = array_sum($offsets);
144
145	1	$add_length = 0;
		0 ignored issues – show Unused Code introduced 2017-10-16 06:19 UTC by Report Bug Copy Issue Report The assignment to `$add_length` is dead and can be removed. Loading history...
146	1	if ($total_length < $max_length && $offsets) {
		0 ignored issues – show Bug Best Practice introduced 2017-10-16 06:19 UTC by Report Bug Copy Issue Report The expression `$offsets` of type `array` is implicitly converted to a boolean; are you sure this is intended? If so, consider using `! empty($expr)` instead to make it clear that you intend to check for an array without elements. This check marks implicit conversions of arrays to boolean values in a comparison. While in PHP an empty array is considered to be equal (but not identical) to false, this is not always apparent. Consider making the comparison explicit by using `empty(..)` or `! empty(...)` instead. Loading history...
147	1	$add_length = floor((($max_length - $total_length) / count($offsets)) / 2);
148
149	1	$starts = [];
150	1	$lengths = [];
151	1	foreach ($offsets as $offset => $length) {
152	1	$start = ($offset - $add_length > 0) ? $offset - $add_length : 0;
153	1	$length = $length + $add_length;
154	1	$starts[] = $start;
155	1	$lengths[] = $length;
156		}
157
158	1	$offsets = $this->consolidateSubstrings($starts, $lengths);
159		}
160
161		// sort by order of string size descending (which is roughly
162		// the proximity of matched terms) so we can keep the
163		// substrings with terms closest together and discard
164		// the others as needed to fit within $max_length.
165	1	arsort($offsets);
166
167	1	$return_strs = [];
168	1	$total_length = 0;
169	1	foreach ($offsets as $start => $length) {
170	1	$string = trim(elgg_substr($text, $start, $length));
171
172		// continue past if adding this substring exceeds max length
173	1	if ($total_length + $length > $max_length) {
174		continue;
175		}
176
177	1	$total_length += $length;
178	1	$return_strs[$start] = $string;
179		}
180
181		// put the strings in order of occurrence
182	1	ksort($return_strs);
183
184		// add ...s where needed
185	1	$return = implode('...', $return_strs);
186	1	if (!array_key_exists(0, $return_strs)) {
187		$return = "...$return";
188		}
189
190		// add to end of string if last substring doesn't hit the end.
191	1	$starts = array_keys($return_strs);
192	1	$last_pos = $starts[count($starts) - 1];
193	1	if ($last_pos + elgg_strlen($return_strs[$last_pos]) < $haystack_length) {
194		$return .= '...';
195		}
196
197	1	return $this->highlightWords($return);
198		}
199
200		/**
201		* Takes an array of offsets and lengths and consolidates any
202		* overlapping entries, returning an array of new offsets and lengths
203		*
204		* Offsets and lengths are specified in separate arrays because of possible
205		* index collisions with the offsets.
206		*
207		* @param array $offsets offsets
208		* @param array $lengths lengths
209		*
210		* @return array
211		*/
212	1	protected function consolidateSubstrings($offsets, $lengths) {
213		// sort offsets by occurrence
214	1	asort($offsets, SORT_NUMERIC);
215
216		// reset the indexes maintaining association with the original offsets.
217	1	$offsets = array_merge($offsets);
218
219	1	$new_lengths = [];
220	1	foreach ($offsets as $i => $offset) {
221	1	$new_lengths[] = $lengths[$i];
222		}
223
224	1	$lengths = $new_lengths;
225
226	1	$return = [];
227	1	$count = count($offsets);
228	1	for ($i = 0; $i < $count; $i++) {
229	1	$offset = $offsets[$i];
230	1	$length = $lengths[$i];
231	1	$end_pos = $offset + $length;
232
233		// find the next entry that doesn't overlap
234	1	while (array_key_exists($i + 1, $offsets) && $end_pos > $offsets[$i + 1]) {
235	1	$i++;
236	1	if (!array_key_exists($i, $offsets)) {
237		break;
238		}
239	1	$end_pos = $lengths[$i] + $offsets[$i];
240		}
241
242	1	$length = $end_pos - $offset;
243
244		// will never have a colliding offset, so can return as a single array
245	1	$return[$offset] = $length;
246		}
247
248	1	return $return;
249		}
250
251		}
252

Elgg / Elgg

Push — master ( c0a3a7...3b84a4 )

Highlighter::__construct() A

Complexity

Size

Duplication

Code Coverage

Importance

Duplication Side-by-Side

Filter issues like