fulltext_search   F
last analyzed

Complexity

Total Complexity 63

Size/Duplication

Total Lines 288
Duplicated Lines 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
eloc 142
c 1
b 0
f 0
dl 0
loc 288
rs 3.36
wmc 63

6 Methods

Rating   Name   Duplication   Size   Complexity  
A __construct() 0 13 3
B prepareIndexes() 0 31 11
A _getMinWordLength() 0 24 4
A searchSort() 0 8 5
A supportsMethod() 0 22 5
F indexedWordQuery() 0 141 35

How to fix   Complexity   

Complex Class

Complex classes like fulltext_search often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

While breaking up the class, it is a good idea to analyze how other classes use fulltext_search, and based on these observations, apply Extract Interface, too.

1
<?php
2
3
/**
4
 * Simple Machines Forum (SMF)
5
 *
6
 * @package SMF
7
 * @author Simple Machines https://www.simplemachines.org
8
 * @copyright 2023 Simple Machines and individual contributors
9
 * @license https://www.simplemachines.org/about/smf/license.php BSD
10
 *
11
 * @version 2.1.4
12
 */
13
14
// This file only makes sense when loaded by the forum itself; stop right away otherwise.
if (!defined('SMF'))
{
	die('No direct access...');
}
16
17
/**
 * Class fulltext_search
 *
 * Search API that answers searches through the database's fulltext support
 * (MATCH ... AGAINST on MySQL, to_tsvector / plainto_tsquery on PostgreSQL).
 * Words the fulltext index would skip are matched with LIKE / RLIKE instead.
 */
class fulltext_search extends search_api
{
	/**
	 * @var array Which words are banned
	 */
	protected $bannedWords = array();

	/**
	 * @var int The minimum word length
	 */
	protected $min_word_length = 4;

	/**
	 * @var array Which databases support this method?
	 */
	protected $supported_databases = array('mysql', 'postgresql');

	/**
	 * The constructor function
	 *
	 * Marks the API as unsupported when the current database type is not listed in
	 * $supported_databases; otherwise loads the banned words and the minimum word length.
	 */
	public function __construct()
	{
		global $modSettings, $db_type;

		// Is this database supported?
		if (!in_array($db_type, $this->supported_databases))
		{
			$this->is_supported = false;
			return;
		}

		$this->bannedWords = empty($modSettings['search_banned_words']) ? array() : explode(',', $modSettings['search_banned_words']);
		$this->min_word_length = $this->_getMinWordLength();
	}

	/**
	 * {@inheritDoc}
	 */
	public function supportsMethod($methodName, $query_params = null)
	{
		// These are the methods this class implements itself.
		if (in_array($methodName, array('searchSort', 'prepareIndexes', 'indexedWordQuery'), true))
			return true;

		// All other methods: maybe parent got support.
		return parent::supportsMethod($methodName, $query_params);
	}

	/**
	 * fulltext_search::_getMinWordLength()
	 *
	 * What is the minimum word length full text supports?
	 *
	 * PostgreSQL always reports 0. For other databases the ft_min_word_len
	 * variable is queried; if that fails or does not yield exactly one row,
	 * 4 is used.
	 *
	 * @return int The minimum word length
	 */
	protected function _getMinWordLength()
	{
		global $smcFunc, $db_type;

		if ($db_type == 'postgresql')
			return 0;

		// Try to determine the minimum number of letters for a fulltext search.
		$request = $smcFunc['db_search_query']('max_fulltext_length', '
			SHOW VARIABLES
			LIKE {string:fulltext_minimum_word_length}',
			array(
				'fulltext_minimum_word_length' => 'ft_min_word_len',
			)
		);

		// 4 is the MySQL default...
		$min_word_length = 4;

		if ($request !== false)
		{
			if ($smcFunc['db_num_rows']($request) == 1)
			{
				list (, $value) = $smcFunc['db_fetch_row']($request);

				// The database hands the value back as text; callers expect an integer.
				$min_word_length = (int) $value;
			}

			// Release the result even when it did not hold the single row we wanted.
			$smcFunc['db_free_result']($request);
		}

		return $min_word_length;
	}

	/**
	 * {@inheritDoc}
	 *
	 * Each word scores its length, minus 1000 when it is listed in the global
	 * $excludedWords. Higher scores sort first.
	 */
	public function searchSort($a, $b)
	{
		global $excludedWords, $smcFunc;

		$x = $smcFunc['strlen']($a) - (in_array($a, $excludedWords) ? 1000 : 0);
		$y = $smcFunc['strlen']($b) - (in_array($b, $excludedWords) ? 1000 : 0);

		// Descending order: the larger score comes first.
		return $y <=> $x;
	}

	/**
	 * {@inheritDoc}
	 *
	 * Always registers the word in $wordsSearch['indexed_words'] (quoted when it
	 * splits into anything other than exactly one subword). Unless
	 * $modSettings['search_force_index'] is set, words too short for the index
	 * are additionally registered in 'words' and 'complex_words'.
	 */
	public function prepareIndexes($word, array &$wordsSearch, array &$wordsExclude, $isExcluded)
	{
		global $modSettings, $smcFunc;

		$subwords = text2words($word, null, false);

		// The form of the word handed to the fulltext engine; phrases get quoted.
		$fulltextWord = count($subwords) === 1 ? $word : '"' . $word . '"';

		if (empty($modSettings['search_force_index']))
		{
			// A boolean capable search engine and not forced to only use an index, we may use a non indexed search
			// this is harder on the server so we are restrictive here
			$use_non_indexed = false;

			if (count($subwords) > 1 && preg_match('~[.:@$]~', $word))
			{
				// using special characters that a full index would ignore and the remaining words are short which would also be ignored
				$use_non_indexed = ($smcFunc['strlen'](current($subwords)) < $this->min_word_length) && ($smcFunc['strlen'](next($subwords)) < $this->min_word_length);
			}
			elseif ($smcFunc['strlen'](trim($word, "/*- ")) < $this->min_word_length)
			{
				// short words have feelings too
				$use_non_indexed = true;
			}

			if ($use_non_indexed)
			{
				$wordsSearch['words'][] = trim($word, "/*- ");
				$wordsSearch['complex_words'][] = $fulltextWord;
			}
		}

		$wordsSearch['indexed_words'][] = $fulltextWord;
		if ($isExcluded)
			$wordsExclude[] = $fulltextWord;
	}

	/**
	 * Builds the value bound to a LIKE / RLIKE placeholder for one search term.
	 *
	 * @param string $term The raw word or phrase to match
	 * @param bool $is_search_regex Whether the comparison is RLIKE (true) or LIKE (false)
	 * @return string A word-boundary-wrapped regular expression, or a %...% LIKE pattern
	 */
	protected function _buildMatchValue($term, $is_search_regex)
	{
		global $smcFunc;

		if (!$is_search_regex)
			return '%' . $smcFunc['db_escape_wildcard_string']($term) . '%';

		// Regex metacharacters are neutralised by wrapping each one in a bracket expression.
		// A caret cannot be treated that way ("[^]" opens a negated set), so it is
		// backslash-escaped together with the backslash and the single quote.
		$escaped = addcslashes(preg_replace('/[\[\]$.+*?&|{}()]/', '[$0]', $term), '\\\'^');

		// Word boundary syntax depends on the regex flavour the database uses.
		return sprintf($smcFunc['db_supports_pcre'] ? '\\b%s\\b' : '[[:<:]]%s[[:>:]]', $escaped);
	}

	/**
	 * {@inheritDoc}
	 *
	 * Assembles the WHERE conditions from the prepared words and the search
	 * parameters, then runs the SELECT (wrapped in INSERT IGNORE into the
	 * $search_data['insert_into'] table when the database supports it).
	 *
	 * @return mixed Whatever $smcFunc['db_search_query'] returns for the query
	 */
	public function indexedWordQuery(array $words, array $search_data)
	{
		global $modSettings, $smcFunc;

		// Specify the function to search with. Regex is for word boundaries.
		$is_search_regex = !empty($modSettings['search_match_words']) && !$search_data['no_regexp'];
		$query_match_type = $is_search_regex ? 'RLIKE' : 'LIKE';

		$query_select = array(
			'id_msg' => 'm.id_msg',
		);
		$query_where = array();
		$query_params = $search_data['params'];

		$is_postgresql = $smcFunc['db_title'] === POSTGRE_TITLE;

		// PostgreSQL is always searched in simple mode.
		if ($is_postgresql)
			$modSettings['search_simple_fulltext'] = true;

		$simple_fulltext = !empty($modSettings['search_simple_fulltext']);

		if ($query_params['id_search'])
			$query_select['id_search'] = '{int:id_search}';

		// Words the index would skip are matched directly against the message body.
		$count = 0;
		if (!$simple_fulltext)
			foreach ($words['words'] as $regularWord)
			{
				$negation = in_array($regularWord, $query_params['excluded_words']) ? 'NOT ' : '';

				$query_where[] = 'm.body ' . $negation . $query_match_type . ' {string:complex_body_' . $count . '}';
				$query_params['complex_body_' . $count++] = $this->_buildMatchValue($regularWord, $is_search_regex);
			}

		if ($query_params['user_query'])
			$query_where[] = '{raw:user_query}';
		if ($query_params['board_query'])
			$query_where[] = 'm.id_board {raw:board_query}';

		if ($query_params['topic'])
			$query_where[] = 'm.id_topic = {int:topic}';
		if ($query_params['min_msg_id'])
			$query_where[] = 'm.id_msg >= {int:min_msg_id}';
		if ($query_params['max_msg_id'])
			$query_where[] = 'm.id_msg <= {int:max_msg_id}';

		// Excluded phrases and excluded subject words must not appear in the subject.
		// Placeholder prefix => key of the term list in the query parameters.
		$subject_exclusions = array(
			'exclude_subject_phrase_' => 'excluded_phrases',
			'exclude_subject_words_' => 'excluded_subject_words',
		);

		if (empty($modSettings['search_force_index']))
			foreach ($subject_exclusions as $placeholder => $source_key)
			{
				if (empty($query_params[$source_key]))
					continue;

				$count = 0;
				foreach ($query_params[$source_key] as $term)
				{
					$query_where[] = 'subject NOT ' . $query_match_type . ' {string:' . $placeholder . $count . '}';
					$query_params[$placeholder . $count++] = $this->_buildMatchValue($term, $is_search_regex);
				}
			}

		if ($simple_fulltext)
		{
			if ($is_postgresql)
			{
				$language_ftx = $smcFunc['db_search_language']();

				$query_where[] = 'to_tsvector({string:language_ftx},body) @@ plainto_tsquery({string:language_ftx},{string:body_match})';
				$query_params['language_ftx'] = $language_ftx;
			}
			else
				$query_where[] = 'MATCH (body) AGAINST ({string:body_match})';

			$query_params['body_match'] = implode(' ', array_diff($words['indexed_words'], $query_params['excluded_index_words']));
		}
		else
		{
			// Boolean mode. PostgreSQL never gets here because it was forced into simple mode above.

			// remove any indexed words that are used in the complex body search terms
			$words['indexed_words'] = array_diff($words['indexed_words'], $words['complex_words']);

			// Required words get a "+", excluded ones a "-".
			$boolean_terms = array();
			foreach ($words['indexed_words'] as $fulltextWord)
				$boolean_terms[] = (in_array($fulltextWord, $query_params['excluded_index_words']) ? '-' : '+') . $fulltextWord;

			$query_params['boolean_match'] = implode(' ', $boolean_terms);

			// if we have bool terms to search, add them in
			if ($query_params['boolean_match'])
				$query_where[] = 'MATCH (body) AGAINST ({string:boolean_match} IN BOOLEAN MODE)';
		}

		$ignoreRequest = $smcFunc['db_search_query']('insert_into_log_messages_fulltext', ($smcFunc['db_support_ignore'] ? ('
			INSERT IGNORE INTO {db_prefix}' . $search_data['insert_into'] . '
				(' . implode(', ', array_keys($query_select)) . ')') : '') . '
			SELECT ' . implode(', ', $query_select) . '
			FROM {db_prefix}messages AS m
			WHERE ' . implode('
				AND ', $query_where) . (empty($search_data['max_results']) ? '' : '
			LIMIT ' . ($search_data['max_results'] - $search_data['indexed_results'])),
			$query_params
		);

		return $ignoreRequest;
	}
}
311
312
?>