Failed Conditions
Branch release-2.1 (4e22cf)
by Rick
07:22
created

custom_search::supportsMethod()   B

Complexity

Conditions 7
Paths 7

Size

Total Lines 18
Code Lines 12

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 7
eloc 12
nc 7
nop 2
dl 0
loc 18
rs 8.2222
c 0
b 0
f 0
1
<?php
2
3
/**
4
 * Simple Machines Forum (SMF)
5
 *
6
 * @package SMF
7
 * @author Simple Machines http://www.simplemachines.org
8
 * @copyright 2017 Simple Machines and individual contributors
9
 * @license http://www.simplemachines.org/about/smf/license.php BSD
10
 *
11
 * @version 2.1 Beta 4
12
 */
13
14
if (!defined('SMF'))
15
	die('No direct access...');
16
17
/**
18
 * Used for the "custom search index" option
19
 * Class custom_search
20
 */
21
class custom_search extends search_api
0 ignored issues
show
Coding Style introduced by
The property $min_word_length is not named in camelCase.

This check marks property names that have not been written in camelCase.

In camelCase names are written without any punctuation, the start of each new word being marked by a capital letter. Thus the name database connection string becomes databaseConnectionString.

Loading history...
Coding Style introduced by
The property $supported_databases is not named in camelCase.

This check marks property names that have not been written in camelCase.

In camelCase names are written without any punctuation, the start of each new word being marked by a capital letter. Thus the name database connection string becomes databaseConnectionString.

Loading history...
Coding Style introduced by
This class is not in CamelCase format.

Classes in PHP are usually named in CamelCase.

In camelCase names are written without any punctuation, the start of each new word being marked by a capital letter. The whole name starts with a capital letter as well.

Thus the name database provider becomes DatabaseProvider.

Loading history...
22
{
23
	/**
24
	 * @var array Index settings
25
	 */
26
	protected $indexSettings = array();
27
28
	/**
29
	 * @var array An array of banned words
30
	 */
31
	protected $bannedWords = array();
32
33
	/**
34
	 * @var int|null Minimum word length (null for no minimum)
35
	 */
36
	protected $min_word_length = null;
37
38
	/**
39
	 * @var array Which databases support this method
40
	 */
41
	protected $supported_databases = array('mysql', 'postgresql');
42
43
	/**
	 * Constructor function
	 *
	 * Flags the API as unsupported when the current database type is not in
	 * $supported_databases. Otherwise, if a custom index configuration has been
	 * saved, loads the index settings, the banned (stop) words and the minimum
	 * word length from $modSettings.
	 */
	public function __construct()
	{
		global $smcFunc, $modSettings, $db_type;

		// Is this database supported?
		if (!in_array($db_type, $this->supported_databases, true))
		{
			// NOTE(review): is_supported is not declared in this class; presumably inherited from search_api.
			$this->is_supported = false;
			return;
		}

		// No custom index has been configured yet, so keep the defaults.
		if (empty($modSettings['search_custom_index_config']))
			return;

		// A malformed setting must not replace the array default with a non-array.
		$decodedSettings = $smcFunc['json_decode']($modSettings['search_custom_index_config'], true);
		$this->indexSettings = is_array($decodedSettings) ? $decodedSettings : array();

		$this->bannedWords = empty($modSettings['search_stopwords']) ? array() : explode(',', $modSettings['search_stopwords']);

		// Fall back to "no minimum" instead of reading an undefined index.
		$this->min_word_length = isset($this->indexSettings['bytes_per_word']) ? $this->indexSettings['bytes_per_word'] : null;
	}
65
66
	/**
	 * {@inheritDoc}
	 *
	 * Tells the caller whether this search API implements the named method.
	 *
	 * @param string $methodName The name of the method being asked about
	 * @param mixed $query_params Accepted for interface compatibility; not used here
	 * @return bool True for the methods listed below, false for anything else
	 */
	public function supportsMethod($methodName, $query_params = null)
	{
		// The hooks this class provides an implementation for.
		$implementedMethods = array(
			'isValid',
			'searchSort',
			'prepareIndexes',
			'indexedWordQuery',
			'postCreated',
			'postModified',
		);

		// Loose comparison is deliberate so the matching rules stay as they have always been.
		return in_array($methodName, $implementedMethods);
	}
87
88
	/**
	 * {@inheritDoc}
	 *
	 * The custom index counts as usable once a non-empty
	 * 'search_custom_index_config' setting exists.
	 *
	 * @return bool Whether a custom index configuration has been saved
	 */
	public function isValid()
	{
		global $modSettings;

		if (empty($modSettings['search_custom_index_config']))
			return false;

		return true;
	}
97
98
	/**
	 * {@inheritDoc}
	 *
	 * Comparison callback for two search words. Each word is weighted by its
	 * byte length, and words present in the global $excludedWords list have
	 * 1000 subtracted so they always weigh less than non-excluded words.
	 *
	 * @param string $a The first word
	 * @param string $b The second word
	 * @return int 1 if $a weighs more than $b, -1 if it weighs less, 0 if they are equal
	 */
	public function searchSort($a, $b)
	{
		global $excludedWords;

		$weightA = strlen($a);
		if (in_array($a, $excludedWords))
			$weightA -= 1000;

		$weightB = strlen($b);
		if (in_array($b, $excludedWords))
			$weightB -= 1000;

		if ($weightA > $weightB)
			return 1;

		if ($weightA < $weightB)
			return -1;

		return 0;
	}
110
111
	/**
	 * {@inheritDoc}
	 *
	 * Registers one search term with the running search. The raw term is added to
	 * $wordsSearch['words'] unless 'search_force_index' is enabled. Its sub-words,
	 * as produced by text2words(), are added to $wordsSearch['indexed_words'] when
	 * they meet the minimum length and are not banned.
	 *
	 * @param string $word The search term to prepare
	 * @param array &$wordsSearch Receives entries under 'words' and 'indexed_words'
	 * @param array &$wordsExclude Receives the sub-words of an excluded term
	 * @param bool $isExcluded Whether the term is one the user wants excluded
	 */
	public function prepareIndexes($word, array &$wordsSearch, array &$wordsExclude, $isExcluded)
	{
		global $modSettings, $smcFunc;

		$pieces = text2words($word, $this->min_word_length, true);

		// Unless the index is forced, the term itself is also searched for.
		if (empty($modSettings['search_force_index']))
			$wordsSearch['words'][] = $word;

		// Excluded phrases don't benefit from being split into subwords.
		if ($isExcluded && count($pieces) > 1)
			return;

		foreach ($pieces as $piece)
		{
			// Skip anything too short or on the banned list.
			if ($smcFunc['strlen']($piece) < $this->min_word_length || in_array($piece, $this->bannedWords))
				continue;

			$wordsSearch['indexed_words'][] = $piece;

			if ($isExcluded)
				$wordsExclude[] = $piece;
		}
	}
139
140
	/**
	 * {@inheritDoc}
	 *
	 * Builds and runs the query that finds messages matching the given words.
	 * Un-indexed words are matched against the message body with LIKE or RLIKE.
	 * Indexed words are matched through one join on log_search_words per word:
	 * an inner join for required words, a left join plus IS NULL for excluded ones.
	 *
	 * @param array $words Expects the keys 'words' and 'indexed_words'
	 * @param array $search_data Expects 'params', 'no_regexp', 'insert_into', 'max_results' and 'indexed_results'
	 * @return mixed Whatever $smcFunc['db_search_query'] returns for the query
	 */
	public function indexedWordQuery(array $words, array $search_data)
	{
		global $modSettings, $smcFunc;

		$query_select = array(
			'id_msg' => 'm.id_msg',
		);
		$query_inner_join = array();
		$query_left_join = array();
		$query_where = array();
		$query_params = $search_data['params'];

		if ($query_params['id_search'])
			$query_select['id_search'] = '{int:id_search}';

		// Words that are not in the index are matched against the message body.
		$bodyCount = 0;
		foreach ($words['words'] as $regularWord)
		{
			$negation = in_array($regularWord, $query_params['excluded_words']) ? ' NOT' : '';
			$useLike = empty($modSettings['search_match_words']) || $search_data['no_regexp'];

			$query_where[] = 'm.body' . $negation . ($useLike ? ' LIKE ' : ' RLIKE ') . '{string:complex_body_' . $bodyCount . '}';
			$query_params['complex_body_' . $bodyCount] = $this->buildMatchPattern($regularWord, $useLike);
			$bodyCount++;
		}

		// Optional restrictions, each applied only when its parameter is set.
		$optionalFilters = array(
			'user_query' => '{raw:user_query}',
			'board_query' => 'm.id_board {raw:board_query}',
			'topic' => 'm.id_topic = {int:topic}',
			'min_msg_id' => 'm.id_msg >= {int:min_msg_id}',
			'max_msg_id' => 'm.id_msg <= {int:max_msg_id}',
		);
		foreach ($optionalFilters as $paramName => $condition)
		{
			if ($query_params[$paramName])
				$query_where[] = $condition;
		}

		// Excluded phrases are filtered on the subject unless the index is forced.
		if (!empty($query_params['excluded_phrases']) && empty($modSettings['search_force_index']))
		{
			$phraseCount = 0;
			foreach ($query_params['excluded_phrases'] as $phrase)
			{
				$useLike = empty($modSettings['search_match_words']) || $search_data['no_regexp'];

				$query_where[] = 'subject NOT ' . ($useLike ? ' LIKE ' : ' RLIKE ') . '{string:exclude_subject_phrase_' . $phraseCount . '}';
				$query_params['exclude_subject_phrase_' . $phraseCount] = $this->buildMatchPattern($phrase, $useLike);
				$phraseCount++;
			}
		}

		// The same goes for excluded subject words.
		if (!empty($query_params['excluded_subject_words']) && empty($modSettings['search_force_index']))
		{
			$subjectWordCount = 0;
			foreach ($query_params['excluded_subject_words'] as $excludedWord)
			{
				$useLike = empty($modSettings['search_match_words']) || $search_data['no_regexp'];

				$query_where[] = 'subject NOT ' . ($useLike ? ' LIKE ' : ' RLIKE ') . '{string:exclude_subject_words_' . $subjectWordCount . '}';
				$query_params['exclude_subject_words_' . $subjectWordCount] = $this->buildMatchPattern($excludedWord, $useLike);
				$subjectWordCount++;
			}
		}

		// One aliased join on the word index per indexed word.
		$numTables = 0;
		$previousAlias = 'm';
		foreach ($words['indexed_words'] as $indexedWord)
		{
			$alias = 'lsw' . ++$numTables;

			// Excluded word: left join and require that no matching row exists.
			if (in_array($indexedWord, $query_params['excluded_index_words']))
			{
				$query_left_join[] = '{db_prefix}log_search_words AS ' . $alias . ' ON (' . $alias . '.id_word = ' . $indexedWord . ' AND ' . $alias . '.id_msg = m.id_msg)';
				$query_where[] = '(' . $alias . '.id_word IS NULL)';
				continue;
			}

			// Required word: chain the inner join onto the previous required word (or the messages table).
			$query_inner_join[] = '{db_prefix}log_search_words AS ' . $alias . ' ON (' . $alias . '.id_msg = ' . $previousAlias . '.id_msg)';
			$query_where[] = $alias . '.id_word = ' . $indexedWord;
			$previousAlias = $alias;
		}

		// Assemble the statement piece by piece.
		$sql = '';
		if ($smcFunc['db_support_ignore'])
			$sql .= '
			INSERT IGNORE INTO {db_prefix}' . $search_data['insert_into'] . '
				(' . implode(', ', array_keys($query_select)) . ')';

		$sql .= '
			SELECT ' . implode(', ', $query_select) . '
			FROM {db_prefix}messages AS m';

		if (!empty($query_inner_join))
			$sql .= '
				INNER JOIN ' . implode('
				INNER JOIN ', $query_inner_join);

		if (!empty($query_left_join))
			$sql .= '
				LEFT JOIN ' . implode('
				LEFT JOIN ', $query_left_join);

		$sql .= '
			WHERE ' . implode('
				AND ', $query_where);

		if (!empty($search_data['max_results']))
			$sql .= '
			LIMIT ' . ($search_data['max_results'] - $search_data['indexed_results']);

		return $smcFunc['db_search_query']('insert_into_log_messages_fulltext', $sql, $query_params);
	}

	/**
	 * Builds the value bound to a LIKE or RLIKE placeholder for one search term.
	 *
	 * @param string $text The word or phrase to match
	 * @param bool $useLike True for a LIKE pattern, false for a word-boundary regular expression
	 * @return string The pattern to bind as a query parameter
	 */
	private function buildMatchPattern($text, $useLike)
	{
		// LIKE: escape the wildcard characters and match anywhere in the text.
		if ($useLike)
			return '%' . strtr($text, array('_' => '\\_', '%' => '\\%')) . '%';

		// RLIKE: neutralise regex metacharacters, then anchor on word boundaries.
		$escaped = preg_replace(array('/([\[\]$.+*?|{}()])/'), array('[$1]'), $text);

		return '[[:<:]]' . addcslashes($escaped, '\\\'') . '[[:>:]]';
	}
227
228
	/**
	 * {@inheritDoc}
	 *
	 * Adds the words of a newly created message to the custom search index.
	 *
	 * @param array &$msgOptions Message data; 'body' and 'id' are read here
	 * @param array &$topicOptions Not used by this implementation
	 * @param array &$posterOptions Not used by this implementation
	 */
	public function postCreated(array &$msgOptions, array &$topicOptions, array &$posterOptions)
	{
		global $modSettings, $smcFunc;

		$indexConfig = $smcFunc['json_decode']($modSettings['search_custom_index_config'], true);
		$wordIds = text2words($msgOptions['body'], $indexConfig['bytes_per_word'], true);

		// One (word, message) row per word found in the body.
		$rows = array();
		foreach ($wordIds as $wordId)
			$rows[] = array($wordId, $msgOptions['id']);

		// Nothing indexable in this message.
		if (empty($rows))
			return;

		$smcFunc['db_insert']('ignore',
			'{db_prefix}log_search_words',
			array('id_word' => 'int', 'id_msg' => 'int'),
			$rows,
			array('id_word', 'id_msg')
		);
	}
249
250
	/**
	 * {@inheritDoc}
	 *
	 * Brings the custom search index up to date after a message was edited.
	 * Words that only occur in the old body are removed from the index, and
	 * words that only occur in the new body are added to it.
	 *
	 * @param array &$msgOptions Message data; 'body', 'old_body' (optional) and 'id' are read here
	 * @param array &$topicOptions Not used by this implementation
	 * @param array &$posterOptions Not used by this implementation
	 */
	public function postModified(array &$msgOptions, array &$topicOptions, array &$posterOptions)
	{
		global $modSettings, $smcFunc;

		// Only a changed body affects the word index.
		if (!isset($msgOptions['body']))
			return;

		$customIndexSettings = $smcFunc['json_decode']($modSettings['search_custom_index_config'], true);
		$stopwords = empty($modSettings['search_stopwords']) ? array() : explode(',', $modSettings['search_stopwords']);
		$old_body = isset($msgOptions['old_body']) ? $msgOptions['old_body'] : '';

		// Create the new and old index.
		$old_index = text2words($old_body, $customIndexSettings['bytes_per_word'], true);
		$new_index = text2words($msgOptions['body'], $customIndexSettings['bytes_per_word'], true);

		// Calculate the words to be added and removed from the index.
		$removed_words = array_diff(array_diff($old_index, $new_index), $stopwords);
		$inserted_words = array_unique(array_diff(array_diff($new_index, $old_index), $stopwords));

		// Delete the removed words AND the added ones to avoid key constraints.
		// This must also run when words were only added: without the old body every
		// word looks new, and rows that already exist would break the plain insert below.
		$words_to_delete = array_values(array_unique(array_merge($removed_words, $inserted_words)));
		if (!empty($words_to_delete))
		{
			$smcFunc['db_query']('', '
				DELETE FROM {db_prefix}log_search_words
				WHERE id_msg = {int:id_msg}
					AND id_word IN ({array_int:removed_words})',
				array(
					'removed_words' => $words_to_delete,
					'id_msg' => $msgOptions['id'],
				)
			);
		}

		// Add the new words to be indexed.
		if (!empty($inserted_words))
		{
			$inserts = array();
			foreach ($inserted_words as $word)
				$inserts[] = array($word, $msgOptions['id']);

			// id_word is declared as int, matching postCreated() and the array_int delete above.
			$smcFunc['db_insert']('insert',
				'{db_prefix}log_search_words',
				array('id_word' => 'int', 'id_msg' => 'int'),
				$inserts,
				array('id_word', 'id_msg')
			);
		}
	}
301
}
302
303
?>