Completed
Push — release-2.1 ( 6f6d35...abeae7 )
by Mathias
08:46
created

custom_search::supportsMethod()   B

Complexity

Conditions 7
Paths 7

Size

Total Lines 18
Code Lines 12

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 7
eloc 12
nc 7
nop 2
dl 0
loc 18
rs 8.2222
c 0
b 0
f 0
1
<?php
2
3
/**
4
 * Simple Machines Forum (SMF)
5
 *
6
 * @package SMF
7
 * @author Simple Machines http://www.simplemachines.org
8
 * @copyright 2017 Simple Machines and individual contributors
9
 * @license http://www.simplemachines.org/about/smf/license.php BSD
10
 *
11
 * @version 2.1 Beta 4
12
 */
13
14
if (!defined('SMF'))
15
	die('No direct access...');
16
17
/**
18
 * Used for the "custom search index" option
19
 * Class custom_search
20
 */
21
class custom_search extends search_api
22
{
23
	/**
24
	 * @var array Index settings
25
	 */
26
	protected $indexSettings = array();
27
28
	/**
29
	 * @var array An array of banned words
30
	 */
31
	protected $bannedWords = array();
32
33
	/**
34
	 * @var int|null Minimum word length (null for no minimum)
35
	 */
36
	protected $min_word_length = null;
37
38
	/**
39
	 * @var array Which databases support this method
40
	 */
41
	protected $supported_databases = array('mysql', 'postgresql');
42
43
	/**
44
	 * Constructor function
45
	 */
46
	public function __construct()
	{
		global $smcFunc, $modSettings, $db_type;

		// Flag this API as unsupported and stop when the current database type is not in the supported list.
		$databaseSupported = in_array($db_type, $this->supported_databases);
		if (!$databaseSupported)
		{
			$this->is_supported = false;
			return;
		}

		// Nothing further to load when no custom index configuration is stored.
		if (!empty($modSettings['search_custom_index_config']))
		{
			$this->indexSettings = $smcFunc['json_decode']($modSettings['search_custom_index_config'], true);

			// Stop words are kept as a comma separated list in the settings.
			if (empty($modSettings['search_stopwords']))
				$this->bannedWords = array();
			else
				$this->bannedWords = explode(',', $modSettings['search_stopwords']);

			$this->min_word_length = $this->indexSettings['bytes_per_word'];
		}
	}
65
66
	/**
67
	 * {@inheritDoc}
68
	 */
69
	public function supportsMethod($methodName, $query_params = null)
	{
		// Reports whether this API implements the named method.
		// $query_params is part of the signature but is not consulted here.
		switch ($methodName)
		{
			// The methods this class provides.
			case 'isValid':
			case 'searchSort':
			case 'prepareIndexes':
			case 'indexedWordQuery':
			case 'postCreated':
			case 'postModified':
				return true;

			// Any other method name is not supported by this API.
			default:
				return false;
		}
	}
87
88
	/**
89
	 * {@inheritDoc}
90
	 */
91
	public function isValid()
	{
		global $modSettings;

		// Valid only when a custom index configuration has been stored in the settings.
		if (empty($modSettings['search_custom_index_config']))
			return false;

		return true;
	}
97
98
	/**
99
	 * {@inheritDoc}
100
	 */
101
	public function searchSort($a, $b)
	{
		global $excludedWords;

		// Each word is weighted by its byte length; excluded words are penalised by 1000.
		$weightA = strlen($a);
		if (in_array($a, $excludedWords))
			$weightA -= 1000;

		$weightB = strlen($b);
		if (in_array($b, $excludedWords))
			$weightB -= 1000;

		// Standard comparison callback result: 1, -1 or 0 depending on how the weights compare.
		if ($weightA > $weightB)
			return 1;

		if ($weightA < $weightB)
			return -1;

		return 0;
	}
110
111
	/**
112
	 * {@inheritDoc}
113
	 */
114
	public function prepareIndexes($word, array &$wordsSearch, array &$wordsExclude, $isExcluded)
	{
		global $modSettings, $smcFunc;

		$subwords = text2words($word, $this->min_word_length, true);

		// Unless the index is forced, the full word is also searched for directly.
		if (empty($modSettings['search_force_index']))
			$wordsSearch['words'][] = $word;

		// Excluded phrases don't benefit from being split into subwords.
		if (count($subwords) > 1 && $isExcluded)
			return;

		foreach ($subwords as $subword)
		{
			// Skip subwords that are too short or that are on the banned list.
			if ($smcFunc['strlen']($subword) < $this->min_word_length || in_array($subword, $this->bannedWords))
				continue;

			$wordsSearch['indexed_words'][] = $subword;

			if ($isExcluded)
				$wordsExclude[] = $subword;
		}
	}
139
140
	/**
141
	 * {@inheritDoc}
142
	 */
143
	public function indexedWordQuery(array $words, array $search_data)
	{
		global $modSettings, $smcFunc;

		// Builds and runs the query that selects the messages matching the given words,
		// joining {db_prefix}log_search_words once per indexed word.
		// Returns whatever $smcFunc['db_search_query'] returns.
		$query_select = array(
			'id_msg' => 'm.id_msg',
		);
		$query_inner_join = array();
		$query_left_join = array();
		$query_where = array();
		$query_params = $search_data['params'];

		if ($query_params['id_search'])
			$query_select['id_search'] = '{int:id_search}';

		// Regular expression matching is used only when the "match words" setting is on
		// and this search has not disabled regular expressions; otherwise plain LIKE is used.
		$use_regexp = !empty($modSettings['search_match_words']) && empty($search_data['no_regexp']);
		$match_operator = $use_regexp ? ' RLIKE ' : ' LIKE ';

		// Words that have to be matched (or excluded) against the message body itself.
		$count = 0;
		foreach ($words['words'] as $regularWord)
		{
			$negation = in_array($regularWord, $query_params['excluded_words']) ? ' NOT' : '';
			$query_where[] = 'm.body' . $negation . $match_operator . '{string:complex_body_' . $count . '}';
			$query_params['complex_body_' . $count++] = $this->buildMatchPattern($regularWord, $use_regexp);
		}

		if ($query_params['user_query'])
			$query_where[] = '{raw:user_query}';
		if ($query_params['board_query'])
			$query_where[] = 'm.id_board {raw:board_query}';

		if ($query_params['topic'])
			$query_where[] = 'm.id_topic = {int:topic}';
		if ($query_params['min_msg_id'])
			$query_where[] = 'm.id_msg >= {int:min_msg_id}';
		if ($query_params['max_msg_id'])
			$query_where[] = 'm.id_msg <= {int:max_msg_id}';

		// Phrases that must not appear in the subject (skipped when the index is forced).
		$count = 0;
		if (!empty($query_params['excluded_phrases']) && empty($modSettings['search_force_index']))
			foreach ($query_params['excluded_phrases'] as $phrase)
			{
				$query_where[] = 'subject NOT' . $match_operator . '{string:exclude_subject_phrase_' . $count . '}';
				$query_params['exclude_subject_phrase_' . $count++] = $this->buildMatchPattern($phrase, $use_regexp);
			}

		// Single words that must not appear in the subject (skipped when the index is forced).
		$count = 0;
		if (!empty($query_params['excluded_subject_words']) && empty($modSettings['search_force_index']))
			foreach ($query_params['excluded_subject_words'] as $excludedWord)
			{
				$query_where[] = 'subject NOT' . $match_operator . '{string:exclude_subject_words_' . $count . '}';
				$query_params['exclude_subject_words_' . $count++] = $this->buildMatchPattern($excludedWord, $use_regexp);
			}

		// One join on the word log per indexed word: a LEFT JOIN that must find nothing for
		// excluded words, an INNER JOIN chained onto the previous inner join for required words.
		// NOTE(review): $indexedWord is concatenated straight into the SQL; this presumably relies
		// on text2words() returning integer word ids - confirm against the caller.
		$numTables = 0;
		$prev_join = 0;
		foreach ($words['indexed_words'] as $indexedWord)
		{
			$numTables++;
			if (in_array($indexedWord, $query_params['excluded_index_words']))
			{
				$query_left_join[] = '{db_prefix}log_search_words AS lsw' . $numTables . ' ON (lsw' . $numTables . '.id_word = ' . $indexedWord . ' AND lsw' . $numTables . '.id_msg = m.id_msg)';
				$query_where[] = '(lsw' . $numTables . '.id_word IS NULL)';
			}
			else
			{
				$query_inner_join[] = '{db_prefix}log_search_words AS lsw' . $numTables . ' ON (lsw' . $numTables . '.id_msg = ' . ($prev_join === 0 ? 'm' : 'lsw' . $prev_join) . '.id_msg)';
				$query_where[] = 'lsw' . $numTables . '.id_word = ' . $indexedWord;
				$prev_join = $numTables;
			}
		}

		// The INSERT IGNORE prefix is only added when the database layer reports support for it.
		$ignoreRequest = $smcFunc['db_search_query']('insert_into_log_messages_fulltext', ($smcFunc['db_support_ignore'] ? ('
			INSERT IGNORE INTO {db_prefix}' . $search_data['insert_into'] . '
				(' . implode(', ', array_keys($query_select)) . ')') : '') . '
			SELECT ' . implode(', ', $query_select) . '
			FROM {db_prefix}messages AS m' . (empty($query_inner_join) ? '' : '
				INNER JOIN ' . implode('
				INNER JOIN ', $query_inner_join)) . (empty($query_left_join) ? '' : '
				LEFT JOIN ' . implode('
				LEFT JOIN ', $query_left_join)) . '
			WHERE ' . implode('
				AND ', $query_where) . (empty($search_data['max_results']) ? '' : '
			LIMIT ' . ($search_data['max_results'] - $search_data['indexed_results'])),
			$query_params
		);

		return $ignoreRequest;
	}

	/**
	 * Builds the value compared against a column for one search term.
	 *
	 * @param string $term The word or phrase to look for
	 * @param bool $use_regexp True to build a word-boundary pattern for RLIKE, false to build a LIKE pattern
	 * @return string The pattern to bind as a query parameter
	 */
	private function buildMatchPattern($term, $use_regexp)
	{
		// LIKE: escape the wildcard characters and match the term anywhere in the text.
		if (!$use_regexp)
			return '%' . strtr($term, array('_' => '\\_', '%' => '\\%')) . '%';

		// RLIKE: neutralise regexp metacharacters and anchor the term between word boundaries.
		return '[[:<:]]' . addcslashes(preg_replace(array('/([\[\]$.+*?|{}()])/'), array('[$1]'), $term), '\\\'') . '[[:>:]]';
	}
227
228
	/**
229
	 * {@inheritDoc}
230
	 */
231
	public function postCreated(array &$msgOptions, array &$topicOptions, array &$posterOptions)
	{
		global $modSettings, $smcFunc;

		$customIndexSettings = $smcFunc['json_decode']($modSettings['search_custom_index_config'], true);

		// Pair every word found in the new message body with the id of that message.
		$message_words = text2words($msgOptions['body'], $customIndexSettings['bytes_per_word'], true);
		$rows = array();
		foreach ($message_words as $word)
			$rows[] = array($word, $msgOptions['id']);

		// No words, nothing to store.
		if (empty($rows))
			return;

		$smcFunc['db_insert']('ignore',
			'{db_prefix}log_search_words',
			array('id_word' => 'int', 'id_msg' => 'int'),
			$rows,
			array('id_word', 'id_msg')
		);
	}
249
250
	/**
251
	 * {@inheritDoc}
252
	 */
253
	public function postModified(array &$msgOptions, array &$topicOptions, array &$posterOptions)
	{
		global $modSettings, $smcFunc;

		// Updates the word index of a modified message: words that disappeared from the body
		// are removed from {db_prefix}log_search_words and newly used words are added.
		// Nothing has to be re-indexed unless a body was passed along.
		if (!isset($msgOptions['body']))
			return;

		$customIndexSettings = $smcFunc['json_decode']($modSettings['search_custom_index_config'], true);
		$stopwords = empty($modSettings['search_stopwords']) ? array() : explode(',', $modSettings['search_stopwords']);
		$old_body = isset($msgOptions['old_body']) ? $msgOptions['old_body'] : '';

		// Create the new and old index.
		$old_index = text2words($old_body, $customIndexSettings['bytes_per_word'], true);
		$new_index = text2words($msgOptions['body'], $customIndexSettings['bytes_per_word'], true);

		// Calculate the words to be added and removed from the index.
		$removed_words = array_diff(array_diff($old_index, $new_index), $stopwords);
		$inserted_words = array_diff(array_diff($new_index, $old_index), $stopwords);

		// Delete the removed words AND the added ones to avoid key constraints.
		// This has to happen whenever either list has entries: if only the removed words were
		// checked, the plain insert below would run without the added words being cleared first
		// (for example when no old body was supplied and every word counts as added).
		$words_to_delete = array_merge($removed_words, $inserted_words);
		if (!empty($words_to_delete))
		{
			$smcFunc['db_query']('', '
				DELETE FROM {db_prefix}log_search_words
				WHERE id_msg = {int:id_msg}
					AND id_word IN ({array_int:removed_words})',
				array(
					'removed_words' => $words_to_delete,
					'id_msg' => $msgOptions['id'],
				)
			);
		}

		// Add the new words to be indexed.
		if (!empty($inserted_words))
		{
			$inserts = array();
			foreach ($inserted_words as $word)
				$inserts[] = array($word, $msgOptions['id']);

			// id_word is declared as int here, matching postCreated() and the array_int used in the DELETE above.
			$smcFunc['db_insert']('insert',
				'{db_prefix}log_search_words',
				array('id_word' => 'int', 'id_msg' => 'int'),
				$inserts,
				array('id_word', 'id_msg')
			);
		}
	}
301
}
302
303
?>
0 ignored issues
show
Best Practice introduced by
It is not recommended to use PHP's closing tag ?> in files other than templates.

Using a closing tag in PHP files that only contain PHP code is not recommended as you might accidentally add whitespace after the closing tag which would then be output by PHP. This can cause severe problems, for example headers cannot be sent anymore.

A simple precaution is to leave off the closing tag as it is not required, and it also has no negative effects whatsoever.

Loading history...