Completed
Push — release-2.1 ( 02c5c7...5a39ee )
by Colin
09:26
created

custom_search::prepareIndexes()   C

Complexity

Conditions 8
Paths 10

Size

Total Lines 25
Code Lines 13

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 8
eloc 13
nc 10
nop 4
dl 0
loc 25
rs 5.3846
c 0
b 0
f 0
1
<?php
2
3
/**
4
 * Simple Machines Forum (SMF)
5
 *
6
 * @package SMF
7
 * @author Simple Machines http://www.simplemachines.org
8
 * @copyright 2018 Simple Machines and individual contributors
9
 * @license http://www.simplemachines.org/about/smf/license.php BSD
10
 *
11
 * @version 2.1 Beta 4
12
 */
13
14
if (!defined('SMF'))
15
	die('No direct access...');
16
17
/**
18
 * Used for the "custom search index" option
19
 * Class custom_search
20
 */
21
class custom_search extends search_api
22
{
23
	/**
24
	 * @var array Index settings
25
	 */
26
	protected $indexSettings = array();
27
28
	/**
29
	 * @var array An array of banned words
30
	 */
31
	protected $bannedWords = array();
32
33
	/**
34
	 * @var int|null Minimum word length (null for no minimum)
35
	 */
36
	protected $min_word_length = null;
37
38
	/**
39
	 * @var array Which databases support this method
40
	 */
41
	protected $supported_databases = array('mysql', 'postgresql');
42
43
	/**
	 * Constructor function
	 *
	 * Marks the API as unsupported when the active database type is not listed
	 * in $supported_databases. Otherwise, if a custom index configuration is
	 * stored in $modSettings, loads the index settings, the banned word list
	 * and the minimum word length from it.
	 */
	public function __construct()
	{
		global $smcFunc, $modSettings, $db_type;

		// Is this database supported?
		if (!in_array($db_type, $this->supported_databases))
		{
			$this->is_supported = false;
			return;
		}

		// Nothing configured yet: keep the property defaults.
		if (empty($modSettings['search_custom_index_config']))
			return;

		// Only accept a decoded array, so that a corrupt setting cannot replace
		// the array default of $indexSettings with null or false.
		$decodedSettings = $smcFunc['json_decode']($modSettings['search_custom_index_config'], true);
		if (is_array($decodedSettings))
			$this->indexSettings = $decodedSettings;

		$this->bannedWords = empty($modSettings['search_stopwords']) ? array() : explode(',', $modSettings['search_stopwords']);

		// A missing key leaves the minimum at null, which means "no minimum".
		if (isset($this->indexSettings['bytes_per_word']))
			$this->min_word_length = $this->indexSettings['bytes_per_word'];
	}
65
66
	/**
	 * {@inheritDoc}
	 *
	 * Answers true for the methods this class implements itself and defers to
	 * the parent implementation for every other method name.
	 */
	public function supportsMethod($methodName, $query_params = null)
	{
		$ownMethods = array(
			'isValid',
			'searchSort',
			'prepareIndexes',
			'indexedWordQuery',
			'postCreated',
			'postModified',
		);

		// Loose comparison on purpose: it is what a switch statement would do.
		if (in_array($methodName, $ownMethods))
			return true;

		// Maybe parent got support
		return parent::supportsMethod($methodName, $query_params);
	}
94
95
	/**
	 * {@inheritDoc}
	 *
	 * The API counts as valid once a custom index configuration is stored in
	 * $modSettings['search_custom_index_config'].
	 */
	public function isValid()
	{
		global $modSettings;

		if (empty($modSettings['search_custom_index_config']))
			return false;

		return true;
	}
104
105
	/**
	 * {@inheritDoc}
	 *
	 * Each word is weighted by its byte length, minus 1000 when it appears in
	 * the global $excludedWords list. Returns 1 when $a weighs more than $b,
	 * -1 when it weighs less and 0 when both weigh the same.
	 */
	public function searchSort($a, $b)
	{
		global $excludedWords;

		$weightA = strlen($a);
		if (in_array($a, $excludedWords))
			$weightA -= 1000;

		$weightB = strlen($b);
		if (in_array($b, $excludedWords))
			$weightB -= 1000;

		if ($weightA > $weightB)
			return 1;

		if ($weightA < $weightB)
			return -1;

		return 0;
	}
117
118
	/**
	 * {@inheritDoc}
	 *
	 * Splits $word with text2words() and records the usable pieces:
	 *  - $wordsSearch['words'] receives the word itself unless the
	 *    'search_force_index' setting is on;
	 *  - $wordsSearch['indexed_words'] receives every piece that is long
	 *    enough and not banned;
	 *  - $wordsExclude receives the same pieces when $isExcluded is set.
	 */
	public function prepareIndexes($word, array &$wordsSearch, array &$wordsExclude, $isExcluded)
	{
		global $modSettings, $smcFunc;

		$pieces = text2words($word, $this->min_word_length, true);

		if (empty($modSettings['search_force_index']))
			$wordsSearch['words'][] = $word;

		// Excluded phrases don't benefit from being split into subwords.
		if (count($pieces) > 1 && $isExcluded)
			return;

		foreach ($pieces as $piece)
		{
			// Skip pieces that are too short or on the banned list.
			if ($smcFunc['strlen']($piece) < $this->min_word_length || in_array($piece, $this->bannedWords))
				continue;

			$wordsSearch['indexed_words'][] = $piece;

			if ($isExcluded)
				$wordsExclude[] = $piece;
		}
	}
146
147
	/**
	 * {@inheritDoc}
	 *
	 * Builds one query over {db_prefix}messages, joined once against
	 * {db_prefix}log_search_words for every indexed word, and runs it through
	 * $smcFunc['db_search_query']. When the database layer reports support for
	 * INSERT IGNORE, the selected rows are inserted into the table named by
	 * $search_data['insert_into']; otherwise only the SELECT is issued.
	 *
	 * @param array $words Reads the keys 'words' and 'indexed_words'
	 * @param array $search_data Reads the keys 'params', 'no_regexp', 'insert_into', 'max_results' and 'indexed_results'
	 * @return mixed Whatever $smcFunc['db_search_query'] returns
	 */
	public function indexedWordQuery(array $words, array $search_data)
	{
		global $modSettings, $smcFunc;

		$query_select = array(
			'id_msg' => 'm.id_msg',
		);
		$query_inner_join = array();
		$query_left_join = array();
		$query_where = array();
		$query_params = $search_data['params'];

		if ($query_params['id_search'])
			$query_select['id_search'] = '{int:id_search}';

		// The same choice applies to every text condition below, so decide once.
		// RLIKE is only used when 'search_match_words' is on and regular
		// expressions have not been ruled out for this search.
		$useLike = empty($modSettings['search_match_words']) || !empty($search_data['no_regexp']);
		$operator = $useLike ? ' LIKE ' : ' RLIKE ';

		// Words matched directly against the message body.
		$count = 0;
		foreach ($words['words'] as $regularWord)
		{
			$negation = in_array($regularWord, $query_params['excluded_words']) ? ' NOT' : '';
			$query_where[] = 'm.body' . $negation . $operator . '{string:complex_body_' . $count . '}';
			$query_params['complex_body_' . $count] = $this->buildMatchValue($regularWord, $useLike);
			$count++;
		}

		if ($query_params['user_query'])
			$query_where[] = '{raw:user_query}';
		if ($query_params['board_query'])
			$query_where[] = 'm.id_board {raw:board_query}';

		if ($query_params['topic'])
			$query_where[] = 'm.id_topic = {int:topic}';
		if ($query_params['min_msg_id'])
			$query_where[] = 'm.id_msg >= {int:min_msg_id}';
		if ($query_params['max_msg_id'])
			$query_where[] = 'm.id_msg <= {int:max_msg_id}';

		// Phrases that must not appear in the subject.
		$count = 0;
		if (!empty($query_params['excluded_phrases']) && empty($modSettings['search_force_index']))
			foreach ($query_params['excluded_phrases'] as $phrase)
			{
				$query_where[] = 'subject NOT ' . $operator . '{string:exclude_subject_phrase_' . $count . '}';
				$query_params['exclude_subject_phrase_' . $count] = $this->buildMatchValue($phrase, $useLike);
				$count++;
			}

		// Single words that must not appear in the subject.
		$count = 0;
		if (!empty($query_params['excluded_subject_words']) && empty($modSettings['search_force_index']))
			foreach ($query_params['excluded_subject_words'] as $excludedWord)
			{
				$query_where[] = 'subject NOT ' . $operator . '{string:exclude_subject_words_' . $count . '}';
				$query_params['exclude_subject_words_' . $count] = $this->buildMatchValue($excludedWord, $useLike);
				$count++;
			}

		// One join against the word index per indexed word.
		$numTables = 0;
		$prev_join = 0;
		foreach ($words['indexed_words'] as $indexedWord)
		{
			$numTables++;
			$alias = 'lsw' . $numTables;

			// This value is written into the SQL text itself rather than bound
			// as a parameter, so force it to an integer. The id_word column is
			// declared as int where this class inserts into the same table.
			$wordId = (int) $indexedWord;

			if (in_array($indexedWord, $query_params['excluded_index_words']))
			{
				// Excluded word: left join and require that no row matched.
				$query_left_join[] = '{db_prefix}log_search_words AS ' . $alias . ' ON (' . $alias . '.id_word = ' . $wordId . ' AND ' . $alias . '.id_msg = m.id_msg)';
				$query_where[] = '(' . $alias . '.id_word IS NULL)';
			}
			else
			{
				// Required word: chain the join onto the previous required one.
				$query_inner_join[] = '{db_prefix}log_search_words AS ' . $alias . ' ON (' . $alias . '.id_msg = ' . ($prev_join === 0 ? 'm' : 'lsw' . $prev_join) . '.id_msg)';
				$query_where[] = $alias . '.id_word = ' . $wordId;
				$prev_join = $numTables;
			}
		}

		// Assemble the statement piece by piece.
		$sql = '';
		if ($smcFunc['db_support_ignore'])
			$sql .= '
			INSERT IGNORE INTO {db_prefix}' . $search_data['insert_into'] . '
				(' . implode(', ', array_keys($query_select)) . ')';

		$sql .= '
			SELECT ' . implode(', ', $query_select) . '
			FROM {db_prefix}messages AS m';

		if (!empty($query_inner_join))
			$sql .= '
				INNER JOIN ' . implode('
				INNER JOIN ', $query_inner_join);

		if (!empty($query_left_join))
			$sql .= '
				LEFT JOIN ' . implode('
				LEFT JOIN ', $query_left_join);

		$sql .= '
			WHERE ' . implode('
				AND ', $query_where);

		if (!empty($search_data['max_results']))
			$sql .= '
			LIMIT ' . ($search_data['max_results'] - $search_data['indexed_results']);

		return $smcFunc['db_search_query']('insert_into_log_messages_fulltext', $sql, $query_params);
	}

	/**
	 * Turns a search term into the value bound to a LIKE or RLIKE placeholder.
	 *
	 * @param string $term The word or phrase to match
	 * @param bool $useLike True for a LIKE pattern, false for an RLIKE pattern
	 * @return string The term wrapped in % wildcards with _ and % escaped, or
	 *                wrapped in [[:<:]] / [[:>:]] markers with regular
	 *                expression metacharacters bracketed
	 */
	private function buildMatchValue($term, $useLike)
	{
		if ($useLike)
			return '%' . strtr($term, array('_' => '\\_', '%' => '\\%')) . '%';

		return '[[:<:]]' . addcslashes(preg_replace(array('/([\[\]$.+*?|{}()])/'), array('[$1]'), $term), '\\\'') . '[[:>:]]';
	}
234
235
	/**
	 * {@inheritDoc}
	 *
	 * Runs the new message body through text2words() and stores one
	 * (id_word, id_msg) row per returned value in {db_prefix}log_search_words.
	 * Rows are written with the 'ignore' insert method.
	 */
	public function postCreated(array &$msgOptions, array &$topicOptions, array &$posterOptions)
	{
		global $modSettings, $smcFunc;

		$indexConfig = $smcFunc['json_decode']($modSettings['search_custom_index_config'], true);
		$indexWords = text2words($msgOptions['body'], $indexConfig['bytes_per_word'], true);

		// Pair every indexed word with the id of the new message.
		$rows = array();
		foreach ($indexWords as $indexWord)
			$rows[] = array($indexWord, $msgOptions['id']);

		// Nothing to index for this message.
		if (empty($rows))
			return;

		$smcFunc['db_insert']('ignore',
			'{db_prefix}log_search_words',
			array('id_word' => 'int', 'id_msg' => 'int'),
			$rows,
			array('id_word', 'id_msg')
		);
	}
256
257
	/**
	 * {@inheritDoc}
	 *
	 * When the message body was changed, compares the index words of the old
	 * body ($msgOptions['old_body'], or an empty string if absent) with those
	 * of the new body and updates {db_prefix}log_search_words accordingly.
	 * Stop words from $modSettings['search_stopwords'] are left out of both
	 * the removal and the insertion list.
	 */
	public function postModified(array &$msgOptions, array &$topicOptions, array &$posterOptions)
	{
		global $modSettings, $smcFunc;

		if (isset($msgOptions['body']))
		{
			$customIndexSettings = $smcFunc['json_decode']($modSettings['search_custom_index_config'], true);
			$stopwords = empty($modSettings['search_stopwords']) ? array() : explode(',', $modSettings['search_stopwords']);
			$old_body = isset($msgOptions['old_body']) ? $msgOptions['old_body'] : '';

			// Create the new and old index.
			$old_index = text2words($old_body, $customIndexSettings['bytes_per_word'], true);
			$new_index = text2words($msgOptions['body'], $customIndexSettings['bytes_per_word'], true);

			// Calculate the words to be added and removed from the index.
			$removed_words = array_diff(array_diff($old_index, $new_index), $stopwords);
			$inserted_words = array_diff(array_diff($new_index, $old_index), $stopwords);

			// Delete the removed words AND the added ones to avoid key constraints.
			// This has to run even when nothing was removed: the insert below is a
			// plain insert, so rows already present for an added word must go first.
			$words_to_delete = array_merge($removed_words, $inserted_words);
			if (!empty($words_to_delete))
			{
				$smcFunc['db_query']('', '
					DELETE FROM {db_prefix}log_search_words
					WHERE id_msg = {int:id_msg}
						AND id_word IN ({array_int:removed_words})',
					array(
						'removed_words' => $words_to_delete,
						'id_msg' => $msgOptions['id'],
					)
				);
			}

			// Add the new words to be indexed.
			if (!empty($inserted_words))
			{
				$inserts = array();
				foreach ($inserted_words as $word)
					$inserts[] = array($word, $msgOptions['id']);

				// id_word is declared as int, matching the array_int delete above.
				$smcFunc['db_insert']('insert',
					'{db_prefix}log_search_words',
					array('id_word' => 'int', 'id_msg' => 'int'),
					$inserts,
					array('id_word', 'id_msg')
				);
			}
		}
	}
308
}
309
310
?>