Yoshi2889 /
SMF2.1
These results are based on our legacy PHP analysis; consider migrating to our new PHP analysis engine instead. Learn more
| 1 | <?php |
||
| 2 | |||
| 3 | /** |
||
| 4 | * Simple Machines Forum (SMF) |
||
| 5 | * |
||
| 6 | * @package SMF |
||
| 7 | * @author Simple Machines http://www.simplemachines.org |
||
| 8 | * @copyright 2017 Simple Machines and individual contributors |
||
| 9 | * @license http://www.simplemachines.org/about/smf/license.php BSD |
||
| 10 | * |
||
| 11 | * @version 2.1 Beta 4 |
||
| 12 | */ |
||
| 13 | |||
// Stop immediately when the SMF constant is not defined.
if (!defined('SMF'))
{
	die('No direct access...');
}
||
| 16 | |||
/**
 * Used for the "custom search index" option
 * Class custom_search
 *
 * Maintains rows in {db_prefix}log_search_words (id_word, id_msg) when posts
 * are created or modified, and builds the search query that joins against
 * that table.
 */
class custom_search extends search_api
{
	/**
	 * @var array Index settings, decoded from $modSettings['search_custom_index_config']
	 */
	protected $indexSettings = array();

	/**
	 * @var array An array of banned words, taken from $modSettings['search_stopwords']
	 */
	protected $bannedWords = array();

	/**
	 * @var int|null Minimum word length (null for no minimum)
	 */
	protected $min_word_length = null;

	/**
	 * @var array Which databases support this method
	 */
	protected $supported_databases = array('mysql', 'postgresql');

	/**
	 * Constructor function
	 *
	 * Flags the API as unsupported when the current database type is not in
	 * $supported_databases; otherwise loads the index settings, the stop
	 * words and the minimum word length from $modSettings.
	 */
	public function __construct()
	{
		global $smcFunc, $modSettings, $db_type;

		// Is this database supported?
		if (!in_array($db_type, $this->supported_databases))
		{
			$this->is_supported = false;
			return;
		}

		// Nothing to load when no custom index has been configured.
		if (empty($modSettings['search_custom_index_config']))
			return;

		// Keep the property an array even if the stored configuration cannot be decoded.
		$settings = $smcFunc['json_decode']($modSettings['search_custom_index_config'], true);
		$this->indexSettings = is_array($settings) ? $settings : array();

		$this->bannedWords = empty($modSettings['search_stopwords']) ? array() : explode(',', $modSettings['search_stopwords']);

		// Fall back to "no minimum" when the setting is missing instead of raising a notice.
		$this->min_word_length = isset($this->indexSettings['bytes_per_word']) ? $this->indexSettings['bytes_per_word'] : null;
	}

	/**
	 * {@inheritDoc}
	 *
	 * @param string $methodName The name of the method being queried
	 * @param array|null $query_params Not used by this implementation
	 * @return bool True for the methods this class implements, false otherwise
	 */
	public function supportsMethod($methodName, $query_params = null)
	{
		switch ($methodName)
		{
			case 'isValid':
			case 'searchSort':
			case 'prepareIndexes':
			case 'indexedWordQuery':
			case 'postCreated':
			case 'postModified':
				return true;

			// All other methods, too bad dunno you.
			default:
				return false;
		}
	}

	/**
	 * {@inheritDoc}
	 *
	 * @return bool Whether a custom index configuration is present in $modSettings
	 */
	public function isValid()
	{
		global $modSettings;

		return !empty($modSettings['search_custom_index_config']);
	}

	/**
	 * {@inheritDoc}
	 *
	 * Comparison callback: each word is scored by its length, minus 1000 when
	 * it appears in the global $excludedWords list; the word with the higher
	 * score compares as greater.
	 *
	 * @param string $a The first word
	 * @param string $b The second word
	 * @return int 1, -1 or 0
	 */
	public function searchSort($a, $b)
	{
		global $excludedWords;

		$x = strlen($a) - (in_array($a, $excludedWords) ? 1000 : 0);
		$y = strlen($b) - (in_array($b, $excludedWords) ? 1000 : 0);

		return $y < $x ? 1 : ($y > $x ? -1 : 0);
	}

	/**
	 * {@inheritDoc}
	 *
	 * Adds the raw word to $wordsSearch['words'] (unless the index is forced)
	 * and the values returned by text2words() to $wordsSearch['indexed_words'];
	 * excluded ones are also appended to $wordsExclude.
	 *
	 * @param string $word The word or phrase being searched for
	 * @param array &$wordsSearch Receives the 'words' and 'indexed_words' entries
	 * @param array &$wordsExclude Receives the excluded index words
	 * @param bool $isExcluded Whether $word is an excluded term
	 */
	public function prepareIndexes($word, array &$wordsSearch, array &$wordsExclude, $isExcluded)
	{
		global $modSettings, $smcFunc;

		$subwords = text2words($word, $this->min_word_length, true);

		if (empty($modSettings['search_force_index']))
			$wordsSearch['words'][] = $word;

		// Excluded phrases don't benefit from being split into subwords.
		if (count($subwords) > 1 && $isExcluded)
			return;

		foreach ($subwords as $subword)
		{
			if ($smcFunc['strlen']($subword) >= $this->min_word_length && !in_array($subword, $this->bannedWords))
			{
				$wordsSearch['indexed_words'][] = $subword;
				if ($isExcluded)
					$wordsExclude[] = $subword;
			}
		}
	}

	/**
	 * Builds the value bound to a LIKE or RLIKE comparison for one search term.
	 *
	 * @param string $text The word or phrase to match
	 * @param bool $useLike True for a LIKE pattern, false for an RLIKE pattern
	 * @return string The pattern to bind as a {string:...} parameter
	 */
	private function buildMatchPattern($text, $useLike)
	{
		// LIKE: escape the wildcard characters and match the text anywhere.
		if ($useLike)
			return '%' . strtr($text, array('_' => '\\_', '%' => '\\%')) . '%';

		// RLIKE: neutralise regex metacharacters and anchor on word boundaries.
		return '[[:<:]]' . addcslashes(preg_replace(array('/([\[\]$.+*?|{}()])/'), array('[$1]'), $text), '\\\'') . '[[:>:]]';
	}

	/**
	 * {@inheritDoc}
	 *
	 * @param array $words Must contain the 'words' and 'indexed_words' lists
	 * @param array $search_data Must contain 'params', 'insert_into', 'no_regexp', 'max_results' and 'indexed_results'
	 * @return mixed Whatever $smcFunc['db_search_query'] returns
	 */
	public function indexedWordQuery(array $words, array $search_data)
	{
		global $modSettings, $smcFunc;

		$query_select = array(
			'id_msg' => 'm.id_msg',
		);
		$query_inner_join = array();
		$query_left_join = array();
		$query_where = array();
		$query_params = $search_data['params'];

		if ($query_params['id_search'])
			$query_select['id_search'] = '{int:id_search}';

		// LIKE is used unless whole-word matching is enabled and regular expressions are allowed.
		$useLike = empty($modSettings['search_match_words']) || !empty($search_data['no_regexp']);
		$operator = $useLike ? ' LIKE ' : ' RLIKE ';

		// Match (or reject) every regular word against the message body.
		$count = 0;
		foreach ($words['words'] as $regularWord)
		{
			$query_where[] = 'm.body' . (in_array($regularWord, $query_params['excluded_words']) ? ' NOT' : '') . $operator . '{string:complex_body_' . $count . '}';
			$query_params['complex_body_' . $count++] = $this->buildMatchPattern($regularWord, $useLike);
		}

		if ($query_params['user_query'])
			$query_where[] = '{raw:user_query}';
		if ($query_params['board_query'])
			$query_where[] = 'm.id_board {raw:board_query}';

		if ($query_params['topic'])
			$query_where[] = 'm.id_topic = {int:topic}';
		if ($query_params['min_msg_id'])
			$query_where[] = 'm.id_msg >= {int:min_msg_id}';
		if ($query_params['max_msg_id'])
			$query_where[] = 'm.id_msg <= {int:max_msg_id}';

		// Excluded phrases and subject words are only filtered here when the index is not forced.
		$count = 0;
		if (!empty($query_params['excluded_phrases']) && empty($modSettings['search_force_index']))
			foreach ($query_params['excluded_phrases'] as $phrase)
			{
				$query_where[] = 'subject NOT ' . $operator . '{string:exclude_subject_phrase_' . $count . '}';
				$query_params['exclude_subject_phrase_' . $count++] = $this->buildMatchPattern($phrase, $useLike);
			}
		$count = 0;
		if (!empty($query_params['excluded_subject_words']) && empty($modSettings['search_force_index']))
			foreach ($query_params['excluded_subject_words'] as $excludedWord)
			{
				$query_where[] = 'subject NOT ' . $operator . '{string:exclude_subject_words_' . $count . '}';
				$query_params['exclude_subject_words_' . $count++] = $this->buildMatchPattern($excludedWord, $useLike);
			}

		// One join on log_search_words per indexed word: excluded words are
		// LEFT JOINed and required to be absent, the others are INNER JOINed
		// in a chain starting from the messages table.
		// NOTE(review): $indexedWord is concatenated straight into the SQL; this
		// relies on prepareIndexes() only supplying numeric values - confirm against text2words().
		$numTables = 0;
		$prev_join = 0;
		foreach ($words['indexed_words'] as $indexedWord)
		{
			$numTables++;
			if (in_array($indexedWord, $query_params['excluded_index_words']))
			{
				$query_left_join[] = '{db_prefix}log_search_words AS lsw' . $numTables . ' ON (lsw' . $numTables . '.id_word = ' . $indexedWord . ' AND lsw' . $numTables . '.id_msg = m.id_msg)';
				$query_where[] = '(lsw' . $numTables . '.id_word IS NULL)';
			}
			else
			{
				$query_inner_join[] = '{db_prefix}log_search_words AS lsw' . $numTables . ' ON (lsw' . $numTables . '.id_msg = ' . ($prev_join === 0 ? 'm' : 'lsw' . $prev_join) . '.id_msg)';
				$query_where[] = 'lsw' . $numTables . '.id_word = ' . $indexedWord;
				$prev_join = $numTables;
			}
		}

		$ignoreRequest = $smcFunc['db_search_query']('insert_into_log_messages_fulltext', ($smcFunc['db_support_ignore'] ? ('
			INSERT IGNORE INTO {db_prefix}' . $search_data['insert_into'] . '
				(' . implode(', ', array_keys($query_select)) . ')') : '') . '
			SELECT ' . implode(', ', $query_select) . '
			FROM {db_prefix}messages AS m' . (empty($query_inner_join) ? '' : '
				INNER JOIN ' . implode('
				INNER JOIN ', $query_inner_join)) . (empty($query_left_join) ? '' : '
				LEFT JOIN ' . implode('
				LEFT JOIN ', $query_left_join)) . '
			WHERE ' . implode('
				AND ', $query_where) . (empty($search_data['max_results']) ? '' : '
			LIMIT ' . ($search_data['max_results'] - $search_data['indexed_results'])),
			$query_params
		);

		return $ignoreRequest;
	}

	/**
	 * {@inheritDoc}
	 *
	 * Indexes a new message: one (id_word, id_msg) row is inserted into
	 * log_search_words for every value text2words() returns for the body.
	 *
	 * @param array &$msgOptions Must contain 'body' and 'id'
	 * @param array &$topicOptions Not used by this implementation
	 * @param array &$posterOptions Not used by this implementation
	 */
	public function postCreated(array &$msgOptions, array &$topicOptions, array &$posterOptions)
	{
		global $modSettings, $smcFunc;

		$customIndexSettings = $smcFunc['json_decode']($modSettings['search_custom_index_config'], true);

		$inserts = array();
		foreach (text2words($msgOptions['body'], $customIndexSettings['bytes_per_word'], true) as $word)
			$inserts[] = array($word, $msgOptions['id']);

		if (!empty($inserts))
			$smcFunc['db_insert']('ignore',
				'{db_prefix}log_search_words',
				array('id_word' => 'int', 'id_msg' => 'int'),
				$inserts,
				array('id_word', 'id_msg')
			);
	}

	/**
	 * {@inheritDoc}
	 *
	 * Re-indexes a modified message by comparing the index of the old body
	 * with the index of the new one. Does nothing when no new body is given.
	 *
	 * @param array &$msgOptions Uses 'body', 'id' and, when present, 'old_body'
	 * @param array &$topicOptions Not used by this implementation
	 * @param array &$posterOptions Not used by this implementation
	 */
	public function postModified(array &$msgOptions, array &$topicOptions, array &$posterOptions)
	{
		global $modSettings, $smcFunc;

		if (!isset($msgOptions['body']))
			return;

		$customIndexSettings = $smcFunc['json_decode']($modSettings['search_custom_index_config'], true);
		$stopwords = empty($modSettings['search_stopwords']) ? array() : explode(',', $modSettings['search_stopwords']);
		$old_body = isset($msgOptions['old_body']) ? $msgOptions['old_body'] : '';

		// Create the new and old index.
		$old_index = text2words($old_body, $customIndexSettings['bytes_per_word'], true);
		$new_index = text2words($msgOptions['body'], $customIndexSettings['bytes_per_word'], true);

		// Calculate the words to be added and removed from the index.
		$removed_words = array_diff(array_diff($old_index, $new_index), $stopwords);
		$inserted_words = array_diff(array_diff($new_index, $old_index), $stopwords);

		// Delete the removed words AND the added ones to avoid key constraints.
		// This must also run when only new words exist: without an old body every
		// word counts as "inserted" and may already have a row for this message.
		$words_to_delete = array_merge($removed_words, $inserted_words);
		if (!empty($words_to_delete))
		{
			$smcFunc['db_query']('', '
				DELETE FROM {db_prefix}log_search_words
				WHERE id_msg = {int:id_msg}
					AND id_word IN ({array_int:removed_words})',
				array(
					'removed_words' => $words_to_delete,
					'id_msg' => $msgOptions['id'],
				)
			);
		}

		// Add the new words to be indexed.
		if (!empty($inserted_words))
		{
			$inserts = array();
			foreach ($inserted_words as $word)
				$inserts[] = array($word, $msgOptions['id']);

			// id_word is declared as an integer, matching postCreated() and the DELETE above.
			$smcFunc['db_insert']('insert',
				'{db_prefix}log_search_words',
				array('id_word' => 'int', 'id_msg' => 'int'),
				$inserts,
				array('id_word', 'id_msg')
			);
		}
	}
}
||
| 302 | |||
| 303 | ?> |
This check marks property names that have not been written in camelCase.
In camelCase, names are written without any punctuation, with the start of each new word marked by a capital letter. Thus the name "database connection string" becomes
databaseConnectionString.