albertlast /
SMF2.1
These results are based on our legacy PHP analysis; consider migrating to our new PHP analysis engine instead. Learn more
| 1 | <?php |
||
| 2 | |||
| 3 | /** |
||
| 4 | * Simple Machines Forum (SMF) |
||
| 5 | * |
||
| 6 | * @package SMF |
||
| 7 | * @author Simple Machines http://www.simplemachines.org |
||
| 8 | * @copyright 2017 Simple Machines and individual contributors |
||
| 9 | * @license http://www.simplemachines.org/about/smf/license.php BSD |
||
| 10 | * |
||
| 11 | * @version 2.1 Beta 4 |
||
| 12 | */ |
||
| 13 | |||
| 14 | if (!defined('SMF')) |
||
| 15 | die('No direct access...'); |
||
| 16 | |||
| 17 | /** |
||
| 18 | * Used for the "custom search index" option |
||
| 19 | * Class custom_search |
||
| 20 | */ |
||
| 21 | class custom_search extends search_api |
||
| 22 | { |
||
| 23 | /** |
||
| 24 | * @var array Index settings |
||
| 25 | */ |
||
| 26 | protected $indexSettings = array(); |
||
| 27 | |||
| 28 | /** |
||
| 29 | * @var array An array of banned words |
||
| 30 | */ |
||
| 31 | protected $bannedWords = array(); |
||
| 32 | |||
| 33 | /** |
||
| 34 | * @var int|null Minimum word length (null for no minimum) |
||
| 35 | */ |
||
| 36 | protected $min_word_length = null; |
||
| 37 | |||
| 38 | /** |
||
| 39 | * @var array Which databases support this method |
||
| 40 | */ |
||
| 41 | protected $supported_databases = array('mysql', 'postgresql'); |
||
| 42 | |||
/**
 * Sets up the custom search index API.
 *
 * Marks the API as unsupported when the active database type is not listed in
 * $supported_databases. Otherwise, provided a custom index configuration is
 * stored in $modSettings, loads the index settings, the stop word list and
 * the minimum word length from it.
 */
public function __construct()
{
	global $smcFunc, $modSettings, $db_type;

	// Bail out straight away on a database type this index does not list as supported.
	$databaseSupported = in_array($db_type, $this->supported_databases);
	if (!$databaseSupported)
	{
		$this->is_supported = false;
		return;
	}

	// Without a stored configuration the defaults declared on the class stay in place.
	if (!empty($modSettings['search_custom_index_config']))
	{
		$this->indexSettings = $smcFunc['json_decode']($modSettings['search_custom_index_config'], true);

		// Stop words are kept as one comma separated setting.
		if (empty($modSettings['search_stopwords']))
			$this->bannedWords = array();
		else
			$this->bannedWords = explode(',', $modSettings['search_stopwords']);

		$this->min_word_length = $this->indexSettings['bytes_per_word'];
	}
}
||
| 65 | |||
/**
 * {@inheritDoc}
 *
 * Reports which search API methods this class answers for.
 *
 * @param string $methodName Name of the method being asked about
 * @param mixed $query_params Not used by this implementation
 * @return bool True for the methods listed below, false for everything else
 */
public function supportsMethod($methodName, $query_params = null)
{
	$implemented = array(
		'isValid',
		'searchSort',
		'prepareIndexes',
		'indexedWordQuery',
		'postCreated',
		'postModified',
	);

	// Deliberately a loose comparison, the same matching rule a switch statement applies.
	return in_array($methodName, $implemented);
}
||
| 87 | |||
/**
 * {@inheritDoc}
 *
 * @return bool True when $modSettings['search_custom_index_config'] is set and non-empty
 */
public function isValid()
{
	global $modSettings;

	if (empty($modSettings['search_custom_index_config']))
		return false;

	return true;
}
||
| 97 | |||
/**
 * {@inheritDoc}
 *
 * Comparison callback: each word is weighted by its byte length, minus 1000
 * when it appears in the global $excludedWords list, and the lower weight
 * sorts first.
 *
 * @param string $a First word
 * @param string $b Second word
 * @return int 1 when $a weighs more than $b, -1 when it weighs less, 0 when equal
 */
public function searchSort($a, $b)
{
	global $excludedWords;

	$weightA = strlen($a);
	if (in_array($a, $excludedWords))
		$weightA -= 1000;

	$weightB = strlen($b);
	if (in_array($b, $excludedWords))
		$weightB -= 1000;

	if ($weightA > $weightB)
		return 1;

	if ($weightA < $weightB)
		return -1;

	return 0;
}
||
| 110 | |||
/**
 * {@inheritDoc}
 *
 * Splits a search word into subwords with text2words() and files them into
 * the search and exclusion lists.
 *
 * @param string $word The word or phrase taken from the search string
 * @param array &$wordsSearch Receives the raw word under 'words' and accepted subwords under 'indexed_words'
 * @param array &$wordsExclude Receives the accepted subwords when $isExcluded is set
 * @param bool $isExcluded Whether $word is an exclusion term
 */
public function prepareIndexes($word, array &$wordsSearch, array &$wordsExclude, $isExcluded)
{
	global $modSettings, $smcFunc;

	$pieces = text2words($word, $this->min_word_length, true);

	// The raw word is only kept when searches are not forced through the index.
	if (empty($modSettings['search_force_index']))
		$wordsSearch['words'][] = $word;

	// Excluded phrases don't benefit from being split into subwords.
	if (count($pieces) > 1 && $isExcluded)
		return;

	foreach ($pieces as $piece)
	{
		// Skip anything below the minimum length...
		if ($smcFunc['strlen']($piece) < $this->min_word_length)
			continue;

		// ...and anything on the banned list.
		if (in_array($piece, $this->bannedWords))
			continue;

		$wordsSearch['indexed_words'][] = $piece;

		if ($isExcluded)
			$wordsExclude[] = $piece;
	}
}
||
| 139 | |||
/**
 * {@inheritDoc}
 *
 * Builds and runs the query that selects the ids of the messages matching the
 * given words. When the database layer supports INSERT IGNORE, the statement
 * inserts those ids into the table named by $search_data['insert_into'].
 *
 * @param array $words 'words' are matched against m.body, 'indexed_words' against log_search_words.id_word
 * @param array $search_data Reads 'params', 'no_regexp', 'insert_into', 'max_results' and 'indexed_results'
 * @return mixed Whatever $smcFunc['db_search_query'] returns
 */
public function indexedWordQuery(array $words, array $search_data)
{
	global $modSettings, $smcFunc;

	$query_select = array(
		'id_msg' => 'm.id_msg',
	);
	$query_inner_join = array();
	$query_left_join = array();
	$query_where = array();
	$query_params = $search_data['params'];

	// Plain LIKE matching unless whole word matching is on and regular expressions are allowed.
	$useLike = empty($modSettings['search_match_words']) || !empty($search_data['no_regexp']);
	$operator = $useLike ? ' LIKE ' : ' RLIKE ';

	if (!empty($query_params['id_search']))
		$query_select['id_search'] = '{int:id_search}';

	// Words matched directly against the message body.
	$regularWords = empty($words['words']) ? array() : $words['words'];
	$excludedBodyWords = empty($query_params['excluded_words']) ? array() : $query_params['excluded_words'];
	$count = 0;
	foreach ($regularWords as $regularWord)
	{
		$query_where[] = 'm.body' . (in_array($regularWord, $excludedBodyWords) ? ' NOT' : '') . $operator . '{string:complex_body_' . $count . '}';
		$query_params['complex_body_' . $count++] = $this->customIndexMatchPattern($regularWord, $useLike);
	}

	if (!empty($query_params['user_query']))
		$query_where[] = '{raw:user_query}';
	if (!empty($query_params['board_query']))
		$query_where[] = 'm.id_board {raw:board_query}';

	if (!empty($query_params['topic']))
		$query_where[] = 'm.id_topic = {int:topic}';
	if (!empty($query_params['min_msg_id']))
		$query_where[] = 'm.id_msg >= {int:min_msg_id}';
	if (!empty($query_params['max_msg_id']))
		$query_where[] = 'm.id_msg <= {int:max_msg_id}';

	// Subject exclusions only apply when searches are not forced through the index.
	if (empty($modSettings['search_force_index']))
	{
		// Placeholder prefix => key in $query_params holding the terms. Phrases first, then single words.
		$subjectExclusions = array(
			'exclude_subject_phrase_' => 'excluded_phrases',
			'exclude_subject_words_' => 'excluded_subject_words',
		);

		foreach ($subjectExclusions as $placeholderPrefix => $sourceKey)
		{
			if (empty($query_params[$sourceKey]))
				continue;

			$count = 0;
			foreach ($query_params[$sourceKey] as $term)
			{
				$query_where[] = 'subject NOT ' . $operator . '{string:' . $placeholderPrefix . $count . '}';
				$query_params[$placeholderPrefix . $count++] = $this->customIndexMatchPattern($term, $useLike);
			}
		}
	}

	// One aliased join on log_search_words per indexed word.
	$indexedWords = empty($words['indexed_words']) ? array() : $words['indexed_words'];
	$excludedIndexWords = empty($query_params['excluded_index_words']) ? array() : $query_params['excluded_index_words'];
	$numTables = 0;
	$prev_join = 0;
	foreach ($indexedWords as $indexedWord)
	{
		$numTables++;
		$alias = 'lsw' . $numTables;

		// The word id goes straight into the SQL text, so force it to an integer first.
		$wordId = (int) $indexedWord;

		if (in_array($indexedWord, $excludedIndexWords))
		{
			// Excluded word: left join and require that no matching row exists.
			$query_left_join[] = '{db_prefix}log_search_words AS ' . $alias . ' ON (' . $alias . '.id_word = ' . $wordId . ' AND ' . $alias . '.id_msg = m.id_msg)';
			$query_where[] = '(' . $alias . '.id_word IS NULL)';
		}
		else
		{
			// Required word: chain the join onto the previous required word (or onto messages for the first one).
			$query_inner_join[] = '{db_prefix}log_search_words AS ' . $alias . ' ON (' . $alias . '.id_msg = ' . ($prev_join === 0 ? 'm' : 'lsw' . $prev_join) . '.id_msg)';
			$query_where[] = $alias . '.id_word = ' . $wordId;
			$prev_join = $numTables;
		}
	}

	// Assemble the statement piece by piece.
	$sql = '';

	if ($smcFunc['db_support_ignore'])
		$sql .= '
		INSERT IGNORE INTO {db_prefix}' . $search_data['insert_into'] . '
			(' . implode(', ', array_keys($query_select)) . ')';

	$sql .= '
		SELECT ' . implode(', ', $query_select) . '
		FROM {db_prefix}messages AS m';

	if (!empty($query_inner_join))
		$sql .= '
			INNER JOIN ' . implode('
			INNER JOIN ', $query_inner_join);

	if (!empty($query_left_join))
		$sql .= '
			LEFT JOIN ' . implode('
			LEFT JOIN ', $query_left_join);

	$sql .= '
		WHERE ' . implode('
			AND ', $query_where);

	// Only ask for as many rows as are still missing from the result set.
	if (!empty($search_data['max_results']))
		$sql .= '
		LIMIT ' . ($search_data['max_results'] - $search_data['indexed_results']);

	$ignoreRequest = $smcFunc['db_search_query']('insert_into_log_messages_fulltext', $sql, $query_params);

	return $ignoreRequest;
}

/**
 * Turns a search term into the value bound to a LIKE or RLIKE placeholder.
 *
 * @param string $term The raw word or phrase
 * @param bool $useLike True for a LIKE pattern, false for an RLIKE pattern
 * @return string For LIKE: the term with _ and % escaped, wrapped in % wildcards.
 *                For RLIKE: the term with regex metacharacters bracketed, backslashes and
 *                single quotes escaped, wrapped in [[:<:]] and [[:>:]].
 */
private function customIndexMatchPattern($term, $useLike)
{
	if ($useLike)
		return '%' . strtr($term, array('_' => '\\_', '%' => '\\%')) . '%';

	return '[[:<:]]' . addcslashes(preg_replace(array('/([\[\]$.+*?|{}()])/'), array('[$1]'), $term), '\\\'') . '[[:>:]]';
}
||
| 227 | |||
/**
 * {@inheritDoc}
 *
 * Indexes the body of a newly created message: every word returned by
 * text2words() becomes one (id_word, id_msg) row in log_search_words.
 *
 * @param array &$msgOptions Reads 'body' and 'id'
 * @param array &$topicOptions Not used by this implementation
 * @param array &$posterOptions Not used by this implementation
 */
public function postCreated(array &$msgOptions, array &$topicOptions, array &$posterOptions)
{
	global $modSettings, $smcFunc;

	$indexConfig = $smcFunc['json_decode']($modSettings['search_custom_index_config'], true);
	$wordIds = text2words($msgOptions['body'], $indexConfig['bytes_per_word'], true);

	// Pair every word with the id of the message it came from.
	$rows = array();
	foreach ($wordIds as $wordId)
		$rows[] = array($wordId, $msgOptions['id']);

	// A body without indexable words needs no query at all.
	if (empty($rows))
		return;

	$smcFunc['db_insert']('ignore',
		'{db_prefix}log_search_words',
		array('id_word' => 'int', 'id_msg' => 'int'),
		$rows,
		array('id_word', 'id_msg')
	);
}
||
| 249 | |||
/**
 * {@inheritDoc}
 *
 * Re-indexes a message after its body was edited: words that disappeared from
 * the body are removed from log_search_words and words that are new are added.
 * Stop words are left out of both sets.
 *
 * @param array &$msgOptions Reads 'body', 'id' and, when present, 'old_body'
 * @param array &$topicOptions Not used by this implementation
 * @param array &$posterOptions Not used by this implementation
 */
public function postModified(array &$msgOptions, array &$topicOptions, array &$posterOptions)
{
	global $modSettings, $smcFunc;

	// Nothing to re-index unless the body itself was part of the edit.
	if (!isset($msgOptions['body']))
		return;

	$customIndexSettings = $smcFunc['json_decode']($modSettings['search_custom_index_config'], true);
	$stopwords = empty($modSettings['search_stopwords']) ? array() : explode(',', $modSettings['search_stopwords']);
	$old_body = isset($msgOptions['old_body']) ? $msgOptions['old_body'] : '';

	// Create the new and old index.
	$old_index = text2words($old_body, $customIndexSettings['bytes_per_word'], true);
	$new_index = text2words($msgOptions['body'], $customIndexSettings['bytes_per_word'], true);

	// Calculate the words to be added and removed from the index.
	$removed_words = array_diff(array_diff($old_index, $new_index), $stopwords);
	$inserted_words = array_diff(array_diff($new_index, $old_index), $stopwords);

	// Delete the removed words AND the added ones to avoid key constraints.
	// This has to run whenever either list is non-empty: with no removals (for
	// instance when no old body was supplied) the plain insert below could
	// otherwise collide with rows that are already in the table.
	$words_to_delete = array_merge($removed_words, $inserted_words);
	if (!empty($words_to_delete))
	{
		$smcFunc['db_query']('', '
			DELETE FROM {db_prefix}log_search_words
			WHERE id_msg = {int:id_msg}
				AND id_word IN ({array_int:removed_words})',
			array(
				'removed_words' => $words_to_delete,
				'id_msg' => $msgOptions['id'],
			)
		);
	}

	// Add the new words to be indexed.
	if (!empty($inserted_words))
	{
		$inserts = array();
		foreach ($inserted_words as $word)
			$inserts[] = array($word, $msgOptions['id']);

		// id_word is declared as an integer, matching the array_int used by the DELETE above.
		$smcFunc['db_insert']('insert',
			'{db_prefix}log_search_words',
			array('id_word' => 'int', 'id_msg' => 'int'),
			$inserts,
			array('id_word', 'id_msg')
		);
	}
}
||
| 301 | } |
||
| 302 | |||
| 303 | ?> |
Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.
You can also find more detailed suggestions in the “Code” section of your repository.