albertlast /
SMF2.1
These results are based on our legacy PHP analysis; consider migrating to our new PHP analysis engine instead. Learn more
| 1 | <?php |
||
| 2 | |||
| 3 | /** |
||
| 4 | * Simple Machines Forum (SMF) |
||
| 5 | * |
||
| 6 | * @package SMF |
||
| 7 | * @author Simple Machines http://www.simplemachines.org |
||
| 8 | * @copyright 2017 Simple Machines and individual contributors |
||
| 9 | * @license http://www.simplemachines.org/about/smf/license.php BSD |
||
| 10 | * |
||
| 11 | * @version 2.1 Beta 4 |
||
| 12 | */ |
||
| 13 | |||
| 14 | if (!defined('SMF')) |
||
| 15 | die('No direct access...'); |
||
| 16 | |||
| 17 | /** |
||
| 18 | * Class fulltext_search |
||
| 19 | * Used for fulltext index searching |
||
| 20 | */ |
||
| 21 | class fulltext_search extends search_api |
||
| 22 | { |
||
| 23 | /** |
||
| 24 | * @var array Which words are banned |
||
| 25 | */ |
||
| 26 | protected $bannedWords = array(); |
||
| 27 | |||
| 28 | /** |
||
| 29 | * @var int The minimum word length |
||
| 30 | */ |
||
| 31 | protected $min_word_length = 4; |
||
| 32 | |||
| 33 | /** |
||
| 34 | * @var array Which databases support this method? |
||
| 35 | */ |
||
| 36 | protected $supported_databases = array('mysql'); |
||
| 37 | |||
/**
 * Prepares the fulltext search API for use.
 *
 * When the active database type ($db_type) is not listed in
 * $this->supported_databases, the API is flagged as unsupported and nothing
 * else is initialised. Otherwise the banned word list is read from the
 * comma separated 'search_banned_words' setting and the minimum word length
 * is fetched through _getMinWordLength().
 */
public function __construct()
{
	global $modSettings, $db_type;

	// Nothing more to set up if we cannot work with this database.
	$databaseIsSupported = in_array($db_type, $this->supported_databases);
	if (!$databaseIsSupported)
	{
		$this->is_supported = false;
		return;
	}

	// The banned words are stored as one comma separated setting.
	if (empty($modSettings['search_banned_words']))
		$this->bannedWords = array();
	else
		$this->bannedWords = explode(',', $modSettings['search_banned_words']);

	$this->min_word_length = $this->_getMinWordLength();
}
||
| 55 | |||
/**
 * Tells the caller whether this API implements a given search method.
 *
 * @param string $methodName The name of the method being asked about
 * @param array|null $query_params Not used by this implementation
 * @return bool True for searchSort, prepareIndexes and indexedWordQuery, false for anything else
 */
public function supportsMethod($methodName, $query_params = null)
{
	$implementedMethods = array('searchSort', 'prepareIndexes', 'indexedWordQuery');

	// Loose comparison, the same rule a switch statement applies to its cases.
	return in_array($methodName, $implementedMethods);
}
||
| 75 | |||
/**
 * fulltext_search::_getMinWordLength()
 *
 * What is the minimum word length full text supports?
 *
 * Asks the database for its ft_min_word_len variable. The default of 4 is
 * returned when the query fails, when it does not come back with exactly one
 * row, or when the reported value is not a positive number.
 *
 * @return int The minimum word length
 */
protected function _getMinWordLength()
{
	global $smcFunc;

	// 4 is the MySQL default...
	$min_word_length = 4;

	// Try to determine the minimum number of letters for a fulltext search.
	$request = $smcFunc['db_search_query']('max_fulltext_length', '
		SHOW VARIABLES
		LIKE {string:fulltext_minimum_word_length}',
		array(
			'fulltext_minimum_word_length' => 'ft_min_word_len',
		)
	);

	// No result set at all means there is nothing to read and nothing to free.
	if ($request === false)
		return $min_word_length;

	if ($smcFunc['db_num_rows']($request) == 1)
	{
		// The row is (variable name, value); only the value is of interest.
		list (, $reported_length) = $smcFunc['db_fetch_row']($request);

		// The database hands the value back as a string, callers expect an int.
		$reported_length = (int) $reported_length;
		if ($reported_length > 0)
			$min_word_length = $reported_length;
	}

	// Release the result set on every path, not only when exactly one row came back.
	$smcFunc['db_free_result']($request);

	return $min_word_length;
}
||
| 106 | |||
/**
 * Comparison callback used to order search words.
 *
 * Each word is weighed by its length, and a word found in the global
 * $excludedWords list has 1000 taken off its weight. The heavier word
 * sorts first.
 *
 * @param string $a The first word
 * @param string $b The second word
 * @return int 1 if $a weighs less than $b, -1 if it weighs more, 0 if they weigh the same
 */
public function searchSort($a, $b)
{
	global $excludedWords, $smcFunc;

	$weightA = $smcFunc['strlen']($a);
	if (in_array($a, $excludedWords))
		$weightA -= 1000;

	$weightB = $smcFunc['strlen']($b);
	if (in_array($b, $excludedWords))
		$weightB -= 1000;

	if ($weightA < $weightB)
		return 1;

	if ($weightA > $weightB)
		return -1;

	return 0;
}
||
| 119 | |||
/**
 * Sorts one search word into the lists used to build the search queries.
 *
 * The word is always added to $wordsSearch['indexed_words'] (wrapped in
 * double quotes unless text2words() splits it into exactly one subword), and
 * to $wordsExclude as well when $isExcluded is set. Unless the
 * 'search_force_index' setting is on, a word the fulltext index would skip
 * is also added to $wordsSearch['words'] and $wordsSearch['complex_words'].
 *
 * @param string $word The word (or phrase) to sort
 * @param array &$wordsSearch The lists of words to search for, added to in place
 * @param array &$wordsExclude The list of words to exclude, added to in place
 * @param bool $isExcluded Whether this word is being excluded from the search
 */
public function prepareIndexes($word, array &$wordsSearch, array &$wordsExclude, $isExcluded)
{
	global $modSettings, $smcFunc;

	$subwords = text2words($word, null, false);

	// A single subword is passed on as typed, anything else goes in double quotes.
	$indexTerm = count($subwords) === 1 ? $word : '"' . $word . '"';

	// Searching without the index is harder on the server, so only do it when
	// we are not forced to use the index and the index would ignore the word.
	$alsoSearchUnindexed = false;
	if (empty($modSettings['search_force_index']))
	{
		if (count($subwords) > 1 && preg_match('~[.:@$]~', $word))
		{
			// Special characters the index would ignore, and the first two subwords are too short for it as well.
			$alsoSearchUnindexed = $smcFunc['strlen'](current($subwords)) < $this->min_word_length
				&& $smcFunc['strlen'](next($subwords)) < $this->min_word_length;
		}
		else
		{
			// short words have feelings too
			$alsoSearchUnindexed = $smcFunc['strlen'](trim($word, "/*- ")) < $this->min_word_length;
		}
	}

	if ($alsoSearchUnindexed)
	{
		$wordsSearch['words'][] = trim($word, "/*- ");
		$wordsSearch['complex_words'][] = $indexTerm;
	}

	$wordsSearch['indexed_words'][] = $indexTerm;

	if ($isExcluded)
		$wordsExclude[] = $indexTerm;
}
||
| 155 | |||
/**
 * Builds and runs the query that collects the messages matching the search words.
 *
 * The WHERE clause is put together from, in this order: LIKE/RLIKE conditions
 * on m.body for the non-indexed words (skipped in simple fulltext mode), the
 * user, board, topic and message id filters found in $search_data['params'],
 * NOT LIKE/RLIKE conditions on subject for excluded phrases and excluded
 * subject words (skipped when 'search_force_index' is on), and finally the
 * fulltext condition itself.
 *
 * @param array $words The search words; the 'words', 'indexed_words' and 'complex_words' lists are read
 * @param array $search_data The search settings; 'params', 'no_regexp', 'insert_into', 'max_results' and 'indexed_results' are read
 * @return mixed Whatever $smcFunc['db_search_query'] returns for the query
 */
public function indexedWordQuery(array $words, array $search_data)
{
	global $modSettings, $smcFunc;

	$query_select = array(
		'id_msg' => 'm.id_msg',
	);
	$query_where = array();
	$query_params = $search_data['params'];

	// PostgreSQL always goes through the simple fulltext path.
	$isPostgres = $smcFunc['db_title'] == "PostgreSQL";
	if ($isPostgres)
		$modSettings['search_simple_fulltext'] = true;

	if ($query_params['id_search'])
		$query_select['id_search'] = '{int:id_search}';

	// Plain LIKE matching, unless whole word matching is on and regular expressions are allowed.
	$useRegexp = !empty($modSettings['search_match_words']) && !$search_data['no_regexp'];

	// The spaces matter: without them the operator runs into the column name or the NOT in front of it.
	$operator = $useRegexp ? ' RLIKE ' : ' LIKE ';

	$count = 0;
	if (empty($modSettings['search_simple_fulltext']))
		foreach ($words['words'] as $regularWord)
		{
			$query_where[] = 'm.body' . (in_array($regularWord, $query_params['excluded_words']) ? ' NOT' : '') . $operator . '{string:complex_body_' . $count . '}';
			$query_params['complex_body_' . $count++] = $this->_buildMatchPattern($regularWord, $useRegexp);
		}

	if ($query_params['user_query'])
		$query_where[] = '{raw:user_query}';
	if ($query_params['board_query'])
		$query_where[] = 'm.id_board {raw:board_query}';

	if ($query_params['topic'])
		$query_where[] = 'm.id_topic = {int:topic}';
	if ($query_params['min_msg_id'])
		$query_where[] = 'm.id_msg >= {int:min_msg_id}';
	if ($query_params['max_msg_id'])
		$query_where[] = 'm.id_msg <= {int:max_msg_id}';

	$count = 0;
	if (!empty($query_params['excluded_phrases']) && empty($modSettings['search_force_index']))
		foreach ($query_params['excluded_phrases'] as $phrase)
		{
			$query_where[] = 'subject NOT' . $operator . '{string:exclude_subject_phrase_' . $count . '}';
			$query_params['exclude_subject_phrase_' . $count++] = $this->_buildMatchPattern($phrase, $useRegexp);
		}

	$count = 0;
	if (!empty($query_params['excluded_subject_words']) && empty($modSettings['search_force_index']))
		foreach ($query_params['excluded_subject_words'] as $excludedWord)
		{
			$query_where[] = 'subject NOT' . $operator . '{string:exclude_subject_words_' . $count . '}';
			$query_params['exclude_subject_words_' . $count++] = $this->_buildMatchPattern($excludedWord, $useRegexp);
		}

	if (!empty($modSettings['search_simple_fulltext']))
	{
		if ($isPostgres)
		{
			// body_match is a plain list of words separated by spaces. plainto_tsquery()
			// accepts that, to_tsquery() wants operators between the words.
			$query_where[] = 'to_tsvector({string:language_ftx},body) @@ plainto_tsquery({string:language_ftx},{string:body_match})';
			$query_params['language_ftx'] = $smcFunc['db_search_language']();
		}
		else
			$query_where[] = 'MATCH (body) AGAINST ({string:body_match})';

		$query_params['body_match'] = implode(' ', array_diff($words['indexed_words'], $query_params['excluded_index_words']));
	}
	else
	{
		// PostgreSQL never gets here, it was switched to simple fulltext mode above.

		// remove any indexed words that are used in the complex body search terms
		$indexedWords = array_diff($words['indexed_words'], $words['complex_words']);

		// Boolean mode: + marks a word as required, - as excluded.
		$booleanTerms = array();
		foreach ($indexedWords as $fulltextWord)
			$booleanTerms[] = (in_array($fulltextWord, $query_params['excluded_index_words']) ? '-' : '+') . $fulltextWord;

		$query_params['boolean_match'] = implode(' ', $booleanTerms);

		// if we have bool terms to search, add them in
		if ($query_params['boolean_match'] !== '')
			$query_where[] = 'MATCH (body) AGAINST ({string:boolean_match} IN BOOLEAN MODE)';
	}

	// The INSERT part is only put in front of the SELECT when the database layer reports support for it.
	$ignoreRequest = $smcFunc['db_search_query']('insert_into_log_messages_fulltext', ($smcFunc['db_support_ignore'] ? ( '
		INSERT IGNORE INTO {db_prefix}' . $search_data['insert_into'] . '
			(' . implode(', ', array_keys($query_select)) . ')') : '') . '
		SELECT ' . implode(', ', $query_select) . '
		FROM {db_prefix}messages AS m
		WHERE ' . implode('
			AND ', $query_where) . (empty($search_data['max_results']) ? '' : '
		LIMIT ' . ($search_data['max_results'] - $search_data['indexed_results'])),
		$query_params
	);

	return $ignoreRequest;
}

/**
 * Builds the value bound to a LIKE or RLIKE placeholder for one search term.
 *
 * @param string $term The word or phrase to look for
 * @param bool $useRegexp True when the term is matched with RLIKE, false for LIKE
 * @return string The escaped pattern
 */
private function _buildMatchPattern($term, $useRegexp)
{
	// LIKE: escape its two wildcards and allow the term anywhere in the text.
	if (!$useRegexp)
		return '%' . strtr($term, array('_' => '\\_', '%' => '\\%')) . '%';

	// RLIKE: put every regex metacharacter in a bracket expression so it is
	// matched literally, then wrap the term in word boundary markers.
	// NOTE(review): [[:<:]] and [[:>:]] are the word boundary markers of the older
	// MySQL regex library - confirm the targeted server versions still accept them.
	return '[[:<:]]' . addcslashes(preg_replace(array('/([\[\]$.+*?|{}()])/'), array('[$1]'), $term), '\\\'') . '[[:>:]]';
}
||
| 272 | } |
||
| 273 | |||
| 274 | ?> |
||
|
0 ignored issues
–
show
It is not recommended to use PHP's closing tag ?> in files other than templates.
Using a closing tag in PHP files that only contain PHP code is not recommended as you might accidentally add whitespace after the closing tag which would then be output by PHP. This can cause severe problems, for example headers cannot be sent anymore. A simple precaution is to leave off the closing tag as it is not required, and it also has no negative effects whatsoever. Loading history...
|
Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.
You can also find more detailed suggestions in the “Code” section of your repository.