Passed
Pull Request — development (#3442)
by Elk
12:13 queued 06:23
created

Sphinx::_cleanWordSphinx()   A

Complexity

Conditions 2
Paths 2

Size

Total Lines 19
Code Lines 6

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 0
CRAP Score 6

Importance

Changes 0
Metric Value
cc 2
eloc 6
nc 2
nop 2
dl 0
loc 19
rs 10
c 0
b 0
f 0
ccs 0
cts 10
cp 0
crap 6
1
<?php
2
3
/**
4
 * Used when a Sphinx search daemon is running and access is via the Sphinx
5
 * native search API (SphinxAPI)
6
 *
7
 * @package   ElkArte Forum
8
 * @copyright ElkArte Forum contributors
9
 * @license   BSD http://opensource.org/licenses/BSD-3-Clause (see accompanying LICENSE.txt file)
10
 *
11
 * This file contains code covered by:
12
 * copyright: 2011 Simple Machines (http://www.simplemachines.org)
13
 *
14
 * @version 2.0 dev
15
 *
16
 */
17
18
namespace ElkArte\Search\API;
19
20
use ElkArte\Cache\Cache;
21
use ElkArte\Errors\Errors;
22
use Elkarte\User;
23
24
/**
25
 * SearchAPI-Sphinx.class.php, Sphinx API,
26
 *
27
 * What it does:
28
 *
29
 * - used when a Sphinx search daemon is running
30
 * - Access is via the Sphinx native search API (SphinxAPI)
31
 * - sphinxapi.php is part of the Sphinx package, the file must be added to SOURCEDIR
32
 *
33
 * @package Search
34
 */
35
class Sphinx extends AbstractAPI
36
{
37
	/**
38
	 * This is the last version of ElkArte that this was tested on, to protect against API changes.
39
	 *
40
	 * @var string
41
	 */
42
	public $version_compatible = 'ElkArte 2.0 dev';
43
44
	/**
45
	 * This won't work with versions of ElkArte less than this.
46
	 *
47
	 * @var string
48
	 */
49
	public $min_elk_version = 'ElkArte 1.0 Beta 1';
50
51
	/**
52
	 * Is it supported?
53
	 *
54
	 * @var bool
55
	 */
56
	public $is_supported = true;
57
58
	/**
59
	 * What words are banned?
60
	 *
61
	 * @var array
62
	 */
63
	protected $bannedWords = [];
64
65
	/**
66
	 * What is the minimum word length?
67
	 *
68
	 * @var int
69
	 */
70
	protected $min_word_length = 4;
71
72
	/**
73
	 * What databases are supported?
74
	 *
75
	 * @var array
76
	 */
77
	protected $supported_databases = ['MySQL'];
78
79
	/**
	 * Sets up the API and flags it as unsupported when the database in use
	 * is not one of the entries in $supported_databases.
	 *
	 * @param mixed $config handed straight to the parent constructor
	 * @param mixed $searchParams handed straight to the parent constructor
	 */
	public function __construct($config, $searchParams)
	{
		parent::__construct($config, $searchParams);

		// Nothing more to do when the active database is one we know how to work with
		$db_title = $this->_db->title();
		if (in_array($db_title, $this->supported_databases))
		{
			return;
		}

		$this->is_supported = false;
	}
92
93
	/**
	 * Checks that both the Sphinx daemon server and port settings are filled in,
	 * without them we can't continue.
	 *
	 * @return bool true only when both settings are non-empty
	 */
	public function isValid()
	{
		global $modSettings;

		// No server address, no point in looking at the port
		if (empty($modSettings['sphinx_searchd_server']))
		{
			return false;
		}

		return !empty($modSettings['sphinx_searchd_port']);
	}
102
103
	/**
	 * Intentionally empty for this API.
	 *
	 * Both arguments are ignored and nothing is returned.
	 *
	 * @param mixed $words not used
	 * @param mixed $search_data not used
	 */
	public function indexedWordQuery($words, $search_data)
	{
		// Nothing to build here, Sphinx relies on its own internal engine
	}
110
111
	/**
	 * Adds a search word (or quoted phrase) to the indexed words list.
	 *
	 * What it does:
	 *
	 * - Splits the word with text2words(); unless that yields exactly one sub-word
	 *   the original word is wrapped in double quotes
	 * - Always appends the result to $wordsSearch['indexed_words']
	 * - Also appends it to $wordsExclude unless $isExcluded is strictly false
	 *
	 * @param string $word the word or phrase to register
	 * @param array $wordsSearch passed by reference, receives the indexed word
	 * @param array $wordsExclude passed by reference, receives the word when excluded
	 * @param bool $isExcluded anything other than boolean false marks the word as excluded
	 * @param mixed $excludedSubjectWords not used by this implementation
	 */
	public function prepareIndexes($word, &$wordsSearch, &$wordsExclude, $isExcluded, $excludedSubjectWords)
	{
		$term = $word;

		// Anything that is not a single word is passed along as a quoted phrase
		if (count(text2words($word, null, false)) !== 1)
		{
			$term = '"' . $word . '"';
		}

		$wordsSearch['indexed_words'][] = $term;

		if ($isExcluded === false)
		{
			return;
		}

		$wordsExclude[] = $term;
	}
125
126
	/**
	 * Runs the search against the Sphinx daemon and returns the requested page of results.
	 *
	 * What it does:
	 *
	 * - Looks for cached results, keyed on the user's board access query and the search params
	 * - On a cache miss it configures a SphinxClient (server, limits, select, sorting, grouping,
	 *   field weights, filters, ranking), runs the query and caches the trimmed results for 600 seconds
	 * - Slices out the current page based on the request 'start' value and the
	 *   search_results_per_page setting
	 * - Resets $participants and keys it by the topic id of each returned match
	 * - Stores the total number of results in $this->_num_results
	 *
	 * The query text is built from $this->_searchParams->search; the $search_words and
	 * $excluded_words arguments are accepted for interface compatibility but are not read here.
	 *
	 * @param mixed $search_words not used by this implementation
	 * @param mixed $excluded_words not used by this implementation
	 * @param array $participants passed by reference, reset to topic id => false pairs
	 *
	 * @return array|int the matches for the current page keyed by match id, or 0 when no
	 * search terms remain (in that case $participants and $this->_num_results are left untouched)
	 */
	public function searchQuery($search_words, $excluded_words, &$participants)
	{
		global $context, $modSettings;

		// Only request the results if they haven't been cached yet.
		$cached_results = [];
		$cache_key = 'search_results_' . md5(User::$info->query_see_board . '_' . $context['params']);
		if (!Cache::instance()->getVar($cached_results, $cache_key))
		{
			// The API communicating with the search daemon.  This file is part of Sphinx and is not
			// distributed with ElkArte.  You will need to download the package from
			// http://sphinxsearch.com/downloads/current/ and copy sphinxapi.php from its api
			// directory to your sources directory (SOURCEDIR)
			require_once(SOURCEDIR . '/sphinxapi.php');

			// In-topic searches are neither counted nor grouped per topic
			$in_topic = !empty($this->_searchParams->topic);

			// Create an instance of the sphinx client and set a few options.
			$mySphinx = new \SphinxClient();
			$mySphinx->SetServer($modSettings['sphinx_searchd_server'], (int) $modSettings['sphinx_searchd_port']);
			$mySphinx->SetLimits(0, (int) $modSettings['sphinx_max_results'], (int) $modSettings['sphinx_max_results'], 1000);
			$mySphinx->SetSelect('*' . ($in_topic ? '' : ', COUNT(*) num') . ', WEIGHT() relevance');

			// Put together a sort string, starting with the main column sort.  The cast keeps
			// strtoupper() from being handed null when no direction was supplied
			$this->_searchParams->sort_dir = strtoupper((string) $this->_searchParams->sort_dir);
			$sphinx_sort = $this->_searchParams->sort === 'id_msg' ? 'id_topic' : $this->_searchParams->sort;

			// Add secondary sorting based on relevance value (if not the main sort method) and age
			$sphinx_sort .= ' ' . $this->_searchParams->sort_dir . ($this->_searchParams->sort === 'relevance' ? '' : ', relevance DESC') . ', poster_time DESC';

			// Grouping by topic id makes it return only one result per topic, so don't set that for in-topic searches
			if (!$in_topic)
			{
				$mySphinx->SetGroupBy('id_topic', SPH_GROUPBY_ATTR, 'relevance DESC');
			}

			// Set up the sort expression
			$mySphinx->SetSortMode(SPH_SORT_EXTENDED, $sphinx_sort);

			// Update the field weights for subject vs body
			$subject_weight = !empty($modSettings['search_weight_subject']) ? (int) $modSettings['search_weight_subject'] : 30;
			$mySphinx->SetFieldWeights(['subject' => $subject_weight, 'body' => 100 - $subject_weight]);

			// Set the limits based on the search parameters.
			$this->buildQueryLimits($mySphinx);

			// Construct the (binary mode & |) query while accounting for excluded words
			$query = $this->_searchArray->searchArrayExtended($this->_searchParams->search);

			// If no search terms are left after comparing against excluded words (i.e. "test -test" or "test last -test -last"),
			// sending that to Sphinx would result in a fatal error
			if (trim($query) === '')
			{
				// Instead, fail gracefully (return "no results")
				return 0;
			}

			// Subject only searches need to be specified.
			if ($this->_searchParams->subject_only)
			{
				$query = '@(subject) ' . $query;
			}

			$mySphinx->SetRankingMode(SPH_RANK_EXPR, 'sum((4*lcs+2*(min_hit_pos==1)+word_count)*user_weight*position) + acprel + bm25');

			// Execute the search query.
			$index = (!empty($modSettings['sphinx_index_prefix']) ? $modSettings['sphinx_index_prefix'] : 'elkarte') . '_index';
			$request = $mySphinx->Query($query, $index);

			// Can a connection to the daemon be made?
			if ($request === false)
			{
				// Just log the error.
				if ($mySphinx->GetLastError())
				{
					Errors::instance()->log_error($mySphinx->GetLastError());
				}

				Errors::instance()->fatal_lang_error('error_no_search_daemon');
			}

			// Get the relevant information from the search results.
			$cached_results = [
				'matches' => [],
				'num_results' => $request['total'],
			];

			if (isset($request['matches']))
			{
				foreach ($request['matches'] as $msgID => $match)
				{
					// The per topic count is only requested and grouped for searches that are not
					// limited to a single topic, so it may well be missing from the attributes
					$num_matches = isset($match['attrs']['@count']) ? $match['attrs']['@count'] : 0;

					$cached_results['matches'][$msgID] = [
						'id' => $match['attrs']['id_topic'],
						'relevance' => round($num_matches + $match['attrs']['relevance'] / 5000, 1) . '%',
						'num_matches' => $in_topic ? 0 : $num_matches,
						'matches' => [],
					];
				}
			}

			// Store the search results in the cache.
			Cache::instance()->put($cache_key, $cached_results, 600);
		}

		// Hand back just the page that was asked for
		$participants = [];
		$topics = [];
		foreach (array_slice(array_keys($cached_results['matches']), $this->_req->getRequest('start', 'intval', 0), $modSettings['search_results_per_page']) as $msgID)
		{
			$topics[$msgID] = $cached_results['matches'][$msgID];
			$participants[$cached_results['matches'][$msgID]['id']] = false;
		}

		$this->_num_results = $cached_results['num_results'];

		return $topics;
	}
242
243
	/**
	 * Always answers false for this API.
	 *
	 * NOTE(review): presumably this tells the caller not to use ElkArte's own word
	 * index because Sphinx relies on its internal engine - confirm against AbstractAPI.
	 *
	 * @return bool always false
	 */
	public function useWordIndex()
	{
		return false;
	}
247
248
	/**
	 * Builds the query modifiers based on message id range, topic, boards and members
	 *
	 * What it does:
	 *
	 * - Sets an id range when a minimum and/or maximum message id was supplied.  A missing
	 *   minimum becomes 0 and a missing maximum falls back to $modSettings['maxMsgID'], so the
	 *   client always receives two integers
	 * - Filters on id_topic, id_board and id_member when the matching search params are set
	 *
	 * @param \SphinxClient $mySphinx the client to apply the range and filters to
	 */
	public function buildQueryLimits($mySphinx)
	{
		global $modSettings;

		$has_min = !empty($this->_searchParams->min_msg_id);
		$has_max = !empty($this->_searchParams->max_msg_id);

		if ($has_min || $has_max)
		{
			// Only one of the two may have been supplied, never hand an empty bound to the client
			$min_id = $has_min ? (int) $this->_searchParams->min_msg_id : 0;
			$max_id = $has_max ? (int) $this->_searchParams->max_msg_id : (int) $modSettings['maxMsgID'];

			$mySphinx->SetIDRange($min_id, $max_id);
		}

		if (!empty($this->_searchParams->topic))
		{
			$mySphinx->SetFilter('id_topic', [(int) $this->_searchParams->topic]);
		}

		if (!empty($this->_searchParams->brd))
		{
			$mySphinx->SetFilter('id_board', $this->_searchParams->brd);
		}

		if (!empty($this->_searchParams->_memberlist))
		{
			$mySphinx->SetFilter('id_member', $this->_searchParams->_memberlist);
		}
	}
277
}
278