Completed
Push — merge/wpes-query-builder ( 306334 )
by
unknown
56:00 queued 43:58
created

Jetpack_WPES_Query_Builder::build_query()   F

Complexity

Conditions 22
Paths 544

Size

Total Lines 103
Code Lines 54

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 22
eloc 54
nc 544
nop 0
dl 0
loc 103
rs 2.6279
c 0
b 0
f 0

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

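Commonly applied refactorings include Extract Method: move a coherent group of statements out of the long method into its own well-named method.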

1
<?php
2
3
4
/**
 * Provides an interface for easily building a complex search query that
 * combines multiple ranking signals.
 *
 * Example:
 *
 *     $bldr = new Jetpack_WPES_Query_Builder();
 *     $bldr->add_filter( ... );
 *     $bldr->add_filter( ... );
 *     $bldr->add_query( ... );
 *     $es_query = $bldr->build_query();
 *
 * All ES queries take a standard form: a main bool query (with any filters),
 * which is wrapped in a function_score query when scoring functions,
 * decays, or scripts have been registered.
 *
 * Most methods are chainable, e.g.
 * $bldr->add_filter( ... )->add_query( ... )->build_query();
 *
 * Bucketed queries use a filters aggregation to diversify results, i.e. a
 * set of named filters that each collect their own set of results.
 */
class Jetpack_WPES_Query_Builder {

	// Filters registered through add_filter(); combined by build_filter().
	protected $es_filters = array();

	// Custom boosting with function_score.
	// $functions and $decays are keyed by function name; each entry is a list
	// of arrays mapping field => params.
	protected $functions = array();
	// Ready-made function structures, passed to function_score unchanged.
	protected $weighting_functions = array();
	protected $decays    = array();
	// Scripts emitted as script_score functions. No method of this class
	// populates this list.
	protected $scripts   = array();
	protected $functions_max_boost  = 2.0;
	protected $functions_score_mode = 'multiply';
	protected $functions_boost_mode = 'multiply';
	// Optional boost for the bool query; null means "do not emit a boost".
	protected $query_bool_boost     = null;

	// General aggregations for buckets and metrics
	protected $aggs_query = false;
	protected $aggs       = array();

	// The set of top level text queries to combine
	protected $must_queries    = array();
	protected $should_queries  = array();
	protected $dis_max_queries = array();

	// Bucketed ("diverse") query state, see add_bucketed_query().
	protected $diverse_buckets_query = false;
	protected $bucket_filters        = array();
	protected $bucket_sub_aggs       = array();

	////////////////////////////////////
	// Methods for building a query

	/**
	 * Register a filter to apply to the query.
	 *
	 * @param array $filter An ES filter structure.
	 *
	 * @return $this
	 */
	public function add_filter( $filter ) {
		$this->es_filters[] = $filter;

		return $this;
	}

	/**
	 * Register a top level query.
	 *
	 * @param array  $query An ES query structure.
	 * @param string $type  'must', 'should', or 'dis_max'. Any other value is
	 *                      treated as 'must'.
	 *
	 * @return $this
	 */
	public function add_query( $query, $type = 'must' ) {
		switch ( $type ) {
			case 'dis_max':
				$this->dis_max_queries[] = $query;
				break;

			case 'should':
				$this->should_queries[] = $query;
				break;

			case 'must':
			default:
				$this->must_queries[] = $query;
				break;
		}

		return $this;
	}

	/**
	 * Add any weighting function to the query
	 *
	 * @see https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-function-score-query.html
	 *
	 * @param array $function A function structure to apply to the query
	 *
	 * @return $this
	 */
	public function add_weighting_function( $function ) {
		$this->weighting_functions[] = $function;

		return $this;
	}

	/**
	 * Add a scoring function to the query
	 *
	 * NOTE: For decays (linear, exp, or gauss), use Jetpack_WPES_Query_Builder::add_decay() instead
	 *
	 * @see https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-function-score-query.html
	 *
	 * @param string $function name of the function
	 * @param array  $params   function parameters, as an array of field => params;
	 *                         the field name is merged into the params as 'field'
	 *
	 * @return $this
	 */
	public function add_function( $function, $params ) {
		$this->functions[ $function ][] = $params;

		return $this;
	}

	/**
	 * Add a decay function to score results
	 *
	 * This method should be used instead of Jetpack_WPES_Query_Builder::add_function() for decays, as the internal ES structure
	 * is slightly different for them.
	 *
	 * @see https://www.elastic.co/guide/en/elasticsearch/guide/current/decay-functions.html
	 *
	 * @param string $function name of the decay function - linear, exp, or gauss
	 * @param array  $params   The decay function's parameters, as an array of
	 *                         field => params, passed to ES directly
	 *
	 * @return $this
	 */
	public function add_decay( $function, $params ) {
		$this->decays[ $function ][] = $params;

		return $this;
	}

	/**
	 * Set the score mode of the function_score query
	 *
	 * @see https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-function-score-query.html
	 *
	 * @param string $mode name of how to score
	 *
	 * @return $this
	 */
	public function add_score_mode_to_functions( $mode='multiply' ) {
		$this->functions_score_mode = $mode;

		return $this;
	}

	/**
	 * Set the boost mode of the function_score query
	 *
	 * @see https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-function-score-query.html
	 *
	 * @param string $mode name of the boost mode
	 *
	 * @return $this
	 */
	public function add_boost_mode_to_functions( $mode='multiply' ) {
		$this->functions_boost_mode = $mode;

		return $this;
	}

	/**
	 * Set the max_boost of the function_score query
	 *
	 * A falsy value (0, null, false) causes max_boost to be left out of the
	 * built query entirely.
	 *
	 * @param float $boost Maximum boost
	 *
	 * @return $this
	 */
	public function add_max_boost_to_functions( $boost ) {
		$this->functions_max_boost = $boost;

		return $this;
	}

	/**
	 * Set the boost applied to the main bool query
	 *
	 * @param float|null $boost Boost value, or null to emit no boost
	 *
	 * @return $this
	 */
	public function add_boost_to_query_bool( $boost ) {
		$this->query_bool_boost = $boost;

		return $this;
	}

	/**
	 * Register a general aggregation.
	 *
	 * Once any general aggregation is registered, build_aggregation() returns
	 * the general aggregations instead of the bucketed ones.
	 *
	 * @param string $aggs_name Name of the aggregation
	 * @param array  $aggs      The aggregation structure
	 *
	 * @return $this
	 */
	public function add_aggs( $aggs_name, $aggs ) {
		$this->aggs_query = true;
		$this->aggs[$aggs_name] = $aggs;

		return $this;
	}

	/**
	 * Set (replace) the sub-aggregations of a general aggregation.
	 *
	 * If no aggregation named $aggs_name exists yet, an entry holding only
	 * the sub-aggregations is created.
	 *
	 * @param string $aggs_name Name of the parent aggregation
	 * @param array  $sub_aggs  The sub-aggregations
	 *
	 * @return $this
	 */
	public function add_aggs_sub_aggs( $aggs_name, $sub_aggs ) {
		// A plain assignment creates the parent entry when needed. Calling
		// array_key_exists() on a missing entry raises a warning on PHP 7 and
		// a TypeError on PHP 8, and the value was overwritten here anyway.
		$this->aggs[$aggs_name]['aggs'] = $sub_aggs;

		return $this;
	}

	/**
	 * Add a query that both selects results and defines a named bucket.
	 *
	 * @param string $name  Bucket name
	 * @param array  $query ES query, used as the bucket filter and as a dis_max query
	 *
	 * @return $this
	 */
	public function add_bucketed_query( $name, $query ) {
		$this->_add_bucket_filter( $name, $query );

		$this->add_query( $query, 'dis_max' );

		return $this;
	}

	/**
	 * Add a named bucket matching one or more terms of a field.
	 *
	 * @param string       $name  Bucket name
	 * @param string       $field Field to match on
	 * @param array|string $terms One term, or a list of terms
	 * @param float        $boost Boost of the constant_score query
	 *
	 * @return $this
	 */
	public function add_bucketed_terms( $name, $field, $terms, $boost = 1 ) {
		if ( ! is_array( $terms ) ) {
			$terms = array( $terms );
		}

		$terms_filter = array(
			'terms' => array(
				$field => $terms,
			),
		);

		$this->_add_bucket_filter( $name, $terms_filter );

		$this->add_query( array(
			'constant_score' => array(
				'filter' => $terms_filter,
				'boost' => $boost,
			),
		), 'dis_max' );

		return $this;
	}

	/**
	 * Merge sub-aggregations into those applied to every bucket.
	 *
	 * @param array $agg Aggregations keyed by name
	 *
	 * @return $this
	 */
	public function add_bucket_sub_aggs( $agg ) {
		$this->bucket_sub_aggs = array_merge( $this->bucket_sub_aggs, $agg );

		return $this;
	}

	/**
	 * Record a bucket filter and mark the query as a bucketed query.
	 *
	 * @param string $name   Bucket name
	 * @param array  $filter ES filter defining the bucket
	 *
	 * @return $this
	 */
	protected function _add_bucket_filter( $name, $filter ) {
		$this->diverse_buckets_query   = true;
		$this->bucket_filters[ $name ] = $filter;

		return $this;
	}

	////////////////////////////////////
	// Building Final Query

	/**
	 * Combine all the queries, functions, decays, scripts, and max_boost into an ES query
	 *
	 * The builder's state is not modified, so calling this repeatedly yields
	 * the same query each time.
	 *
	 * @return array Array representation of the built ES query
	 */
	public function build_query() {
		$query = $this->build_bool_query();

		$has_scoring = ! empty( $this->functions )
			|| ! empty( $this->decays )
			|| ! empty( $this->scripts )
			|| ! empty( $this->weighting_functions );

		if ( ! $has_scoring ) {
			return $query;
		}

		// If there are any function score adjustments, then combine those
		$query = array(
			'function_score' => array(
				'query'     => $query,
				'functions' => $this->build_weighting_functions(),
				'score_mode' => $this->functions_score_mode,
				'boost_mode' => $this->functions_boost_mode,
			),
		);

		if ( $this->functions_max_boost ) {
			$query['function_score']['max_boost'] = $this->functions_max_boost;
		}

		return $query;
	}

	/**
	 * Assemble the main bool query from the must/should/dis_max queries,
	 * the filters, and the optional boost.
	 *
	 * @return array The bool query
	 */
	private function build_bool_query() {
		// Work on a copy so that repeated builds do not pile up clauses.
		$must = $this->must_queries;

		// dis_max queries just become a single must query
		if ( ! empty( $this->dis_max_queries ) ) {
			$must[] = array(
				'dis_max' => array(
					'queries' => $this->dis_max_queries,
				),
			);
		}

		if ( empty( $must ) ) {
			$must = array(
				array(
					'match_all' => array(),
				),
			);
		}

		$bool = array(
			'must' => $must,
		);

		if ( ! empty( $this->should_queries ) ) {
			$bool['should'] = $this->should_queries;
		}

		$filter = $this->build_filter();

		if ( $filter ) {
			$bool['filter'] = $filter;
		}

		if ( ! is_null( $this->query_bool_boost ) ) {
			$bool['boost'] = $this->query_bool_boost;
		}

		return array(
			'bool' => $bool,
		);
	}

	/**
	 * Flatten the registered weighting functions, scoring functions, decays,
	 * and scripts (in that order) into one list for function_score.
	 *
	 * @return array List of function structures
	 */
	private function build_weighting_functions() {
		// Work on a copy so that repeated builds do not duplicate functions.
		$weighting = $this->weighting_functions;

		foreach ( $this->functions as $function_type => $configs ) {
			foreach ( $configs as $config ) {
				foreach ( $config as $field => $params ) {
					// Scoring functions carry their field inside the params.
					$params['field'] = $field;

					$weighting[] = array(
						$function_type => $params,
					);
				}
			}
		}

		foreach ( $this->decays as $decay_type => $configs ) {
			foreach ( $configs as $config ) {
				foreach ( $config as $field => $params ) {
					// Decays are keyed by field instead.
					$weighting[] = array(
						$decay_type => array(
							$field => $params,
						),
					);
				}
			}
		}

		foreach ( $this->scripts as $script ) {
			$weighting[] = array(
				'script_score' => array(
					'script' => $script,
				),
			);
		}

		return $weighting;
	}

	/**
	 * Assemble the 'filter' portion of an ES query, from all registered filters
	 *
	 * NOTE(review): multiple filters are wrapped in an 'and' filter; confirm
	 * that the targeted Elasticsearch version still accepts it.
	 *
	 * @return array|null Combined ES filters, or null if none have been defined
	 */
	public function build_filter() {
		if ( empty( $this->es_filters ) ) {
			return null;
		}

		if ( 1 === count( $this->es_filters ) ) {
			return $this->es_filters[0];
		}

		return array(
			'and' => $this->es_filters,
		);
	}

	/**
	 * Assemble the 'aggregation' portion of an ES query.
	 *
	 * General aggregations (see add_aggs()) take precedence. Otherwise the
	 * bucket sub-aggregations are returned, wrapped in a 'topics' filters
	 * aggregation when bucket filters have been registered.
	 *
	 * @return array The aggregations, or an empty array if there are none
	 */
	public function build_aggregation() {
		// General aggregations replace the bucketed structure entirely.
		if ( ! empty( $this->aggs_query ) ) {
			return $this->aggs;
		}

		if ( empty( $this->bucket_sub_aggs ) ) {
			return array();
		}

		if ( ! $this->diverse_buckets_query ) {
			return $this->bucket_sub_aggs;
		}

		return array(
			'topics' => array(
				'filters' => array(
					'filters' => $this->bucket_filters,
				),
				'aggs' => $this->bucket_sub_aggs,
			),
		);
	}

}
397