Completed
Push — update/wpcom-search-merge ( 77e11c...ef0086 )
by
unknown
15:39 queued 09:23
created

Jetpack_WPES_Query_Builder::add_aggs()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 6

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 1
nc 1
nop 2
dl 0
loc 6
rs 10
c 0
b 0
f 0
1
<?php
2
3
/**
4
 * Provides an interface for easily building a complex search query that
5
 * combines multiple ranking signals.
6
 *
7
 *
8
 * $bldr = new Jetpack_WPES_Query_Builder();
9
 * $bldr->add_filter( ... );
10
 * $bldr->add_filter( ... );
11
 * $bldr->add_query( ... );
12
 * $es_query = $bldr->build_query();
13
 *
14
 *
15
 * All ES queries take a standard form with a main query (with some filters),
16
 *  wrapped in a function_score
17
 *
18
 * Most functions are chainable, e.g. $bldr->add_filter( ... )->add_query( ... )->build_query();
19
 *
20
 * Bucketed queries use an aggregation to diversify results, e.g. a bunch
21
 *  of separate filters, each used to get a different set of results.
22
 *
23
 */
24
25
class Jetpack_WPES_Query_Builder {

	// Filters registered through add_filter(); combined by build_filter()
	protected $es_filters = array();

	// Custom boosting with function_score
	protected $functions = array();
	protected $weighting_functions = array();
	protected $decays    = array();
	protected $scripts   = array();
	protected $functions_max_boost  = 2.0;
	protected $functions_score_mode = 'multiply';
	protected $functions_boost_mode = 'multiply';
	protected $query_bool_boost     = null;

	// General aggregations for buckets and metrics
	protected $aggs_query = false;
	protected $aggs       = array();

	// The set of top level text queries to combine
	protected $must_queries    = array();
	protected $should_queries  = array();
	protected $dis_max_queries = array();

	// Bucketed ("diverse") queries: one named filter per bucket, plus shared sub-aggregations
	protected $diverse_buckets_query = false;
	protected $bucket_filters        = array();
	protected $bucket_sub_aggs       = array();

	////////////////////////////////////
	// Methods for building a query

	/**
	 * Register a filter; all registered filters are combined by build_filter()
	 *
	 * @param $filter array An ES filter structure
	 *
	 * @return $this
	 */
	public function add_filter( $filter ) {
		$this->es_filters[] = $filter;

		return $this;
	}

	/**
	 * Register a top level query
	 *
	 * @param $query array An ES query structure
	 * @param $type string 'must', 'should' or 'dis_max'; any other value is treated as 'must'
	 *
	 * @return $this
	 */
	public function add_query( $query, $type = 'must' ) {
		switch ( $type ) {
			case 'dis_max':
				$this->dis_max_queries[] = $query;
				break;

			case 'should':
				$this->should_queries[] = $query;
				break;

			case 'must':
			default:
				$this->must_queries[] = $query;
				break;
		}

		return $this;
	}

	/**
	 * Add any weighting function to the query
	 *
	 * The structure is passed through unchanged as one entry of the
	 * function_score 'functions' list produced by build_query().
	 *
	 * @see https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-function-score-query.html
	 *
	 * @param $function array A function structure to apply to the query
	 *
	 * @return $this
	 */
	public function add_weighting_function( $function ) {
		$this->weighting_functions[] = $function;

		return $this;
	}

	/**
	 * Add a scoring function to the query
	 *
	 * NOTE: For decays (linear, exp, or gauss), use Jetpack_WPES_Query_Builder::add_decay() instead
	 *
	 * @see https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-function-score-query.html
	 *
	 * @param $function string name of the function
	 * @param $params array functions parameters, keyed by field name; build_query() copies each
	 *                      key into the function's 'field' entry
	 *
	 * @return $this
	 */
	public function add_function( $function, $params ) {
		$this->functions[ $function ][] = $params;

		return $this;
	}

	/**
	 * Add a decay function to score results
	 *
	 * This method should be used instead of Jetpack_WPES_Query_Builder::add_function() for decays, as the internal  ES structure
	 * is slightly different for them.
	 *
	 * @see https://www.elastic.co/guide/en/elasticsearch/guide/current/decay-functions.html
	 *
	 * @param $function string name of the decay function - linear, exp, or gauss
	 * @param $params array The decay functions parameters, keyed by field name, passed to ES directly
	 *
	 * @return $this
	 */
	public function add_decay( $function, $params ) {
		$this->decays[ $function ][] = $params;

		return $this;
	}

	/**
	 * Add a scoring mode to the query
	 *
	 * @see https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-function-score-query.html
	 *
	 * @param $mode string name of how to score
	 *
	 * @return $this
	 */
	public function add_score_mode_to_functions( $mode='multiply' ) {
		$this->functions_score_mode = $mode;

		return $this;
	}

	/**
	 * Set the function_score 'boost_mode' emitted by build_query()
	 *
	 * @param $mode string name of the boost mode
	 *
	 * @return $this
	 */
	public function add_boost_mode_to_functions( $mode='multiply' ) {
		$this->functions_boost_mode = $mode;

		return $this;
	}

	/**
	 * Set the function_score 'max_boost' emitted by build_query()
	 *
	 * @param $boost float maximum boost
	 *
	 * @return $this
	 */
	public function add_max_boost_to_functions( $boost ) {
		$this->functions_max_boost = $boost;

		return $this;
	}

	/**
	 * Set a 'boost' on the bool query emitted by build_query()
	 *
	 * @param $boost float boost value; null (the default) omits the key
	 *
	 * @return $this
	 */
	public function add_boost_to_query_bool( $boost ) {
		$this->query_bool_boost = $boost;

		return $this;
	}

	/**
	 * Register a general aggregation; replaces any aggregation already stored under the same name
	 *
	 * @param $aggs_name string name of the aggregation
	 * @param $aggs array The aggregation structure
	 *
	 * @return $this
	 */
	public function add_aggs( $aggs_name, $aggs ) {
		$this->aggs_query = true;
		$this->aggs[$aggs_name] = $aggs;

		return $this;
	}

	/**
	 * Set the sub-aggregations of a general aggregation
	 *
	 * Any sub-aggregations previously set under that name are replaced.
	 *
	 * @param $aggs_name string name of the parent aggregation
	 * @param $sub_aggs array The sub-aggregation structure
	 *
	 * @return $this
	 */
	public function add_aggs_sub_aggs( $aggs_name, $sub_aggs ) {
		// Plain assignment creates the parent entry when it is missing, so no
		// existence check is needed (the old array_key_exists() call failed on
		// an unknown $aggs_name, and its result was overwritten anyway).
		$this->aggs[$aggs_name]['aggs'] = $sub_aggs;

		return $this;
	}

	/**
	 * Add a query that is both a named bucket filter and a dis_max query
	 *
	 * @param $name string bucket name
	 * @param $query array An ES query structure
	 *
	 * @return $this
	 */
	public function add_bucketed_query( $name, $query ) {
		$this->_add_bucket_filter( $name, $query );

		$this->add_query( $query, 'dis_max' );

		return $this;
	}

	/**
	 * Add a terms match that is both a named bucket filter and a constant_score dis_max query
	 *
	 * @param $name string bucket name
	 * @param $field string field to match on
	 * @param $terms array|mixed term or list of terms; a non-array value is wrapped in an array
	 * @param $boost int|float boost for the constant_score query
	 *
	 * @return $this
	 */
	public function add_bucketed_terms( $name, $field, $terms, $boost = 1 ) {
		if ( ! is_array( $terms ) ) {
			$terms = array( $terms );
		}

		$terms_filter = array(
			'terms' => array(
				$field => $terms,
			),
		);

		$this->_add_bucket_filter( $name, $terms_filter );

		$this->add_query( array(
			'constant_score' => array(
				'filter' => $terms_filter,
				'boost'  => $boost,
			),
		), 'dis_max' );

		return $this;
	}

	/**
	 * Merge sub-aggregations into the set applied to bucketed queries
	 *
	 * @param $agg array Sub-aggregations keyed by name; merged with array_merge()
	 *
	 * @return $this
	 */
	public function add_bucket_sub_aggs( $agg ) {
		$this->bucket_sub_aggs = array_merge( $this->bucket_sub_aggs, $agg );

		return $this;
	}

	/**
	 * Store a named bucket filter and flag the query as bucketed
	 *
	 * @param $name string bucket name
	 * @param $filter array An ES filter structure
	 *
	 * @return void
	 */
	protected function _add_bucket_filter( $name, $filter ) {
		$this->diverse_buckets_query   = true;
		$this->bucket_filters[ $name ] = $filter;
	}

	////////////////////////////////////
	// Building Final Query

	/**
	 * Combine all the queries, functions, decays, scripts, and max_boost into an ES query
	 *
	 * Does not modify the builder, so it can be called repeatedly with the same result.
	 *
	 * @return array Array representation of the built ES query
	 */
	public function build_query() {
		// Work on a copy: appending to $this->must_queries would duplicate the
		// dis_max clause on every subsequent call.
		$must_queries = $this->must_queries;

		//dis_max queries just become a single must query
		if ( ! empty( $this->dis_max_queries ) ) {
			$must_queries[] = array(
				'dis_max' => array(
					'queries' => $this->dis_max_queries,
				),
			);
		}

		if ( empty( $must_queries ) ) {
			$must_queries = array(
				array(
					'match_all' => array(),
				),
			);
		}

		$query = array(
			'bool' => array(
				'must' => $must_queries,
			),
		);

		if ( ! empty( $this->should_queries ) ) {
			$query['bool']['should'] = $this->should_queries;
		}

		if ( ! is_null( $this->query_bool_boost ) ) {
			$query['bool']['boost'] = $this->query_bool_boost;
		}

		$has_score_adjustments = ! empty( $this->functions )
			|| ! empty( $this->decays )
			|| ! empty( $this->scripts )
			|| ! empty( $this->weighting_functions );

		// Without any function score adjustments the bool query is the final query
		if ( ! $has_score_adjustments ) {
			return $query;
		}

		$weighting_functions = array();

		// Scoring functions carry the field name inside their own parameters
		foreach ( $this->functions as $function_type => $configs ) {
			foreach ( $configs as $config ) {
				foreach ( $config as $field => $params ) {
					$func_arr = $params;

					$func_arr['field'] = $field;

					$weighting_functions[] = array(
						$function_type => $func_arr,
					);
				}
			}
		}

		// Decays are keyed by field name instead
		foreach ( $this->decays as $decay_type => $configs ) {
			foreach ( $configs as $config ) {
				foreach ( $config as $field => $params ) {
					$weighting_functions[] = array(
						$decay_type => array(
							$field => $params,
						),
					);
				}
			}
		}

		foreach ( $this->scripts as $script ) {
			$weighting_functions[] = array(
				'script_score' => array(
					'script' => $script,
				),
			);
		}

		// Functions registered with add_weighting_function() are already complete structures
		foreach ( $this->weighting_functions as $function ) {
			$weighting_functions[] = $function;
		}

		return array(
			'function_score' => array(
				'query'      => $query,
				'functions'  => $weighting_functions,
				'max_boost'  => $this->functions_max_boost,
				'score_mode' => $this->functions_score_mode,
				'boost_mode' => $this->functions_boost_mode,
			),
		);
	}

	/**
	 * Assemble the 'filter' portion of an ES query, from all registered filters
	 *
	 * A single filter is returned as is; several filters are wrapped in an 'and' filter.
	 *
	 * @return array|null Combined ES filters, or null if none have been defined
	 */
	public function build_filter() {
		if ( empty( $this->es_filters ) ) {
			return null;
		}

		if ( 1 === count( $this->es_filters ) ) {
			return $this->es_filters[0];
		}

		return array(
			'and' => $this->es_filters,
		);
	}

	/**
	 * Assemble the 'aggregation' portion of an ES query.
	 *
	 * General aggregations registered with add_aggs() take precedence: when any exist they
	 * are returned alone. Otherwise the bucket sub-aggregations are returned, wrapped in a
	 * 'topics' filters aggregation when bucket filters have been registered.
	 *
	 * @return array An aggregation query, or an empty array when there is nothing to aggregate
	 */
	public function build_aggregation() {
		if ( ! empty( $this->aggs_query ) ) {
			return $this->aggs;
		}

		if ( empty( $this->bucket_sub_aggs ) ) {
			return array();
		}

		if ( ! $this->diverse_buckets_query ) {
			return $this->bucket_sub_aggs;
		}

		$bucket_filters = array();
		foreach ( $this->bucket_filters as $bucket_name => $filter ) {
			$bucket_filters[ $bucket_name ] = $filter;
		}

		return array(
			'topics' => array(
				'filters' => array(
					'filters' => $bucket_filters,
				),
				'aggs'    => $this->bucket_sub_aggs,
			),
		);
	}

}
387