Completed
Push — update/package-release-managem... ( 7f9196...940e6c )
by Jeremy
82:11 queued 75:34
created

truncate_string()   A

Complexity

Conditions 5
Paths 7

Size

Total Lines 22

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 5
nc 7
nop 3
dl 0
loc 22
rs 9.2568
c 0
b 0
f 0
1
<?php
2
3
/**
4
 * Parse a pure text query into a WordPress Elasticsearch query. This builds on
5
 * the Jetpack_WPES_Query_Builder() to provide search query parsing.
6
 *
7
 * The key part of this parser is taking a user's query string typed into a box
8
 * and converting it into an ES search query.
9
 *
10
 * This varies by application, but roughly it means extracting some parts of the query
11
 * (authors, tags, and phrases) that are treated as a filter. Then taking the
12
 * remaining words and building the correct query (possibly with prefix searching
13
 * if we are doing search as you type)
14
 *
15
 * This class only supports ES 2.x+
16
 *
17
 * This parser builds queries of the form:
18
 *   bool:
19
 *     must:
20
 *       AND match of a single field (ideally an edgengram field)
21
 *     filter:
22
 *       filter clauses from context (eg @gibrown, #news, etc)
23
 *     should:
24
 *       boosting of results by various fields
25
 *
26
 * Features supported:
27
 *  - search as you type
28
 *  - phrases
29
 *  - supports querying across multiple languages at once
30
 *
31
 * Example usage (from Search on Reader Manage):
32
 *
33
 *		require_lib( 'jetpack-wpes-query-builder/jetpack-wpes-search-query-parser' );
34
 *		$parser = new Jetpack_WPES_Search_Query_Parser( $args['q'], array( $lang ) );
35
 *
36
 *		//author
37
 *		$parser->author_field_filter( array(
38
 *			'prefixes' => array( '@' ),
39
 *			'wpcom_id_field' => 'author_id',
40
 *			'must_query_fields' => array( 'author.engram', 'author_login.engram' ),
41
 *			'boost_query_fields' => array( 'author^2', 'author_login^2', 'title.default.engram' ),
42
 *		) );
43
 *
44
 *		//remainder of query
45
 *		$match_content_fields = $parser->merge_ml_fields(
46
 *			array(
47
 *				'all_content' => 0.1,
48
 *			),
49
 *			array(
50
 *				'all_content.default.engram^0.1',
51
 *			)
52
 *		);
53
 *		$boost_content_fields = $parser->merge_ml_fields(
54
 *			array(
55
 *				'title' => 2,
56
 *				'description' => 1,
57
 *				'tags' => 1,
58
 *			),
59
 *			array(
60
 *				'author_login^2',
61
 *				'author^2',
62
 *			)
63
 *		);
64
 *
65
 *		$parser->phrase_filter( array(
66
 *			'must_query_fields' => $match_content_fields,
67
 *			'boost_query_fields' => $boost_content_fields,
68
 *		) );
69
 *		$parser->remaining_query( array(
70
 *			'must_query_fields' => $match_content_fields,
71
 *			'boost_query_fields' => $boost_content_fields,
72
 *		) );
73
 *
74
 *		//Boost on phrases
75
 *		$parser->remaining_query( array(
76
 *			'boost_query_fields' => $boost_content_fields,
77
 *			'boost_query_type'   => 'phrase',
78
 *		) );
79
 *
80
 *		//boosting
81
 *		$parser->add_max_boost_to_functions( 20 );
82
 *		$parser->add_function( 'field_value_factor', array(
83
 *			'follower_count' => array(
84
 *				'modifier' => 'sqrt',
85
 *				'factor' => 1,
86
 *				'missing' => 0,
87
 *			) ) );
88
 *
89
 *		//Filtering
90
 *		$parser->add_filter( array(
91
 *			'exists' => array( 'field' => 'langs.' . $lang )
92
 *		) );
93
 *
94
 *		//run the query
95
 *		$es_query_args = array(
96
 *			'name' => 'feeds',
97
 *			'blog_id' => false,
98
 *			'security_strategy' => 'a8c',
99
 *			'type' => 'feed,blog',
100
 *			'fields' => array( 'blog_id', 'feed_id' ),
101
 *			'query' => $parser->build_query(),
102
 *			'filter' => $parser->build_filter(),
103
 *			'size' => $size,
104
 *			'from' => $from
105
 *		);
106
 *		$es_results = es_api_search_index( $es_query_args, 'api-feed-find' );
107
 *
108
 */
109
110
jetpack_require_lib( 'jetpack-wpes-query-builder' );
111
112
class Jetpack_WPES_Search_Query_Parser extends Jetpack_WPES_Query_Builder {
113
114
	protected $orig_query = '';
115
	protected $current_query = '';
116
	protected $langs;
117
	protected $avail_langs = array( 'ar', 'bg', 'ca', 'cs', 'da', 'de', 'el', 'en', 'es', 'eu', 'fa', 'fi', 'fr', 'he', 'hi', 'hu', 'hy', 'id', 'it', 'ja', 'ko', 'nl', 'no', 'pt', 'ro', 'ru', 'sv', 'tr', 'zh' );
118
119
	/*
	 * Set up the parser for one user query.
	 *
	 *  $user_query: text typed by the user; kept as the original query and also
	 *    used as the starting value of the working (current) query
	 *  $langs: list of language codes, normalized through norm_langs()
	 */
	public function __construct( $user_query, $langs ) {
		$this->orig_query    = $user_query;
		$this->current_query = $user_query;
		$this->langs         = $this->norm_langs( $langs );
	}
124
125
	protected $extracted_phrases = array();
126
127
	/*
	 * Return the working query, i.e. the user query minus whatever the
	 * filter methods have already removed from it.
	 */
	public function get_current_query() {
		return $this->current_query;
	}
130
131
	/*
	 * Replace the working query with $q. The original query is left untouched.
	 */
	public function set_current_query( $q ) {
		$this->current_query = $q;
	}
134
135
	///////////////////////////////////////////////////////
136
	// Methods for Building arrays of multilingual fields
137
138
	/*
	 * Normalize language codes
	 *
	 * Each code is cut at the first '-' or '_' (so a region suffix is dropped).
	 * A code found in $avail_langs is kept; anything else maps to 'default'.
	 * The result holds each value at most once, in first-seen order.
	 */
	public function norm_langs( $langs ) {
		$seen = array();
		foreach ( $langs as $code ) {
			$primary = strtok( $code, '-_' );
			$key     = in_array( $primary, $this->avail_langs ) ? $primary : 'default';
			// Using the code as the array key de-duplicates repeated languages.
			$seen[ $key ] = true;
		}
		return array_keys( $seen );
	}
153
154
	/*
	 * Expand field prefixes into per-language boosted fields.
	 *
	 *  $fields2boosts: map of field prefix => boost. Every prefix is expanded to
	 *    "<prefix>.<lang>^<boost>" for each language this parser was built with.
	 *  $additional_fields: field strings appended unchanged after the expanded ones.
	 *
	 *  returns a flat list of field strings
	 */
	public function merge_ml_fields( $fields2boosts, $additional_fields ) {
		$expanded = array();
		foreach ( $fields2boosts as $prefix => $boost ) {
			foreach ( $this->langs as $lang ) {
				$expanded[] = "{$prefix}.{$lang}^{$boost}";
			}
		}
		foreach ( $additional_fields as $extra ) {
			array_push( $expanded, $extra );
		}
		return $expanded;
	}
170
171
	////////////////////////////////////
172
	// Extract Fields for Filtering on
173
174
	/*
	 * Extract any @mentions from the user query
	 *  use them as a filter if we can find a wp.com id
	 *  otherwise use them as a must query against must_query_fields.
	 *  Every mention (matched to a user or not) is also added as a should
	 *  query against boost_query_fields. All mentions are removed from the
	 *  current query.
	 *
	 *  args:
	 *    wpcom_id_field: wp.com id field
	 *    must_query_fields: array of fields to search for matching results (optional)
	 *    boost_query_fields: array of fields to search in for boosting results (optional)
	 *    prefixes: array of prefixes that the user can use to indicate an author
	 *
	 *  returns true/false of whether any were found
	 *
	 * See also: https://github.com/twitter/twitter-text/blob/master/java/src/com/twitter/Regex.java
	 */
	public function author_field_filter( $args ) {
		$defaults = array(
			'wpcom_id_field' => 'author_id',
			'must_query_fields' => null,
			'boost_query_fields' => null,
			'prefixes' => array( '@' ),
		);
		$args = wp_parse_args( $args, $defaults );

		$names = array();
		foreach ( $args['prefixes'] as $p ) {
			$found = $this->get_fields( $p );
			if ( $found ) {
				foreach ( $found as $f ) {
					$names[] = $f;
				}
			}
		}

		if ( empty( $names ) ) {
			return false;
		}

		foreach ( $args['prefixes'] as $p ) {
			$this->remove_fields( $p );
		}

		// Keyed by user id so the same author mentioned twice is filtered on once.
		$user_ids = array();

		//loop through the matches and separate into filters and queries
		foreach ( $names as $n ) {
			//check for exact match on login
			// NOTE(review): the lookup runs before the quotes are stripped below, so a
			// quoted mention is looked up with its quotes included - confirm intended.
			$userdata  = get_user_by( 'login', strtolower( $n ) );
			$filtering = false;
			if ( $userdata ) {
				$user_ids[ $userdata->ID ] = true;
				$filtering = true;
			}

			$is_phrase = false;
			if ( preg_match( '/"/', $n ) ) {
				$is_phrase = true;
				$n = preg_replace( '/"/', '', $n );
			}

			// Shared tail of both multi_match clauses; quoted mentions match as a phrase.
			$match = array( 'query' => $n );
			if ( $is_phrase ) {
				$match['type'] = 'phrase';
			}

			// Only require a text match when the mention could not be turned into a filter.
			if ( ! empty( $args['must_query_fields'] ) && ! $filtering ) {
				$this->add_query( array(
					'multi_match' => array( 'fields' => $args['must_query_fields'] ) + $match,
				) );
			}

			if ( ! empty( $args['boost_query_fields'] ) ) {
				$this->add_query( array(
					'multi_match' => array( 'fields' => $args['boost_query_fields'] ) + $match,
				), 'should' );
			}
		}

		if ( ! empty( $user_ids ) ) {
			$user_ids = array_keys( $user_ids );
			$this->add_filter( array( 'terms' => array( $args['wpcom_id_field'] => $user_ids ) ) );
		}

		return true;
	}
277
278
	/*
	 * Pull every "<prefix>text" token out of the current query and turn it into
	 *   a must clause, plus an optional should clause for boosting.
	 *   Typical use is hashtags, eg #News or #"current events", but any prefix
	 *   works, eg from:Greg. A quoted value is matched as a phrase.
	 *
	 *  args:
	 *    must_query_fields: array of fields that must match the tag (optional)
	 *    boost_query_fields: array of fields to boost search on (optional)
	 *    prefixes: array of prefixes that the user can use to indicate a tag
	 *
	 *  returns true/false of whether any were found
	 *
	 */
	public function text_field_filter( $args ) {
		$args = wp_parse_args( $args, array(
			'must_query_fields' => array( 'tag.name' ),
			'boost_query_fields' => array( 'tag.name' ),
			'prefixes' => array( '#' ),
		) );

		// Collect the tokens for every prefix before anything is removed.
		$tags = array();
		foreach ( $args['prefixes'] as $prefix ) {
			$hits = $this->get_fields( $prefix );
			if ( ! $hits ) {
				continue;
			}
			foreach ( $hits as $hit ) {
				$tags[] = $hit;
			}
		}

		if ( empty( $tags ) ) {
			return false;
		}

		foreach ( $args['prefixes'] as $prefix ) {
			$this->remove_fields( $prefix );
		}

		$wants_must  = ! empty( $args['must_query_fields'] );
		$wants_boost = ! empty( $args['boost_query_fields'] );

		foreach ( $tags as $tag ) {
			$is_phrase = false;
			if ( preg_match( '/"/', $tag ) ) {
				$is_phrase = true;
				$tag = preg_replace( '/"/', '', $tag );
			}

			// Same query text (and phrase type, if quoted) for both clauses.
			$match = array( 'query' => $tag );
			if ( $is_phrase ) {
				$match['type'] = 'phrase';
			}

			if ( $wants_must ) {
				$this->add_query( array(
					'multi_match' => array( 'fields' => $args['must_query_fields'] ) + $match,
				) );
			}

			if ( $wants_boost ) {
				$this->add_query( array(
					'multi_match' => array( 'fields' => $args['boost_query_fields'] ) + $match,
				), 'should' );
			}
		}

		return true;
	}
362
363
	/*
	 * Extract anything surrounded by quotes or if there is an opening quote
	 *   that is not complete, and add them to the query as a phrase query.
	 *   Quotes can be either '' or ""
	 *   Extracted text is removed from the current query.
	 *
	 *  args:
	 *    must_query_fields: array of fields that must match the phrases
	 *    boost_query_fields: array of fields to boost the phrases on (optional)
	 *
	 *  returns true/false of whether any were found
	 *
	 */
	public function phrase_filter( $args ) {
		$defaults = array(
			'must_query_fields' => array( 'all_content' ),
			'boost_query_fields' => array( 'title' ),
		);
		$args = wp_parse_args( $args, $defaults );

		$phrases = array();

		// Completed phrases: double-quoted first, then single-quoted.
		foreach ( array( '/"([^"]+)"/', "/'([^']+)'/" ) as $regex ) {
			if ( preg_match_all( $regex, $this->current_query, $matches ) ) {
				foreach ( $matches[1] as $match ) {
					$phrases[] = $match;
				}
				$this->current_query = preg_replace( $regex, '', $this->current_query );
			}
		}

		//look for a final, uncompleted phrase
		// Each hit is appended directly, so a phrase such as "0" is kept and an
		// unfinished "..." phrase is not lost when an unfinished '...' also matches.
		foreach ( array( '/"([^"]+)$/', "/(?:'\B|\B')([^']+)$/" ) as $regex ) {
			if ( preg_match( $regex, $this->current_query, $matches ) ) {
				$phrases[] = $matches[1];
				$this->current_query = preg_replace( $regex, '', $this->current_query );
			}
		}

		if ( empty( $phrases ) ) {
			return false;
		}

		foreach ( $phrases as $p ) {
			$this->add_query( array(
				'multi_match' => array(
					'fields' => $args['must_query_fields'],
					'query' => $p,
					'type' => 'phrase',
				) ) );

			if ( ! empty( $args['boost_query_fields'] ) ) {
				$this->add_query( array(
					'multi_match' => array(
						'fields' => $args['boost_query_fields'],
						'query' => $p,
						'operator' => 'and',
				) ), 'should' );
			}
		}

		return true;
	}
435
436
	/*
	 * Query fields based on the remaining parts of the query
	 *   This could be the final AND part of the query terms to match, or it
	 *   could be boosting certain elements of the query
	 *
	 *   Nothing is added when the remaining query is empty or only whitespace.
	 *
	 *  args:
	 *    must_query_fields: array of fields that must match the remaining terms (optional)
	 *    boost_query_fields: array of fields to boost the remaining terms on (optional)
	 *    boost_operator: operator for the boost query (default 'and')
	 *    boost_query_type: multi_match type for the boost query (default 'best_fields')
	 *
	 *  returns nothing
	 */
	public function remaining_query( $args ) {
		$defaults = array(
			'must_query_fields' => null,
			'boost_query_fields' => null,
			'boost_operator' => 'and',
			'boost_query_type' => 'best_fields',
		);
		$args = wp_parse_args( $args, $defaults );

		$query = $this->current_query;

		// empty() also treats the string "0" as empty; "0" is a real search term,
		// so it is explicitly let through.
		if ( ( empty( $query ) && '0' !== $query ) || ctype_space( $query ) ) {
			return;
		}

		if ( ! empty( $args['must_query_fields'] ) ) {
			$this->add_query( array(
				'multi_match' => array(
					'fields' => $args['must_query_fields'],
					'query' => $query,
					'operator' => 'and',
			) ) );
		}

		if ( ! empty( $args['boost_query_fields'] ) ) {
			$this->add_query( array(
				'multi_match' => array(
					'fields' => $args['boost_query_fields'],
					'query' => $query,
					'operator' => $args['boost_operator'],
					'type' => $args['boost_query_type'],
			) ), 'should' );
		}
	}
479
480
	/*
	 * Query fields using a prefix query (alphabetical expansions on the index).
	 *   This is not recommended. Slower performance and worse relevancy.
	 *
	 *  (UNTESTED! Copied from old prefix expansion code)
	 *
	 *   The last word of the remaining query is treated as a possibly unfinished
	 *   word (the prefix word); everything before it is the prefix remainder.
	 *   Nothing is added when the remaining query is empty or only whitespace.
	 *
	 *  args:
	 *    must_query_fields: array of fields that must match the remaining terms (optional)
	 *    boost_query_fields: array of fields to boost the remaining terms on (optional)
	 *    boost_operator: operator for the boost query (default 'and')
	 *    boost_query_type: multi_match type for the boost query (default 'best_fields')
	 *
	 *  returns nothing
	 */
	public function remaining_prefix_query( $args ) {
		$defaults = array(
			'must_query_fields' => array( 'all_content' ),
			'boost_query_fields' => array( 'title' ),
			'boost_operator' => 'and',
			'boost_query_type' => 'best_fields',
		);
		$args = wp_parse_args( $args, $defaults );

		$query = $this->current_query;

		// empty() also treats the string "0" as empty; let that term through.
		if ( ( empty( $query ) && '0' !== $query ) || ctype_space( $query ) ) {
			return;
		}

		//////////////////////////////////
		// Example cases to think about:
		// "elasticse"
		// "elasticsearch"
		// "elasticsearch "
		// "elasticsearch lucen"
		// "elasticsearch lucene"
		// "the future"  - note the stopword which will match nothing!
		// "F1" - an exact match that also has tons of expansions
		// "こんにちは" ja "hello"
		// "こんにちは友人" ja "hello friend" - we just rely on the prefix phrase and ES to split words
		//   - this could still be better I bet. Maybe we need to analyze with ES first?
		//

		/////////////////////////////
		//extract pieces of query
		// eg: "PREFIXREMAINDER PREFIXWORD"
		//     "elasticsearch lucen"

		$prefix_word = false;
		if ( preg_match( '/([^ ]+)$/', $query, $matches ) ) {
			$prefix_word = $matches[1];
		}

		$prefix_remainder = preg_replace( '/([^ ]+)$/', '', $query );
		if ( ! is_string( $prefix_remainder ) || '' === $prefix_remainder || ctype_space( $prefix_remainder ) ) {
			$prefix_remainder = false;
		}

		// Strict comparisons throughout: a word such as "0" is a valid prefix word.
		if ( false === $prefix_word ) {
			//Space at the end of the query, so skip using a prefix query
			if ( ! empty( $args['must_query_fields'] ) ) {
				$this->add_query( array(
					'multi_match' => array(
						'fields' => $args['must_query_fields'],
						'query' => $query,
						'operator' => 'and',
					) ) );
			}

			if ( ! empty( $args['boost_query_fields'] ) ) {
				$this->add_query( array(
					'multi_match' => array(
						'fields' => $args['boost_query_fields'],
						'query' => $query,
						'operator' => $args['boost_operator'],
						'type' => $args['boost_query_type'],
					) ), 'should' );
			}
		} else {

			//must match the prefix word and the prefix remainder
			if ( ! empty( $args['must_query_fields'] ) ) {
				// NOTE(review): must_query_fields is used for both the exact and the
				// phrase-prefix clauses below - confirm whether the two clause kinds
				// were meant to target different field sets.

				//need to do an OR across a few fields to handle all cases
				$must_q = array( 'bool' => array( 'should' => array( ), 'minimum_should_match' => 1 ) );

				//treat all words as an exact search (boosts complete word like "news"
				//from prefixes of "newspaper")
				$must_q['bool']['should'][] = array( 'multi_match' => array(
					'fields' => $args['must_query_fields'],
					'query' => $query,
					'operator' => 'and',
					'type' => 'cross_fields',
				) );

				//always optimistically try and match the full text as a phrase
				//prefix "the futu" should try to match "the future"
				//otherwise the first stopword kinda breaks
				//This also works as the prefix match for a single word "elasticsea"
				$must_q['bool']['should'][] = array( 'multi_match' => array(
					'fields' => $args['must_query_fields'],
					'query' => $query,
					'operator' => 'and',
					'type' => 'phrase_prefix',
					'max_expansions' => 100,
				) );

				if ( false !== $prefix_remainder ) {
					//Multiple words found, so treat each word on its own and not just as
					//a part of a phrase
					//"elasticsearch lucen" => "elasticsearch" exact AND "lucen" prefix
					$must_q['bool']['should'][] = array( 'bool' => array(
						'must' => array(
							array( 'multi_match' => array(
								'fields' => $args['must_query_fields'],
								'query' => $prefix_word,
								'operator' => 'and',
								'type' => 'phrase_prefix',
								'max_expansions' => 100,
							) ),
							array( 'multi_match' => array(
								'fields' => $args['must_query_fields'],
								'query' => $prefix_remainder,
								'operator' => 'and',
								'type' => 'cross_fields',
							) ),
						)
					) );
				}

				$this->add_query( $must_q );
			}

			//Now add any boosting of the query
			if ( ! empty( $args['boost_query_fields'] ) ) {
				//treat all words as an exact search (boosts complete word like "news"
				//from prefixes of "newspaper")
				$this->add_query( array(
					'multi_match' => array(
						'fields' => $args['boost_query_fields'],
						'query' => $query,
						'operator' => $args['boost_operator'],
						'type' => $args['boost_query_type'],
					) ), 'should' );

				//optimistically boost the full phrase prefix match
				// Added as 'should' like the other boost clauses, so it only
				// influences ranking and does not restrict the result set.
				$this->add_query( array(
					'multi_match' => array(
						'fields' => $args['boost_query_fields'],
						'query' => $query,
						'operator' => 'and',
						'type' => 'phrase_prefix',
						'max_expansions' => 100,
					) ), 'should' );
			}
		}
	}
632
633
	/*
	 * Boost results based on the lang probability overlaps
	 *
	 *   Adds one field_value_factor function per entry, using the entry's value
	 *   as the factor.
	 *
	 *  args:
	 *    langs2prob: list of languages to search in with associated boosts
	 *
	 * NOTE(review): the language key of each entry is not used and no field name
	 * is passed to add_function() - confirm against the arguments add_function()
	 * expects for field_value_factor.
	 */
	public function boost_lang_probs( $langs2prob ) {
		foreach ( $langs2prob as $prob ) {
			$factor_args = array(
				'modifier' => 'none',
				'factor' => $prob,
				'missing' => 0.01, //1% chance doc did not have right lang detected
			);
			$this->add_function( 'field_value_factor', $factor_args );
		}
	}
648
649
	////////////////////////////////////
650
	// Helper Methods
651
652
	/*
	 * Build the regex shared by get_fields() and remove_fields(): the prefix
	 * followed by either a "quoted phrase" or a run of non-separator characters.
	 *
	 *  $prefix: literal prefix text; escaped so regex metacharacters and the
	 *    '/' delimiter in it are matched literally
	 *  $modifiers: pattern modifiers appended after the closing delimiter
	 */
	private function prefixed_field_regex( $prefix, $modifiers = '' ) {
		return '/' . preg_quote( $prefix, '/' ) . '(("[^"]+")|([^\\p{Z}]+))/' . $modifiers;
	}

	/*
	 * Get the text after some prefix. eg @gibrown, or @"Greg Brown"
	 *
	 *  returns the list of matched values (quotes included for quoted values),
	 *  or false when nothing matched
	 */
	protected function get_fields( $field_prefix ) {
		// Match in UTF-8 mode first so \p{Z} is tested against whole characters;
		// in byte mode a 0xA0 byte inside a multi-byte character counts as a
		// separator and cuts the token short.
		$found = preg_match_all( $this->prefixed_field_regex( $field_prefix, 'u' ), $this->current_query, $match );
		if ( false === $found ) {
			// The query is not valid UTF-8: fall back to byte-wise matching.
			$found = preg_match_all( $this->prefixed_field_regex( $field_prefix ), $this->current_query, $match );
		}
		if ( $found ) {
			return $match[1];
		}
		return false;
	}

	/*
	 * Remove the prefix and text from the query
	 *
	 *  Uses the same pattern as get_fields(), so exactly the text returned there
	 *  (plus its prefix) is removed from the current query.
	 */
	protected function remove_fields( $field_name ) {
		$stripped = preg_replace( $this->prefixed_field_regex( $field_name, 'u' ), '', $this->current_query );
		if ( null === $stripped ) {
			// The query is not valid UTF-8: fall back to byte-wise matching.
			$stripped = preg_replace( $this->prefixed_field_regex( $field_name ), '', $this->current_query );
		}
		$this->current_query = $stripped;
	}
666
667
	/*
	 * Best effort string truncation that splits on word breaks
	 *
	 *  $string: text to shorten
	 *  $limit: maximum width, as measured by mb_strwidth()
	 *  $break: single character to break on (default is a space)
	 *
	 *  returns $string unchanged when it already fits; otherwise the text up to
	 *  the last $break found at or before $limit, or a hard cut at $limit when
	 *  no $break is found
	 */
	protected function truncate_string( $string, $limit, $break=" " ) {
		if ( mb_strwidth( $string ) <= $limit ) {
			return $string;
		}

		// Scan backwards from $limit for the nearest break character.
		for ( $pos = $limit; $pos > 0; $pos-- ) {
			if ( mb_strimwidth( $string, $pos, 1 ) === $break ) {
				return mb_strimwidth( $string, 0, $pos );
			}
		}

		// No break found, so the cut has to fall mid-word.
		return mb_strimwidth( $string, 0, $limit );
	}
690
691
}
692