Automattic /
jetpack
These results are based on our legacy PHP analysis, consider migrating to our new PHP analysis engine instead. Learn more
| 1 | <?php |
||
| 2 | |||
| 3 | /** |
||
| 4 | * Parse a pure text query into WordPress Elasticsearch query. This builds on |
||
| 5 | * the Jetpack_WPES_Query_Builder() to provide search query parsing. |
||
| 6 | * |
||
| 7 | * The key part of this parser is taking a user's query string typed into a box |
||
| 8 | * and converting it into an ES search query. |
||
| 9 | * |
||
| 10 | * This varies by application, but roughly it means extracting some parts of the query |
||
| 11 | * (authors, tags, and phrases) that are treated as a filter. Then taking the |
||
| 12 | * remaining words and building the correct query (possibly with prefix searching |
||
| 13 | * if we are doing search as you type) |
||
| 14 | * |
||
| 15 | * This class only supports ES 2.x+ |
||
| 16 | * |
||
| 17 | * This parser builds queries of the form: |
||
| 18 | * bool: |
||
| 19 | * must: |
||
| 20 | * AND match of a single field (ideally an edgengram field) |
||
| 21 | * filter: |
||
| 22 | * filter clauses from context (eg @gibrown, #news, etc) |
||
| 23 | * should: |
||
| 24 | * boosting of results by various fields |
||
| 25 | * |
||
| 26 | * Features supported: |
||
| 27 | * - search as you type |
||
| 28 | * - phrases |
||
| 29 | * - supports querying across multiple languages at once |
||
| 30 | * |
||
| 31 | * Example usage (from Search on Reader Manage): |
||
| 32 | * |
||
| 33 | * require_lib( 'jetpack-wpes-query-builder/jetpack-wpes-search-query-parser' ); |
||
| 34 | * $parser = new Jetpack_WPES_Search_Query_Parser( $args['q'], array( $lang ) ); |
||
| 35 | * |
||
| 36 | * //author |
||
| 37 | * $parser->author_field_filter( array( |
||
| 38 | * 'prefixes' => array( '@' ), |
||
| 39 | * 'wpcom_id_field' => 'author_id', |
||
| 40 | * 'must_query_fields' => array( 'author.engram', 'author_login.engram' ), |
||
| 41 | * 'boost_query_fields' => array( 'author^2', 'author_login^2', 'title.default.engram' ), |
||
| 42 | * ) ); |
||
| 43 | * |
||
| 44 | * //remainder of query |
||
| 45 | * $match_content_fields = $parser->merge_ml_fields( |
||
| 46 | * array( |
||
| 47 | * 'all_content' => 0.1, |
||
| 48 | * ), |
||
| 49 | * array( |
||
| 50 | * 'all_content.default.engram^0.1', |
||
| 51 | * ) |
||
| 52 | * ); |
||
| 53 | * $boost_content_fields = $parser->merge_ml_fields( |
||
| 54 | * array( |
||
| 55 | * 'title' => 2, |
||
| 56 | * 'description' => 1, |
||
| 57 | * 'tags' => 1, |
||
| 58 | * ), |
||
| 59 | * array( |
||
| 60 | * 'author_login^2', |
||
| 61 | * 'author^2', |
||
| 62 | * ) |
||
| 63 | * ); |
||
| 64 | * |
||
| 65 | * $parser->phrase_filter( array( |
||
| 66 | * 'must_query_fields' => $match_content_fields, |
||
| 67 | * 'boost_query_fields' => $boost_content_fields, |
||
| 68 | * ) ); |
||
| 69 | * $parser->remaining_query( array( |
||
| 70 | * 'must_query_fields' => $match_content_fields, |
||
| 71 | * 'boost_query_fields' => $boost_content_fields, |
||
| 72 | * ) ); |
||
| 73 | * |
||
| 74 | * //Boost on phrases |
||
| 75 | * $parser->remaining_query( array( |
||
| 76 | * 'boost_query_fields' => $boost_content_fields, |
||
| 77 | * 'boost_query_type' => 'phrase', |
||
| 78 | * ) ); |
||
| 79 | * |
||
| 80 | * //boosting |
||
| 81 | * $parser->add_max_boost_to_functions( 20 ); |
||
| 82 | * $parser->add_function( 'field_value_factor', array( |
||
| 83 | * 'follower_count' => array( |
||
| 84 | * 'modifier' => 'sqrt', |
||
| 85 | * 'factor' => 1, |
||
| 86 | * 'missing' => 0, |
||
| 87 | * ) ) ); |
||
| 88 | * |
||
| 89 | * //Filtering |
||
| 90 | * $parser->add_filter( array( |
||
| 91 | * 'exists' => array( 'field' => 'langs.' . $lang ) |
||
| 92 | * ) ); |
||
| 93 | * |
||
| 94 | * //run the query |
||
| 95 | * $es_query_args = array( |
||
| 96 | * 'name' => 'feeds', |
||
| 97 | * 'blog_id' => false, |
||
| 98 | * 'security_strategy' => 'a8c', |
||
| 99 | * 'type' => 'feed,blog', |
||
| 100 | * 'fields' => array( 'blog_id', 'feed_id' ), |
||
| 101 | * 'query' => $parser->build_query(), |
||
| 102 | * 'filter' => $parser->build_filter(), |
||
| 103 | * 'size' => $size, |
||
| 104 | * 'from' => $from |
||
| 105 | * ); |
||
| 106 | * $es_results = es_api_search_index( $es_query_args, 'api-feed-find' ); |
||
| 107 | * |
||
| 108 | */ |
||
| 109 | |||
| 110 | jetpack_require_lib( 'jetpack-wpes-query-builder' ); |
||
| 111 | |||
| 112 | class Jetpack_WPES_Search_Query_Parser extends Jetpack_WPES_Query_Builder { |
||
| 113 | |||
/**
 * The query string exactly as it was passed to the constructor.
 *
 * @var string
 */
public $orig_query = '';

/**
 * Working copy of the query. The constructor initialises it from
 * $orig_query; the filter methods remove the pieces they extract from it.
 *
 * @var string
 */
public $current_query = '';

/**
 * Normalized language codes, as returned by norm_langs().
 *
 * @var array
 */
public $langs;

/**
 * Language codes that norm_langs() keeps as-is. Any other code is mapped
 * to 'default'.
 *
 * @var array
 */
public $avail_langs = array( 'ar', 'bg', 'ca', 'cs', 'da', 'de', 'el', 'en', 'es', 'eu', 'fa', 'fi', 'fr', 'he', 'hi', 'hu', 'hy', 'id', 'it', 'ja', 'ko', 'nl', 'no', 'pt', 'ro', 'ru', 'sv', 'tr', 'zh' );
| 118 | |||
/**
 * Set up the parser for a single user query.
 *
 * @param string $user_query Raw query text. Stored untouched in $orig_query
 *                           and copied into $current_query.
 * @param array  $langs      Language codes; normalized by norm_langs() and
 *                           stored in $langs.
 */
function __construct( $user_query, $langs ) {
	$this->current_query = $this->orig_query = $user_query;
	$this->langs         = $this->norm_langs( $langs );
}
||
| 124 | |||
/**
 * List of extracted phrases. Defaults to an empty array.
 *
 * NOTE(review): no method in this file reads or writes this property —
 * confirm whether external callers rely on it before removing it.
 *
 * @var array
 */
public $extracted_phrases = array();
| 126 | |||
| 127 | /////////////////////////////////////////////////////// |
||
| 128 | // Methods for Building arrays of multilingual fields |
||
| 129 | |||
/**
 * Normalize language codes.
 *
 * Each code is cut at its first '-' or '_' (so 'pt_BR' becomes 'pt'). Codes
 * found in $avail_langs are kept; every other code collapses to 'default'.
 * Duplicates are removed and first-seen order is preserved.
 *
 * @param array $langs Language codes to normalize.
 * @return array Unique normalized codes.
 */
function norm_langs( $langs ) {
	$seen = array();
	foreach ( $langs as $code ) {
		$primary      = strtok( $code, '-_' );
		$key          = in_array( $primary, $this->avail_langs ) ? $primary : 'default';
		$seen[ $key ] = true;
	}
	return array_keys( $seen );
}
||
| 145 | |||
/**
 * Expand field prefixes into per-language fields with boosts.
 *
 * For every prefix => boost pair and every language in $this->langs this
 * produces "<prefix>.<lang>^<boost>". The additional fields are appended
 * unchanged after the expanded ones.
 *
 * @param array $fields2boosts     Map of field prefix => boost value.
 * @param array $additional_fields Extra field strings to append as-is.
 * @return array Flat list of field strings.
 */
function merge_ml_fields( $fields2boosts, $additional_fields ) {
	$expanded = array();

	foreach ( $fields2boosts as $prefix => $boost ) {
		foreach ( $this->langs as $lang ) {
			$expanded[] = sprintf( '%s.%s^%s', $prefix, $lang, $boost );
		}
	}

	foreach ( $additional_fields as $extra ) {
		$expanded[] = $extra;
	}

	return $expanded;
}
||
| 162 | |||
| 163 | //////////////////////////////////// |
||
| 164 | // Extract Fields for Filtering on |
||
| 165 | |||
/**
 * Extract any @mentions from the user query.
 *
 * A mention whose lower-cased text exactly matches a user login is used as a
 * filter on the wp.com id field. A mention that matches no login is instead
 * used as a text query that must match must_query_fields. In both cases the
 * mention is also added as a should (boost) query on boost_query_fields.
 * A quoted mention (eg @"Greg Brown") is queried as a phrase.
 *
 * args:
 *    wpcom_id_field: wp.com id field
 *    must_query_fields: array of fields to search for matching results (optional)
 *    boost_query_fields: array of fields to search in for boosting results (optional)
 *    prefixes: array of prefixes that the user can use to indicate an author
 *
 * returns true/false of whether any were found
 *
 * See also: https://github.com/twitter/twitter-text/blob/master/java/src/com/twitter/Regex.java
 */
function author_field_filter( $args ) {
	$defaults = array(
		'wpcom_id_field'     => 'author_id',
		'must_query_fields'  => null,
		'boost_query_fields' => null,
		'prefixes'           => array( '@' ),
	);
	$args = wp_parse_args( $args, $defaults );

	// Collect the text following every configured prefix.
	$names = array();
	foreach ( $args['prefixes'] as $prefix ) {
		$found = $this->get_fields( $prefix );
		if ( $found ) {
			foreach ( $found as $name ) {
				$names[] = $name;
			}
		}
	}

	if ( empty( $names ) ) {
		return false;
	}

	// The mentions are handled here, so strip them from the working query.
	foreach ( $args['prefixes'] as $prefix ) {
		$this->remove_fields( $prefix );
	}

	$user_ids = array();

	// Loop through the matches and separate them into filters and queries.
	foreach ( $names as $name ) {
		// Check for an exact match on login. The lookup uses the raw text,
		// so a quoted mention is looked up with its quotes still attached.
		$userdata  = get_user_by( 'login', strtolower( $name ) );
		$filtering = false;
		if ( $userdata ) {
			$user_ids[ $userdata->ID ] = true;
			$filtering                 = true;
		}

		$is_phrase = ( false !== strpos( $name, '"' ) );
		if ( $is_phrase ) {
			$name = str_replace( '"', '', $name );
		}

		// Only phrases carry an explicit multi_match type.
		$phrase_type = $is_phrase ? array( 'type' => 'phrase' ) : array();

		if ( ! empty( $args['must_query_fields'] ) && ! $filtering ) {
			$this->add_query( array(
				'multi_match' => array(
					'fields' => $args['must_query_fields'],
					'query'  => $name,
				) + $phrase_type,
			) );
		}

		if ( ! empty( $args['boost_query_fields'] ) ) {
			$this->add_query( array(
				'multi_match' => array(
					'fields' => $args['boost_query_fields'],
					'query'  => $name,
				) + $phrase_type,
			), 'should' );
		}
	}

	if ( ! empty( $user_ids ) ) {
		$user_ids = array_keys( $user_ids );
		$this->add_filter( array( 'terms' => array( $args['wpcom_id_field'] => $user_ids ) ) );
	}

	return true;
}
||
| 269 | |||
/**
 * Extract any prefix followed by text and use it as a must clause, and
 * optionally as a boost to the should query.
 * This can be used for hashtags, eg #News or #"current events",
 * but also works for any arbitrary field, eg from:Greg
 *
 * args:
 *    must_query_fields: array of fields that must match the tag (optional)
 *    boost_query_fields: array of fields to boost search on (optional)
 *    prefixes: array of prefixes that the user can use to indicate a tag
 *
 * returns true/false of whether any were found
 */
function text_field_filter( $args ) {
	$args = wp_parse_args( $args, array(
		'must_query_fields'  => array( 'tag.name' ),
		'boost_query_fields' => array( 'tag.name' ),
		'prefixes'           => array( '#' ),
	) );

	// Gather the text that follows each configured prefix.
	$tags = array();
	foreach ( $args['prefixes'] as $prefix ) {
		$matched = $this->get_fields( $prefix );
		if ( ! $matched ) {
			continue;
		}
		foreach ( $matched as $tag ) {
			$tags[] = $tag;
		}
	}

	if ( empty( $tags ) ) {
		return false;
	}

	// The tags are handled here, so strip them from the working query.
	foreach ( $args['prefixes'] as $prefix ) {
		$this->remove_fields( $prefix );
	}

	$has_must  = ! empty( $args['must_query_fields'] );
	$has_boost = ! empty( $args['boost_query_fields'] );

	foreach ( $tags as $tag ) {
		// A quoted tag is searched as a phrase, with the quotes stripped.
		$is_phrase = (bool) preg_match( '/"/', $tag );
		if ( $is_phrase ) {
			$tag = preg_replace( '/"/', '', $tag );
		}

		if ( $has_must ) {
			$must = array(
				'fields' => $args['must_query_fields'],
				'query'  => $tag,
			);
			if ( $is_phrase ) {
				$must['type'] = 'phrase';
			}
			$this->add_query( array( 'multi_match' => $must ) );
		}

		if ( $has_boost ) {
			$boost = array(
				'fields' => $args['boost_query_fields'],
				'query'  => $tag,
			);
			if ( $is_phrase ) {
				$boost['type'] = 'phrase';
			}
			$this->add_query( array( 'multi_match' => $boost ), 'should' );
		}
	}

	return true;
}
||
| 354 | |||
/**
 * Extract anything surrounded by quotes, plus a trailing phrase whose
 * opening quote was never closed, and add each one to the query as a
 * phrase query. Quotes can be either '' or "".
 *
 * Every extracted phrase is removed from the working query.
 *
 * NOTE(review): single quotes are matched with /'([^']+)'/, so apostrophes
 * inside ordinary words (eg "don't stop can't") are also treated as phrase
 * delimiters — confirm this is acceptable for your callers.
 *
 * args:
 *    must_query_fields: array of fields that must match the phrases
 *    boost_query_fields: array of fields to boost the phrases on (optional)
 *
 * returns true/false of whether any were found
 */
function phrase_filter( $args ) {
	$defaults = array(
		'must_query_fields'  => array( 'all_content' ),
		'boost_query_fields' => array( 'title' ),
	);
	$args = wp_parse_args( $args, $defaults );

	$phrases = array();

	// Completed phrases: double-quoted first, then single-quoted.
	foreach ( array( '/"([^"]+)"/', "/'([^']+)'/" ) as $pattern ) {
		if ( preg_match_all( $pattern, $this->current_query, $matches ) ) {
			foreach ( $matches[1] as $match ) {
				$phrases[] = $match;
			}
			$this->current_query = preg_replace( $pattern, '', $this->current_query );
		}
	}

	// Look for a final, uncompleted phrase. Each one found is kept: the
	// previous code stored it in a single variable (so a second match
	// overwrote the first) and then tested that variable loosely, which
	// discarded the phrase "0" after it had been removed from the query.
	foreach ( array( '/"([^"]+)$/', "/'([^']+)$/" ) as $pattern ) {
		if ( preg_match( $pattern, $this->current_query, $matches ) ) {
			$phrases[]           = $matches[1];
			$this->current_query = preg_replace( $pattern, '', $this->current_query );
		}
	}

	if ( empty( $phrases ) ) {
		return false;
	}

	foreach ( $phrases as $phrase ) {
		$this->add_query( array(
			'multi_match' => array(
				'fields' => $args['must_query_fields'],
				'query'  => $phrase,
				'type'   => 'phrase',
			),
		) );

		if ( ! empty( $args['boost_query_fields'] ) ) {
			$this->add_query( array(
				'multi_match' => array(
					'fields'   => $args['boost_query_fields'],
					'query'    => $phrase,
					'operator' => 'and',
				),
			), 'should' );
		}
	}

	return true;
}
||
| 427 | |||
/**
 * Query fields based on the remaining parts of the query.
 * This could be the final AND part of the query terms to match, or it
 * could be boosting certain elements of the query.
 *
 * Does nothing when the remaining query is empty or only whitespace.
 *
 * args:
 *    must_query_fields: array of fields that must match the remaining terms (optional)
 *    boost_query_fields: array of fields to boost the remaining terms on (optional)
 *    boost_operator: operator used for the boost query (default 'and')
 *    boost_query_type: multi_match type used for the boost query (default 'best_fields')
 */
function remaining_query( $args ) {
	$defaults = array(
		'must_query_fields'  => null,
		'boost_query_fields' => null,
		'boost_operator'     => 'and',
		'boost_query_type'   => 'best_fields',
	);
	$args = wp_parse_args( $args, $defaults );

	$query = $this->current_query;

	// empty() also reports the string "0" as empty, which would silently
	// drop a genuine search for "0", so that value is let through.
	if ( '0' !== $query && ( empty( $query ) || ctype_space( $query ) ) ) {
		return;
	}

	if ( ! empty( $args['must_query_fields'] ) ) {
		$this->add_query( array(
			'multi_match' => array(
				'fields'   => $args['must_query_fields'],
				'query'    => $query,
				'operator' => 'and',
			),
		) );
	}

	if ( ! empty( $args['boost_query_fields'] ) ) {
		$this->add_query( array(
			'multi_match' => array(
				'fields'   => $args['boost_query_fields'],
				'query'    => $query,
				'operator' => $args['boost_operator'],
				'type'     => $args['boost_query_type'],
			),
		), 'should' );
	}
}
||
| 471 | |||
/**
 * Query fields using a prefix query (alphabetical expansions on the index).
 * This is not recommended. Slower performance and worse relevancy.
 *
 * (UNTESTED! Copied from old prefix expansion code)
 *
 * The remaining query is split into its last word (the prefix word) and
 * everything before it (the prefix remainder). If the query ends in a space
 * there is no prefix word and a plain AND match is used instead.
 *
 * NOTE(review): the copied code referenced $this->all_fields and
 * $this->phrase_fields, which are not declared in this class, and an
 * undefined $full_text. They are replaced here by
 * $args['must_query_fields'] and the remaining query respectively —
 * confirm that a single field list is right for both the exact and the
 * phrase_prefix clauses.
 *
 * args:
 *    must_query_fields: array of fields that must match the remaining terms (optional)
 *    boost_query_fields: array of fields to boost the remaining terms on (optional)
 *    boost_operator: operator used for the boost query (default 'and')
 *    boost_query_type: multi_match type used for the boost query (default 'best_fields')
 */
function remaining_prefix_query( $args ) {
	$defaults = array(
		'must_query_fields'  => array( 'all_content' ),
		'boost_query_fields' => array( 'title' ),
		'boost_operator'     => 'and',
		'boost_query_type'   => 'best_fields',
	);
	$args = wp_parse_args( $args, $defaults );

	$query = $this->current_query;

	// empty() also reports the string "0" as empty; let that query through.
	if ( '0' !== $query && ( empty( $query ) || ctype_space( $query ) ) ) {
		return;
	}

	//////////////////////////////////
	// Example cases to think about:
	// "elasticse"
	// "elasticsearch"
	// "elasticsearch "
	// "elasticsearch lucen"
	// "elasticsearch lucene"
	// "the future" - note the stopword which will match nothing!
	// "F1" - an exact match that also has tons of expansions
	// "こんにちは" ja "hello"
	// "こんにちは友人" ja "hello friend" - we just rely on the prefix phrase and ES to split words
	//   - this could still be better I bet. Maybe we need to analyze with ES first?
	//

	/////////////////////////////
	// Extract pieces of query
	// eg: "PREFIXREMAINDER PREFIXWORD"
	//     "elasticsearch lucen"

	$prefix_word = false;
	if ( preg_match( '/([^ ]+)$/', $query, $matches ) ) {
		$prefix_word = $matches[1];
	}

	$prefix_remainder = preg_replace( '/([^ ]+)$/', '', $query );
	if ( ! is_string( $prefix_remainder ) || '' === $prefix_remainder || ctype_space( $prefix_remainder ) ) {
		$prefix_remainder = false;
	}

	// Strict comparison so that a prefix word of "0" is still a prefix word.
	if ( false === $prefix_word ) {
		// Space at the end of the query, so skip using a prefix query.
		if ( ! empty( $args['must_query_fields'] ) ) {
			$this->add_query( array(
				'multi_match' => array(
					'fields'   => $args['must_query_fields'],
					'query'    => $query,
					'operator' => 'and',
				),
			) );
		}

		if ( ! empty( $args['boost_query_fields'] ) ) {
			$this->add_query( array(
				'multi_match' => array(
					'fields'   => $args['boost_query_fields'],
					'query'    => $query,
					'operator' => $args['boost_operator'],
					'type'     => $args['boost_query_type'],
				),
			), 'should' );
		}

		return;
	}

	// Must match the prefix word and the prefix remainder.
	if ( ! empty( $args['must_query_fields'] ) ) {
		// Need to do an OR across a few clauses to handle all cases.
		$must_q = array( 'bool' => array( 'should' => array(), 'minimum_should_match' => 1 ) );

		// Treat all words as an exact search (boosts a complete word like
		// "news" over prefixes of "newspaper").
		$must_q['bool']['should'][] = array( 'multi_match' => array(
			'fields'   => $args['must_query_fields'],
			'query'    => $query,
			'operator' => 'and',
			'type'     => 'cross_fields',
		) );

		// Always optimistically try to match the full text as a phrase
		// prefix: "the futu" should try to match "the future", otherwise
		// the first stopword kinda breaks. This also works as the prefix
		// match for a single word such as "elasticsea".
		$must_q['bool']['should'][] = array( 'multi_match' => array(
			'fields'         => $args['must_query_fields'],
			'query'          => $query,
			'operator'       => 'and',
			'type'           => 'phrase_prefix',
			'max_expansions' => 100,
		) );

		if ( false !== $prefix_remainder ) {
			// Multiple words found, so treat each word on its own and not
			// just as a part of a phrase:
			// "elasticsearch lucen" => "elasticsearch" exact AND "lucen" prefix.
			// This clause previously went into an unrelated, uninitialised
			// $q array and so never reached the query.
			$must_q['bool']['should'][] = array( 'bool' => array(
				'must' => array(
					array( 'multi_match' => array(
						'fields'         => $args['must_query_fields'],
						'query'          => $prefix_word,
						'operator'       => 'and',
						'type'           => 'phrase_prefix',
						'max_expansions' => 100,
					) ),
					array( 'multi_match' => array(
						'fields'   => $args['must_query_fields'],
						'query'    => $prefix_remainder,
						'operator' => 'and',
						'type'     => 'cross_fields',
					) ),
				),
			) );
		}

		$this->add_query( $must_q );
	}

	// Now add any boosting of the query.
	if ( ! empty( $args['boost_query_fields'] ) ) {
		// Treat all words as an exact search (boosts a complete word like
		// "news" over prefixes of "newspaper"). The option key is
		// 'boost_operator'; 'boost_query_operator' was never defined.
		$this->add_query( array(
			'multi_match' => array(
				'fields'   => $args['boost_query_fields'],
				'query'    => $query,
				'operator' => $args['boost_operator'],
				'type'     => $args['boost_query_type'],
			),
		), 'should' );

		// Optimistically boost the full phrase prefix match. Added as a
		// should clause so it boosts rather than restricts the results.
		$this->add_query( array(
			'multi_match' => array(
				'fields'         => $args['boost_query_fields'],
				'query'          => $query,
				'operator'       => 'and',
				'type'           => 'phrase_prefix',
				'max_expansions' => 100,
			),
		), 'should' );
	}
}
||
| 624 | |||
/**
 * Boost results based on the lang probability overlaps.
 *
 * Adds one 'field_value_factor' function per entry, using the entry's
 * value as the factor.
 *
 * NOTE(review): the language key of each entry is never used and no field
 * name is passed to add_function(), unlike the field-keyed
 * field_value_factor example in the class header — confirm which field
 * each language is meant to boost.
 *
 * args:
 *    langs2prob: list of languages to search in with associated boosts
 */
function boost_lang_probs( $langs2prob ) {
	foreach ( $langs2prob as $lang => $prob ) {
		$factor_params = array(
			'modifier' => 'none',
			'factor'   => $prob,
			'missing'  => 0.01, //1% chance doc did not have right lang detected
		);
		$this->add_function( 'field_value_factor', $factor_params );
	}
}
||
| 640 | |||
| 641 | //////////////////////////////////// |
||
| 642 | // Helper Methods |
||
| 643 | |||
/**
 * Get the text after some prefix, eg @gibrown or @"Greg Brown".
 *
 * The prefix is matched literally. The text is either a double-quoted
 * string (returned with its quotes) or a run of characters up to the next
 * Unicode separator.
 *
 * @param string $field_prefix Literal prefix to look for.
 * @return array|false List of matched texts, or false when there are none.
 */
protected function get_fields( $field_prefix ) {
	// preg_quote() keeps prefixes containing regex metacharacters (eg '+'
	// or '.') from being interpreted as part of the pattern.
	$pattern = '/' . preg_quote( $field_prefix, '/' ) . '(("[^"]+")|([^\\p{Z}]+))/';

	// Try UTF-8 mode first: in byte mode \p{Z} also matches byte 0xA0,
	// which cuts multi-byte characters such as "à" in half. A query that
	// is not valid UTF-8 makes the 'u' pattern fail, so fall back to the
	// byte-wise match in that case.
	$found = false;
	foreach ( array( 'u', '' ) as $modifier ) {
		$found = preg_match_all( $pattern . $modifier, $this->current_query, $match );
		if ( false !== $found ) {
			break;
		}
	}

	if ( $found ) {
		return $match[1];
	}
	return false;
}
||
| 652 | |||
/**
 * Remove the prefix and the text that follows it from the working query.
 *
 * The prefix is matched literally. The text is either a double-quoted
 * string or a run of characters up to the next Unicode separator.
 *
 * @param string $field_name Literal prefix whose occurrences are removed.
 */
protected function remove_fields( $field_name ) {
	// preg_quote() keeps prefixes containing regex metacharacters from
	// being interpreted as part of the pattern.
	$pattern = '/' . preg_quote( $field_name, '/' ) . '(("[^"]+")|([^\\p{Z}]+))/';

	// Try UTF-8 mode first: in byte mode \p{Z} also matches byte 0xA0,
	// which cuts multi-byte characters in half. Fall back to byte mode
	// when the query is not valid UTF-8.
	foreach ( array( 'u', '' ) as $modifier ) {
		$stripped = preg_replace( $pattern . $modifier, '', $this->current_query );
		if ( null !== $stripped ) {
			$this->current_query = $stripped;
			return;
		}
	}
	// Both attempts failed: leave the query untouched rather than
	// overwriting it with null.
}
||
| 658 | |||
/**
 * Best effort string truncation that splits on word breaks.
 *
 * A string whose display width already fits within $limit is returned
 * unchanged. Otherwise the string is scanned backwards from position
 * $limit for the break character and cut just before it. When no break
 * is found the string is chopped mid-word at $limit.
 *
 * @param string $string Text to truncate.
 * @param int    $limit  Maximum width to keep.
 * @param string $break  Character to break on. Defaults to a space.
 * @return string The possibly shortened string.
 */
function truncate_string( $string, $limit, $break=" " ) {
	if ( mb_strwidth( $string ) <= $limit ) {
		return $string;
	}

	// Walk backwards from $limit to find the first break.
	for ( $cut = $limit; $cut > 0; $cut-- ) {
		if ( mb_strimwidth( $string, $cut, 1 ) === $break ) {
			return mb_strimwidth( $string, 0, $cut );
		}
	}

	// No break found, so chop mid-word.
	return mb_strimwidth( $string, 0, $limit );
}
||
| 682 | |||
| 683 | } |
||
| 684 |
The PSR-2 coding standard requires that all properties in a class have their visibility explicitly declared. If you declare a property using `var $property;`,
the property is implicitly public.
To learn more about the PSR-2, please see the PHP-FIG site on the PSR-2.