@@ -1,7 +1,7 @@ discard block |
||
1 | 1 | <?php |
2 | 2 | /** |
3 | - * $Id$ |
|
4 | - */ |
|
3 | + * $Id$ |
|
4 | + */ |
|
5 | 5 | |
6 | 6 | /** |
7 | 7 | * Copyright (c) 2001-2015, Andrew Aksyonoff |
@@ -37,2005 +37,2005 @@ discard block |
||
37 | 37 | */ |
38 | 38 | class Client |
39 | 39 | { |
40 | - /** |
|
41 | - * Searchd host |
|
42 | - * |
|
43 | - * @var string |
|
44 | - */ |
|
45 | - protected $host = 'localhost'; |
|
46 | - |
|
47 | - /** |
|
48 | - * Searchd port |
|
49 | - * |
|
50 | - * @var int |
|
51 | - */ |
|
52 | - protected $port = 9312; |
|
53 | - |
|
54 | - /** |
|
55 | - * How many records to seek from result-set start |
|
56 | - * |
|
57 | - * @var int |
|
58 | - */ |
|
59 | - protected $offset = 0; |
|
60 | - |
|
61 | - /** |
|
62 | - * How many records to return from result-set starting at offset |
|
63 | - * |
|
64 | - * @var int |
|
65 | - */ |
|
66 | - protected $limit = 20; |
|
67 | - |
|
68 | - /** |
|
69 | - * Query matching mode |
|
70 | - * |
|
71 | - * @var int |
|
72 | - */ |
|
73 | - protected $mode = self::MATCH_EXTENDED2; |
|
74 | - |
|
75 | - /** |
|
76 | - * Per-field weights (default is 1 for all fields) |
|
77 | - * |
|
78 | - * @var array |
|
79 | - */ |
|
80 | - protected $weights = array(); |
|
81 | - |
|
82 | - /** |
|
83 | - * Match sorting mode |
|
84 | - * |
|
85 | - * @var int |
|
86 | - */ |
|
87 | - protected $sort = self::SORT_RELEVANCE; |
|
88 | - |
|
89 | - /** |
|
90 | - * Attribute to sort by |
|
91 | - * |
|
92 | - * @var string |
|
93 | - */ |
|
94 | - protected $sort_by = ''; |
|
95 | - |
|
96 | - /** |
|
97 | - * Min ID to match (0 means no limit) |
|
98 | - * |
|
99 | - * @var int |
|
100 | - */ |
|
101 | - protected $min_id = 0; |
|
102 | - |
|
103 | - /** |
|
104 | - * Max ID to match (0 means no limit) |
|
105 | - * |
|
106 | - * @var int |
|
107 | - */ |
|
108 | - protected $max_id = 0; |
|
109 | - |
|
110 | - /** |
|
111 | - * Search filters |
|
112 | - * |
|
113 | - * @var array |
|
114 | - */ |
|
115 | - protected $filters = array(); |
|
116 | - |
|
117 | - /** |
|
118 | - * Group-by attribute name |
|
119 | - * |
|
120 | - * @var string |
|
121 | - */ |
|
122 | - protected $group_by = ''; |
|
123 | - |
|
124 | - /** |
|
125 | - * Group-by function (to pre-process group-by attribute value with) |
|
126 | - * |
|
127 | - * @var int |
|
128 | - */ |
|
129 | - protected $group_func = self::GROUP_BY_DAY; |
|
130 | - |
|
131 | - /** |
|
132 | - * Group-by sorting clause (to sort groups in result set with) |
|
133 | - * |
|
134 | - * @var string |
|
135 | - */ |
|
136 | - protected $group_sort = '@group desc'; |
|
137 | - |
|
138 | - /** |
|
139 | - * Group-by count-distinct attribute |
|
140 | - * |
|
141 | - * @var string |
|
142 | - */ |
|
143 | - protected $group_distinct = ''; |
|
144 | - |
|
145 | - /** |
|
146 | - * Max matches to retrieve |
|
147 | - * |
|
148 | - * @var int |
|
149 | - */ |
|
150 | - protected $max_matches = 1000; |
|
151 | - |
|
152 | - /** |
|
153 | - * Cutoff to stop searching at |
|
154 | - * |
|
155 | - * @var int |
|
156 | - */ |
|
157 | - protected $cutoff = 0; |
|
158 | - |
|
159 | - /** |
|
160 | - * Distributed retries count |
|
161 | - * |
|
162 | - * @var int |
|
163 | - */ |
|
164 | - protected $retry_count = 0; |
|
165 | - |
|
166 | - /** |
|
167 | - * Distributed retries delay |
|
168 | - * |
|
169 | - * @var int |
|
170 | - */ |
|
171 | - protected $retry_delay = 0; |
|
172 | - |
|
173 | - /** |
|
174 | - * Geographical anchor point |
|
175 | - * |
|
176 | - * @var array |
|
177 | - */ |
|
178 | - protected $anchor = array(); |
|
179 | - |
|
180 | - /** |
|
181 | - * Per-index weights |
|
182 | - * |
|
183 | - * @var array |
|
184 | - */ |
|
185 | - protected $index_weights = array(); |
|
186 | - |
|
187 | - /** |
|
188 | - * Ranking mode |
|
189 | - * |
|
190 | - * @var int |
|
191 | - */ |
|
192 | - protected $ranker = self::RANK_PROXIMITY_BM25; |
|
193 | - |
|
194 | - /** |
|
195 | - * Ranking mode expression (for self::RANK_EXPR) |
|
196 | - * |
|
197 | - * @var string |
|
198 | - */ |
|
199 | - protected $rank_expr = ''; |
|
200 | - |
|
201 | - /** |
|
202 | - * Max query time, milliseconds (0 means no limit) |
|
203 | - * |
|
204 | - * @var int |
|
205 | - */ |
|
206 | - protected $max_query_time = 0; |
|
207 | - |
|
208 | - /** |
|
209 | - * Per-field-name weights |
|
210 | - * |
|
211 | - * @var array |
|
212 | - */ |
|
213 | - protected $field_weights = array(); |
|
214 | - |
|
215 | - /** |
|
216 | - * Per-query attribute values overrides |
|
217 | - * |
|
218 | - * @var array |
|
219 | - */ |
|
220 | - protected $overrides = array(); |
|
221 | - |
|
222 | - /** |
|
223 | - * Select-list (attributes or expressions, with optional aliases) |
|
224 | - * |
|
225 | - * @var string |
|
226 | - */ |
|
227 | - protected $select = '*'; |
|
228 | - |
|
229 | - /** |
|
230 | - * Per-query various flags |
|
231 | - * |
|
232 | - * @var int |
|
233 | - */ |
|
234 | - protected $query_flags = 0; |
|
235 | - |
|
236 | - /** |
|
237 | - * Per-query max_predicted_time |
|
238 | - * |
|
239 | - * @var int |
|
240 | - */ |
|
241 | - protected $predicted_time = 0; |
|
242 | - |
|
243 | - /** |
|
244 | - * Outer match sort by |
|
245 | - * |
|
246 | - * @var string |
|
247 | - */ |
|
248 | - protected $outer_order_by = ''; |
|
249 | - |
|
250 | - /** |
|
251 | - * Outer offset |
|
252 | - * |
|
253 | - * @var int |
|
254 | - */ |
|
255 | - protected $outer_offset = 0; |
|
256 | - |
|
257 | - /** |
|
258 | - * Outer limit |
|
259 | - * |
|
260 | - * @var int |
|
261 | - */ |
|
262 | - protected $outer_limit = 0; |
|
263 | - |
|
264 | - /** |
|
265 | - * @var bool |
|
266 | - */ |
|
267 | - protected $has_outer = false; |
|
268 | - |
|
269 | - /** |
|
270 | - * Last error message |
|
271 | - * |
|
272 | - * @var string |
|
273 | - */ |
|
274 | - protected $error = ''; |
|
275 | - |
|
276 | - /** |
|
277 | - * Last warning message |
|
278 | - * |
|
279 | - * @var string |
|
280 | - */ |
|
281 | - protected $warning = ''; |
|
282 | - |
|
283 | - /** |
|
284 | - * Connection error vs remote error flag |
|
285 | - * |
|
286 | - * @var bool |
|
287 | - */ |
|
288 | - protected $conn_error = false; |
|
289 | - |
|
290 | - /** |
|
291 | - * Requests array for multi-query |
|
292 | - * |
|
293 | - * @var array |
|
294 | - */ |
|
295 | - protected $reqs = array(); |
|
296 | - |
|
297 | - /** |
|
298 | - * Stored mbstring encoding |
|
299 | - * |
|
300 | - * @var string |
|
301 | - */ |
|
302 | - protected $mbenc = ''; |
|
303 | - |
|
304 | - /** |
|
305 | - * Whether $result['matches'] should be a hash or an array |
|
306 | - * |
|
307 | - * @var bool |
|
308 | - */ |
|
309 | - protected $array_result = false; |
|
310 | - |
|
311 | - /** |
|
312 | - * Connect timeout |
|
313 | - * |
|
314 | - * @var int|float |
|
315 | - */ |
|
316 | - protected $timeout = 0; |
|
317 | - |
|
318 | - /** |
|
319 | - * @var string |
|
320 | - */ |
|
321 | - protected $path = ''; |
|
322 | - |
|
323 | - /** |
|
324 | - * @var resource|bool |
|
325 | - */ |
|
326 | - protected $socket = false; |
|
327 | - |
|
328 | - // known searchd commands |
|
329 | - const SEARCHD_COMMAND_SEARCH = 0; |
|
330 | - const SEARCHD_COMMAND_EXCERPT = 1; |
|
331 | - const SEARCHD_COMMAND_UPDATE = 2; |
|
332 | - const SEARCHD_COMMAND_KEYWORDS = 3; |
|
333 | - const SEARCHD_COMMAND_PERSIST = 4; |
|
334 | - const SEARCHD_COMMAND_STATUS = 5; |
|
335 | - const SEARCHD_COMMAND_FLUSH_ATTRS = 7; |
|
336 | - |
|
337 | - // current client-side command implementation versions |
|
338 | - const VER_COMMAND_SEARCH = 0x11E; |
|
339 | - const VER_COMMAND_EXCERPT = 0x104; |
|
340 | - const VER_COMMAND_UPDATE = 0x103; |
|
341 | - const VER_COMMAND_KEYWORDS = 0x100; |
|
342 | - const VER_COMMAND_STATUS = 0x101; |
|
343 | - const VER_COMMAND_QUERY = 0x100; |
|
344 | - const VER_COMMAND_FLUSH_ATTRS = 0x100; |
|
345 | - |
|
346 | - // known searchd status codes |
|
347 | - const SEARCHD_OK = 0; |
|
348 | - const SEARCHD_ERROR = 1; |
|
349 | - const SEARCHD_RETRY = 2; |
|
350 | - const SEARCHD_WARNING = 3; |
|
351 | - |
|
352 | - // known match modes |
|
353 | - const MATCH_ALL = 0; |
|
354 | - const MATCH_ANY = 1; |
|
355 | - const MATCH_PHRASE = 2; |
|
356 | - const MATCH_BOOLEAN = 3; |
|
357 | - const MATCH_EXTENDED = 4; |
|
358 | - const MATCH_FULL_SCAN = 5; |
|
359 | - const MATCH_EXTENDED2 = 6; // extended engine V2 (TEMPORARY, WILL BE REMOVED) |
|
360 | - |
|
361 | - // known ranking modes (ext2 only) |
|
362 | - const RANK_PROXIMITY_BM25 = 0; // default mode, phrase proximity major factor and BM25 minor one |
|
363 | - const RANK_BM25 = 1; // statistical mode, BM25 ranking only (faster but worse quality) |
|
364 | - const RANK_NONE = 2; // no ranking, all matches get a weight of 1 |
|
365 | - const RANK_WORD_COUNT = 3; // simple word-count weighting, rank is a weighted sum of per-field keyword |
|
366 | - // occurrence counts |
|
367 | - const RANK_PROXIMITY = 4; |
|
368 | - const RANK_MATCH_ANY = 5; |
|
369 | - const RANK_FIELD_MASK = 6; |
|
370 | - const RANK_SPH04 = 7; |
|
371 | - const RANK_EXPR = 8; |
|
372 | - const RANK_TOTAL = 9; |
|
373 | - |
|
374 | - // known sort modes |
|
375 | - const SORT_RELEVANCE = 0; |
|
376 | - const SORT_ATTR_DESC = 1; |
|
377 | - const SORT_ATTR_ASC = 2; |
|
378 | - const SORT_TIME_SEGMENTS = 3; |
|
379 | - const SORT_EXTENDED = 4; |
|
380 | - const SORT_EXPR = 5; |
|
381 | - |
|
382 | - // known filter types |
|
383 | - const FILTER_VALUES = 0; |
|
384 | - const FILTER_RANGE = 1; |
|
385 | - const FILTER_FLOAT_RANGE = 2; |
|
386 | - const FILTER_STRING = 3; |
|
387 | - |
|
388 | - // known attribute types |
|
389 | - const ATTR_INTEGER = 1; |
|
390 | - const ATTR_TIMESTAMP = 2; |
|
391 | - const ATTR_ORDINAL = 3; |
|
392 | - const ATTR_BOOL = 4; |
|
393 | - const ATTR_FLOAT = 5; |
|
394 | - const ATTR_BIGINT = 6; |
|
395 | - const ATTR_STRING = 7; |
|
396 | - const ATTR_FACTORS = 1001; |
|
397 | - const ATTR_MULTI = 0x40000001; |
|
398 | - const ATTR_MULTI64 = 0x40000002; |
|
399 | - |
|
400 | - // known grouping functions |
|
401 | - const GROUP_BY_DAY = 0; |
|
402 | - const GROUP_BY_WEEK = 1; |
|
403 | - const GROUP_BY_MONTH = 2; |
|
404 | - const GROUP_BY_YEAR = 3; |
|
405 | - const GROUP_BY_ATTR = 4; |
|
406 | - const GROUP_BY_ATTR_PAIR = 5; |
|
407 | - |
|
408 | - ///////////////////////////////////////////////////////////////////////////// |
|
409 | - // common stuff |
|
410 | - ///////////////////////////////////////////////////////////////////////////// |
|
411 | - |
|
/**
 * Set up the client with its initial per-query flags.
 */
public function __construct()
{
    // start from an empty flag set with bit 6 on: default idf=tfidf_normalized
    $initial_flags = setBit(0, 6, true);

    $this->query_flags = $initial_flags;
}
|
417 | - |
|
/**
 * Release the socket held by this client, if there is one, when the object is destroyed.
 */
public function __destruct()
{
    // is_resource() is the same liveness test connect() applies; unlike a
    // plain "!== false" comparison it also skips handles that were already
    // closed, which fclose() would reject
    if (is_resource($this->socket)) {
        fclose($this->socket);
    }

    $this->socket = false;
}
|
424 | - |
|
/**
 * Get the last error message recorded by this client
 *
 * @return string the stored message; an empty string until an error has been recorded
 */
public function getLastError()
{
    $message = $this->error;

    return $message;
}
|
432 | - |
|
/**
 * Get the last warning message recorded by this client
 *
 * @return string the stored message; an empty string until a warning has been recorded
 */
public function getLastWarning()
{
    $message = $this->warning;

    return $message;
}
|
440 | - |
|
/**
 * Tell whether the last failure was a network connection error
 * (as opposed to an error reported by searchd or a broken response)
 *
 * @return bool
 */
public function isConnectError()
{
    $is_network_failure = $this->conn_error;

    return $is_network_failure;
}
|
450 | - |
|
/**
 * Set searchd host name and port
 *
 * A host beginning with '/' is stored as a UNIX socket path with a
 * 'unix://' prefix added; a host already beginning with 'unix://' is stored
 * as given. In both cases $port is ignored. Any other host is stored
 * together with the TCP port and the socket path is cleared.
 *
 * @param string $host host name, absolute socket path, or 'unix://' URL
 * @param int    $port TCP port; 0 selects the default port 9312
 */
public function setServer($host, $port = 0)
{
    assert(is_string($host));

    // prefix comparisons rather than $host[0]: indexing an empty string
    // raises an "uninitialized string offset" notice/warning
    if (strncmp($host, '/', 1) === 0) {
        $this->path = 'unix://' . $host;
        return;
    }
    if (strncmp($host, 'unix://', 7) === 0) {
        $this->path = $host;
        return;
    }

    $this->host = $host;
    $port = intval($port);
    assert(0 <= $port && $port < 65536);
    $this->port = $port == 0 ? 9312 : $port;
    $this->path = '';
}
|
475 | - |
|
/**
 * Set server connection timeout (0 to remove)
 *
 * The value is stored as given; connect() passes it to fsockopen() only
 * when it is greater than zero.
 *
 * @param int|float|string $timeout any numeric value
 */
public function setConnectTimeout($timeout)
{
    assert(is_numeric($timeout));

    $this->timeout = $timeout;
}
|
486 | - |
|
/**
 * Write $length bytes of $data to the given stream
 *
 * A single fwrite() on a network stream may stop before the whole buffer
 * has been written, so the unsent remainder is retried until everything is
 * out or a write makes no progress. On failure the error message and the
 * connection-error flag are set.
 *
 * @param resource $handle
 * @param string   $data
 * @param int      $length
 *
 * @return bool true when all $length bytes were written, false otherwise
 */
protected function send($handle, $data, $length)
{
    $sent = 0;
    $ok = !feof($handle);

    while ($ok && $sent < $length) {
        // the first pass hands over the caller's buffer untouched; later
        // passes only send the tail that is still outstanding
        $bytes = $sent == 0
            ? fwrite($handle, $data, $length)
            : fwrite($handle, substr($data, $sent, $length - $sent));

        if ($bytes === false || $bytes <= 0) {
            // no progress: the peer is gone or the buffer was shorter than $length
            $ok = false;
        } else {
            $sent += $bytes;
        }
    }

    if (!$ok) {
        $this->error = 'connection unexpectedly closed (timed out?)';
        $this->conn_error = true;
        return false;
    }

    return true;
}
|
503 | - |
|
504 | - ///////////////////////////////////////////////////////////////////////////// |
|
505 | - |
|
/**
 * Enter mbstring workaround mode
 *
 * When bit 2 of the mbstring.func_overload setting is on, the current
 * internal encoding is remembered and 'latin1' is selected instead.
 * Otherwise the remembered encoding is just reset to an empty string.
 */
protected function mbPush()
{
    $this->mbenc = '';

    $overload_active = ini_get('mbstring.func_overload') & 2;
    if (!$overload_active) {
        return;
    }

    $this->mbenc = mb_internal_encoding();
    mb_internal_encoding('latin1');
}
|
517 | - |
|
/**
 * Leave mbstring workaround mode
 *
 * Restores the internal encoding remembered by mbPush(); does nothing when
 * no encoding was remembered.
 */
protected function mbPop()
{
    if (!$this->mbenc) {
        return;
    }

    mb_internal_encoding($this->mbenc);
}
|
527 | - |
|
/**
 * Connect to searchd server
 *
 * Reuses the stored socket when it is still a live resource. Otherwise a
 * new connection is opened to the configured socket path or host and port,
 * the client protocol version is sent, and the server's version is read
 * back and checked. On failure the error message is set.
 *
 * @return bool|resource the connected stream, or false on failure
 */
protected function connect()
{
    if (is_resource($this->socket)) {
        // we are in persistent connection mode, so we have a socket
        // however, need to check whether it's still alive
        if (!feof($this->socket)) {
            return $this->socket;
        }

        // force reopen; close the dead handle first so it is not leaked
        fclose($this->socket);
        $this->socket = false;
    }

    $errno = 0;
    $errstr = '';
    $this->conn_error = false;

    if ($this->path) {
        $host = $this->path;
        $port = 0;
    } else {
        $host = $this->host;
        $port = $this->port;
    }

    // a non-positive timeout means "use fsockopen()'s own default"
    if ($this->timeout <= 0) {
        $fp = @fsockopen($host, $port, $errno, $errstr);
    } else {
        $fp = @fsockopen($host, $port, $errno, $errstr, $this->timeout);
    }

    if (!is_resource($fp)) {
        if ($this->path) {
            $location = $this->path;
        } else {
            $location = "{$this->host}:{$this->port}";
        }

        $errstr = trim($errstr);
        $this->error = "connection to $location failed (errno=$errno, msg=$errstr)";
        $this->conn_error = true;
        return false;
    }

    // send my version
    // this is a subtle part. we must do it before (!) reading back from searchd.
    // because otherwise under some conditions (reported on FreeBSD for instance)
    // TCP stack could throttle write-write-read pattern because of Nagle.
    if (!$this->send($fp, pack('N', 1), 4)) {
        fclose($fp);
        $this->error = 'failed to send client protocol version';
        return false;
    }

    // check version; a short or failed read is treated as version 0 instead
    // of being passed to unpack(), which would emit a warning
    $v = 0;
    $handshake = fread($fp, 4);
    if (is_string($handshake) && strlen($handshake) == 4) {
        list(, $v) = unpack('N*', $handshake);
        $v = (int)$v;
    }
    if ($v < 1) {
        fclose($fp);
        $this->error = "expected searchd protocol version 1+, got version '$v'";
        return false;
    }

    return $fp;
}
|
598 | - |
|
/**
 * Get and check response packet from searchd server
 *
 * Reads the 8-byte header (status, version, body length), then the body.
 * The stream is closed afterwards unless a socket is stored on the client.
 * A warning status stores the warning text and returns the rest of the
 * body; error and retry statuses store the error text and return false.
 *
 * @param resource $fp
 * @param int      $client_ver client-side version of the command that was sent
 *
 * @return bool|string the response body, or false on failure
 */
protected function getResponse($fp, $client_ver)
{
    $response = '';
    $len = 0;
    $status = 0;
    $ver = 0;

    $header = fread($fp, 8);
    if (strlen($header) == 8) {
        list($status, $ver, $len) = array_values(unpack('n2a/Nb', $header));
        $left = $len;
        while ($left > 0 && !feof($fp)) {
            $chunk = fread($fp, min(8192, $left));
            // compare explicitly: a chunk consisting of the single byte "0"
            // is falsy and must not be dropped
            if ($chunk !== false && $chunk !== '') {
                $response .= $chunk;
                $left -= strlen($chunk);
            }
        }
    }

    // no stored socket means this was a one-off connection
    if ($this->socket === false) {
        fclose($fp);
    }

    // check response; an explicit empty-string test keeps a valid "0" payload
    $read = strlen($response);
    if ($response === '' || $read != $len) {
        $this->error = $len
            ? "failed to read searchd response (status=$status, ver=$ver, len=$len, read=$read)"
            : 'received zero-sized searchd response';
        return false;
    }

    switch ($status) {
        case self::SEARCHD_WARNING:
            // body layout: 4-byte warning length, warning text, then the payload
            list(, $wlen) = unpack('N*', substr($response, 0, 4));
            $this->warning = substr($response, 4, $wlen);
            return substr($response, 4 + $wlen);
        case self::SEARCHD_ERROR:
            $this->error = 'searchd error: ' . substr($response, 4);
            return false;
        case self::SEARCHD_RETRY:
            $this->error = 'temporary searchd error: ' . substr($response, 4);
            return false;
        case self::SEARCHD_OK:
            if ($ver < $client_ver) { // check version
                $this->warning = sprintf(
                    'searchd command v.%d.%d older than client\'s v.%d.%d, some options might not work',
                    $ver >> 8,
                    $ver & 0xff,
                    $client_ver >> 8,
                    $client_ver & 0xff
                );
            }

            return $response;
        default:
            $this->error = "unknown status code '$status'";
            return false;
    }
}
|
666 | - |
|
667 | - ///////////////////////////////////////////////////////////////////////////// |
|
668 | - // searching |
|
669 | - ///////////////////////////////////////////////////////////////////////////// |
|
670 | - |
|
/**
 * Set offset and count into result set, and optionally set max-matches and cutoff limits
 *
 * @param int $offset how many records to skip from the start of the result set
 * @param int $limit  how many records to return
 * @param int $max    new max-matches value; ignored unless greater than zero
 * @param int $cutoff new cutoff value; ignored unless greater than zero
 */
public function setLimits($offset, $limit, $max = 0, $cutoff = 0)
{
    assert(is_int($offset));
    assert(is_int($limit));
    assert($offset >= 0);
    assert($limit > 0);
    assert($max >= 0);

    $this->offset = $offset;
    $this->limit = $limit;

    // non-positive values keep whatever is currently configured
    $this->max_matches = $max > 0 ? $max : $this->max_matches;
    $this->cutoff = $cutoff > 0 ? $cutoff : $this->cutoff;
}
|
695 | - |
|
/**
 * Set maximum query time, in milliseconds, per-index, 0 means 'do not limit'
 *
 * @param int $max non-negative number of milliseconds
 */
public function setMaxQueryTime($max)
{
    assert(is_int($max));
    assert($max >= 0);

    $milliseconds = $max;
    $this->max_query_time = $milliseconds;
}
|
707 | - |
|
/**
 * Set matching mode
 *
 * Raises an E_USER_DEPRECATED notice on every call before storing the mode.
 *
 * @param int $mode one of the self::MATCH_* constants
 */
public function setMatchMode($mode)
{
    $notice = 'DEPRECATED: Do not call this method or, even better, use SphinxQL instead of an API';
    trigger_error($notice, E_USER_DEPRECATED);

    assert(in_array($mode, array(
        self::MATCH_ALL,
        self::MATCH_ANY,
        self::MATCH_PHRASE,
        self::MATCH_BOOLEAN,
        self::MATCH_EXTENDED,
        self::MATCH_FULL_SCAN,
        self::MATCH_EXTENDED2
    )));

    $this->mode = $mode;
}
|
730 | - |
|
/**
 * Set ranking mode
 *
 * @param int    $ranker    0, or a value from 1 up to (not including) self::RANK_TOTAL
 * @param string $rank_expr ranking expression, stored alongside the ranker
 */
public function setRankingMode($ranker, $rank_expr='')
{
    assert($ranker === 0 || $ranker >= 1 && $ranker < self::RANK_TOTAL);
    assert(is_string($rank_expr));

    // both values are always stored together
    $this->rank_expr = $rank_expr;
    $this->ranker = $ranker;
}
|
744 | - |
|
/**
 * Set matches sorting mode
 *
 * @param int    $mode    one of the self::SORT_* constants
 * @param string $sort_by sort clause; must be non-empty for every mode except self::SORT_RELEVANCE
 */
public function setSortMode($mode, $sort_by = '')
{
    assert(in_array($mode, array(
        self::SORT_RELEVANCE,
        self::SORT_ATTR_DESC,
        self::SORT_ATTR_ASC,
        self::SORT_TIME_SEGMENTS,
        self::SORT_EXTENDED,
        self::SORT_EXPR
    )));
    assert(is_string($sort_by));
    assert($mode == self::SORT_RELEVANCE || strlen($sort_by) > 0);

    $this->sort_by = $sort_by;
    $this->sort = $mode;
}
|
767 | - |
|
/**
 * Bind per-field weights by order
 *
 * No longer supported: every call throws.
 *
 * @deprecated use setFieldWeights() instead
 *
 * @throws \RuntimeException always
 */
public function setWeights()
{
    $reason = 'This method is now deprecated; please use setFieldWeights instead';

    throw new \RuntimeException($reason);
}
|
777 | - |
|
/**
 * Bind per-field weights by name
 *
 * Replaces any previously stored per-field-name weights.
 *
 * @param array $weights map of field name (string) to weight (int)
 */
public function setFieldWeights(array $weights)
{
    // validate every entry before anything is stored
    foreach ($weights as $name => $weight) {
        assert(is_string($name));
        assert(is_int($weight));
    }

    $validated = $weights;
    $this->field_weights = $validated;
}
|
791 | - |
|
/**
 * Bind per-index weights by name
 *
 * Replaces any previously stored per-index weights.
 *
 * @param array $weights map of index name (string) to weight (int)
 */
public function setIndexWeights(array $weights)
{
    // validate every entry before anything is stored
    foreach ($weights as $index => $weight) {
        assert(is_string($index));
        assert(is_int($weight));
    }

    $validated = $weights;
    $this->index_weights = $validated;
}
|
805 | - |
|
/**
 * Set IDs range to match. Only match records if document ID is between $min and $max (inclusive)
 *
 * @param int $min lowest document ID; any numeric value is accepted
 * @param int $max highest document ID; must not be lower than $min
 */
public function setIDRange($min, $max)
{
    assert(is_numeric($min));
    assert(is_numeric($max));
    assert($min <= $max);

    list($this->min_id, $this->max_id) = array($min, $max);
}
|
821 | - |
|
/**
 * Set values set filter. Only match records where $attribute value is in given set
 *
 * The filter is appended to the filters already stored on the client.
 *
 * @param string $attribute
 * @param array  $values    non-empty list of numeric values
 * @param bool   $exclude   stored with the filter as its 'exclude' flag
 */
public function setFilter($attribute, array $values, $exclude = false)
{
    assert(is_string($attribute));
    assert(count($values));

    foreach ($values as $value) {
        assert(is_numeric($value));
    }

    $filter = array(
        'type' => self::FILTER_VALUES,
        'attr' => $attribute,
        'exclude' => $exclude,
        'values' => $values
    );
    $this->filters[] = $filter;
}
|
845 | - |
|
/**
 * Set string filter
 * Only match records where $attribute value is equal
 *
 * The filter is appended to the filters already stored on the client.
 *
 * @param string $attribute
 * @param string $value
 * @param bool   $exclude   stored with the filter as its 'exclude' flag
 */
public function setFilterString($attribute, $value, $exclude = false)
{
    assert(is_string($attribute));
    assert(is_string($value));

    $filter = array(
        'type' => self::FILTER_STRING,
        'attr' => $attribute,
        'exclude' => $exclude,
        'value' => $value
    );
    $this->filters[] = $filter;
}
|
865 | - |
|
/**
 * Set range filter
 * Only match records if $attribute value is between $min and $max (inclusive)
 *
 * The filter is appended to the filters already stored on the client.
 *
 * @param string $attribute
 * @param int    $min       any numeric value is accepted
 * @param int    $max       must not be lower than $min
 * @param bool   $exclude   stored with the filter as its 'exclude' flag
 */
public function setFilterRange($attribute, $min, $max, $exclude = false)
{
    assert(is_string($attribute));
    assert(is_numeric($min));
    assert(is_numeric($max));
    assert($min <= $max);

    $filter = array(
        'type' => self::FILTER_RANGE,
        'attr' => $attribute,
        'exclude' => $exclude,
        'min' => $min,
        'max' => $max
    );
    $this->filters[] = $filter;
}
|
890 | - |
|
/**
 * Set float range filter
 * Only match records if $attribute value is between $min and $max (inclusive)
 *
 * The filter is appended to the filters already stored on the client.
 *
 * @param string $attribute
 * @param float  $min       must be a float
 * @param float  $max       must be a float not lower than $min
 * @param bool   $exclude   stored with the filter as its 'exclude' flag
 */
public function setFilterFloatRange($attribute, $min, $max, $exclude = false)
{
    assert(is_string($attribute));
    assert(is_float($min));
    assert(is_float($max));
    assert($min <= $max);

    $filter = array(
        'type' => self::FILTER_FLOAT_RANGE,
        'attr' => $attribute,
        'exclude' => $exclude,
        'min' => $min,
        'max' => $max
    );
    $this->filters[] = $filter;
}
|
915 | - |
|
/**
 * Setup anchor point for geosphere distance calculations
 * Required to use @geodist in filters and sorting
 * Latitude and longitude must be in radians
 *
 * Replaces any anchor stored earlier.
 *
 * @param string $attr_lat  name of the latitude attribute
 * @param string $attr_long name of the longitude attribute
 * @param float  $lat       anchor latitude
 * @param float  $long      anchor longitude
 */
public function setGeoAnchor($attr_lat, $attr_long, $lat, $long)
{
    assert(is_string($attr_lat));
    assert(is_string($attr_long));
    assert(is_float($lat));
    assert(is_float($long));

    $anchor = array();
    $anchor['attrlat'] = $attr_lat;
    $anchor['attrlong'] = $attr_long;
    $anchor['lat'] = $lat;
    $anchor['long'] = $long;

    $this->anchor = $anchor;
}
|
940 | - |
|
/**
 * Set grouping attribute and function
 *
 * @param string $attribute  group-by attribute name
 * @param int    $func       one of the self::GROUP_BY_* constants
 * @param string $group_sort clause used to sort the groups in the result set
 */
public function setGroupBy($attribute, $func, $group_sort = '@group desc')
{
    assert(is_string($attribute));
    assert(is_string($group_sort));
    assert(in_array($func, array(
        self::GROUP_BY_DAY,
        self::GROUP_BY_WEEK,
        self::GROUP_BY_MONTH,
        self::GROUP_BY_YEAR,
        self::GROUP_BY_ATTR,
        self::GROUP_BY_ATTR_PAIR
    )));

    // the three settings are always replaced together
    $this->group_sort = $group_sort;
    $this->group_func = $func;
    $this->group_by = $attribute;
}
|
965 | - |
|
/**
 * Set count-distinct attribute for group-by queries
 *
 * @param string $attribute attribute name
 */
public function setGroupDistinct($attribute)
{
    assert(is_string($attribute));

    $distinct_attribute = $attribute;
    $this->group_distinct = $distinct_attribute;
}
|
976 | - |
|
/**
 * Set distributed retries count and delay
 *
 * @param int $count non-negative retry count
 * @param int $delay non-negative retry delay
 */
public function setRetries($count, $delay = 0)
{
    assert(is_int($count) && $count >= 0);
    assert(is_int($delay) && $delay >= 0);

    list($this->retry_count, $this->retry_delay) = array($count, $delay);
}
|
990 | - |
|
/**
 * Set result set format (hash or array; hash by default)
 * PHP specific; needed for group-by-MVA result sets that may contain duplicate IDs
 *
 * @param bool $array_result true for an array of matches, false for a hash
 */
public function setArrayResult($array_result)
{
    assert(is_bool($array_result));

    $use_array = $array_result;
    $this->array_result = $use_array;
}
|
1002 | - |
|
/**
 * Set attribute values override
 * There can be only one override per attribute
 * $values must be a hash that maps document IDs to attribute values
 *
 * Raises an E_USER_DEPRECATED notice on every call. A later call for the
 * same attribute name replaces the earlier override.
 *
 * @deprecated Do not call this method. Use SphinxQL REMAP() function instead.
 *
 * @param string $attr_name
 * @param int    $attr_type one of self::ATTR_INTEGER, ATTR_TIMESTAMP, ATTR_BOOL, ATTR_FLOAT, ATTR_BIGINT
 * @param array  $values
 */
public function setOverride($attr_name, $attr_type, array $values)
{
    $notice = 'DEPRECATED: Do not call this method. Use SphinxQL REMAP() function instead.';
    trigger_error($notice, E_USER_DEPRECATED);

    assert(is_string($attr_name));
    assert(in_array($attr_type, array(
        self::ATTR_INTEGER,
        self::ATTR_TIMESTAMP,
        self::ATTR_BOOL,
        self::ATTR_FLOAT,
        self::ATTR_BIGINT
    )));

    $override = array(
        'attr' => $attr_name,
        'type' => $attr_type,
        'values' => $values
    );
    $this->overrides[$attr_name] = $override;
}
|
1035 | - |
|
/**
 * Set select-list (attributes or expressions), SQL-like syntax
 *
 * @param string $select the select-list to store
 */
public function setSelect($select)
{
    assert(is_string($select));

    $select_list = $select;
    $this->select = $select_list;
}
|
1046 | - |
|
/**
 * Set a per-query flag
 *
 * Each flag maps to one bit of the stored query flags; 'max_predicted_time'
 * additionally stores its value as the predicted time.
 *
 * Known flags and accepted values:
 *  - reverse_scan:       0 or 1
 *  - sort_method:        'pq' or 'kbuffer'
 *  - max_predicted_time: any non-negative integer
 *  - boolean_simplify:   true or false
 *  - idf:                'normalized', 'plain', 'tfidf_normalized' or 'tfidf_unnormalized'
 *  - global_idf:         true or false
 *  - low_priority:       true or false
 *
 * @param string          $flag_name
 * @param string|int|bool $flag_value
 */
public function setQueryFlag($flag_name, $flag_value)
{
    $flags = array (
        'reverse_scan' => array(0, 1),
        'sort_method' => array('pq', 'kbuffer'),
        'max_predicted_time' => array(0),
        'boolean_simplify' => array(true, false),
        'idf' => array ('normalized', 'plain', 'tfidf_normalized', 'tfidf_unnormalized'),
        'global_idf' => array(true, false),
        'low_priority' => array(true, false)
    );

    // isset($a, $b) only tests that both variables are set, so membership
    // is checked against the keys of the flag table instead
    $is_known = is_string($flag_name) && isset($flags[$flag_name]);
    assert($is_known);

    // guarded by $is_known so an unknown name never indexes $flags
    assert(
        $is_known && (
            in_array($flag_value, $flags[$flag_name], true) ||
            ($flag_name == 'max_predicted_time' && is_int($flag_value) && $flag_value >= 0)
        )
    );

    switch ($flag_name) {
        case 'reverse_scan':
            $this->query_flags = setBit($this->query_flags, 0, $flag_value == 1);
            break;
        case 'sort_method':
            $this->query_flags = setBit($this->query_flags, 1, $flag_value == 'kbuffer');
            break;
        case 'max_predicted_time':
            $this->query_flags = setBit($this->query_flags, 2, $flag_value > 0);
            $this->predicted_time = (int)$flag_value;
            break;
        case 'boolean_simplify':
            $this->query_flags = setBit($this->query_flags, 3, $flag_value);
            break;
        case 'idf':
            // two independent bits: 4 = plain vs normalized, 6 = tfidf normalized vs unnormalized
            if ($flag_value == 'normalized' || $flag_value == 'plain') {
                $this->query_flags = setBit($this->query_flags, 4, $flag_value == 'plain');
            }
            if ($flag_value == 'tfidf_normalized' || $flag_value == 'tfidf_unnormalized') {
                $this->query_flags = setBit($this->query_flags, 6, $flag_value == 'tfidf_normalized');
            }
            break;
        case 'global_idf':
            $this->query_flags = setBit($this->query_flags, 5, $flag_value);
            break;
        case 'low_priority':
            $this->query_flags = setBit($this->query_flags, 8, $flag_value);
            break;
    }
}
|
1108 | - |
|
/**
 * Configure the outer select: its ORDER BY clause plus offset/limit window,
 * and mark the outer select as active.
 *
 * @param string $order_by outer order-by clause
 * @param int    $offset   outer offset, must be >= 0
 * @param int    $limit    outer limit, must be > 0
 */
public function setOuterSelect($order_by, $offset, $limit)
{
    // argument checks (only evaluated when assertions are enabled)
    assert(is_string($order_by));
    assert(is_int($offset));
    assert(is_int($limit));
    assert($offset >= 0);
    assert($limit > 0);

    $this->has_outer = true;
    $this->outer_limit = $limit;
    $this->outer_offset = $offset;
    $this->outer_order_by = $order_by;
}
|
1129 | - |
|
1130 | - |
|
1131 | - ////////////////////////////////////////////////////////////////////////////// |
|
1132 | - |
|
/**
 * Drop every configured filter as well as the anchor point
 * (useful between the queries of a multi-query batch).
 */
public function resetFilters()
{
    $this->anchor = array();
    $this->filters = array();
}
|
1141 | - |
|
/**
 * Restore the group-by settings to their initial values
 * (useful between the queries of a multi-query batch).
 */
public function resetGroupBy()
{
    $this->group_distinct = '';
    $this->group_sort = '@group desc';
    $this->group_func = self::GROUP_BY_DAY;
    $this->group_by = '';
}
|
1152 | - |
|
/**
 * Forget every attribute value override registered so far
 * (useful between the queries of a multi-query batch).
 */
public function resetOverrides()
{
    $this->overrides = array();
}
|
1160 | - |
|
/**
 * Reset the query flags to their default (only bit 6 set, i.e.
 * idf=tfidf_normalized) and clear the predicted-time budget.
 */
public function resetQueryFlag()
{
    $this->predicted_time = 0;
    $this->query_flags = setBit(0, 6, true); // default idf=tfidf_normalized
}
|
1166 | - |
|
/**
 * Disable the outer select and clear its order-by, offset and limit.
 */
public function resetOuterSelect()
{
    $this->has_outer = false;
    $this->outer_limit = 0;
    $this->outer_offset = 0;
    $this->outer_order_by = '';
}
|
1174 | - |
|
1175 | - ////////////////////////////////////////////////////////////////////////////// |
|
1176 | - |
|
/**
 * Run a single search: queue the query with addQuery(), execute it with
 * runQueries() and hand back the first (only) result set.
 *
 * The error and warning of that result set are copied to $this->error and
 * $this->warning.
 *
 * @param string $query
 * @param string $index
 * @param string $comment
 *
 * @return array|bool the result set, or false when runQueries() did not return
 *                    an array or the result status equals SEARCHD_ERROR
 */
public function query($query, $index = '*', $comment = '')
{
    assert(empty($this->reqs));

    $this->addQuery($query, $index, $comment);
    $results = $this->runQueries();
    // runQueries() may have bailed out before clearing the batch itself
    $this->reqs = array();

    if (!is_array($results)) {
        // probably network error; error message should be already filled
        return false;
    }

    $first = $results[0];
    $this->error = $first['error'];
    $this->warning = $first['warning'];

    return $first['status'] == self::SEARCHD_ERROR ? false : $first;
}
|
1207 | - |
|
/**
 * Pack a float as 4 bytes in network (big-endian) byte order.
 *
 * The float is packed in machine byte order, those bytes are re-read as an
 * unsigned 32-bit machine-order integer, and that integer is packed with 'N'.
 *
 * @param float $float
 *
 * @return string
 */
protected function packFloat($float)
{
    $machine_bytes = pack('f', $float);
    $as_int = unpack('L*', $machine_bytes); // 1-based result array

    return pack('N', $as_int[1]);
}
|
1221 | - |
|
/**
 * Add query to multi-query batch.
 *
 * Serialises the current client settings together with the given query into
 * one binary request and appends it to $this->reqs. Nothing is sent here;
 * the batch is transmitted by runQueries().
 *
 * Strings are written as a 32-bit big-endian length followed by the raw
 * bytes; plain integers are written with pack('N').
 *
 * @param string $query   query text
 * @param string $index   index name(s); defaults to '*'
 * @param string $comment comment string appended to the request
 *
 * @return int index of this request inside $this->reqs, i.e. the position of
 *             its result set in the array returned by runQueries()
 */
public function addQuery($query, $index = '*', $comment = '')
{
    // mbstring workaround
    $this->mbPush();

    // build request: flags, paging, match mode and ranker come first
    $req = pack('NNNNN', $this->query_flags, $this->offset, $this->limit, $this->mode, $this->ranker);
    // the ranking expression is only present for the expression ranker
    if ($this->ranker == self::RANK_EXPR) {
        $req .= pack('N', strlen($this->rank_expr)) . $this->rank_expr;
    }
    $req .= pack('N', $this->sort); // (deprecated) sort mode
    $req .= pack('N', strlen($this->sort_by)) . $this->sort_by;
    $req .= pack('N', strlen($query)) . $query; // query itself
    $req .= pack('N', count($this->weights)); // weights
    foreach ($this->weights as $weight) {
        $req .= pack('N', (int)$weight);
    }
    $req .= pack('N', strlen($index)) . $index; // indexes
    $req .= pack('N', 1); // id64 range marker
    $req .= pack64IntUnsigned($this->min_id) . pack64IntUnsigned($this->max_id); // id64 range

    // filters: count, then per filter its attribute name, type, type-specific payload and exclude flag
    $req .= pack('N', count($this->filters));
    foreach ($this->filters as $filter) {
        $req .= pack('N', strlen($filter['attr'])) . $filter['attr'];
        $req .= pack('N', $filter['type']);
        switch ($filter['type']) {
            case self::FILTER_VALUES:
                // value list: count followed by signed 64-bit values
                $req .= pack('N', count($filter['values']));
                foreach ($filter['values'] as $value) {
                    $req .= pack64IntSigned($value);
                }
                break;
            case self::FILTER_RANGE:
                $req .= pack64IntSigned($filter['min']) . pack64IntSigned($filter['max']);
                break;
            case self::FILTER_FLOAT_RANGE:
                $req .= $this->packFloat($filter['min']) . $this->packFloat($filter['max']);
                break;
            case self::FILTER_STRING:
                $req .= pack('N', strlen($filter['value'])) . $filter['value'];
                break;
            default:
                // always-failing assertion: flags a filter type this method does not know
                assert(0 && 'internal error: unhandled filter type');
        }
        $req .= pack('N', $filter['exclude']);
    }

    // group-by clause, max-matches count, group-sort clause, cutoff count
    $req .= pack('NN', $this->group_func, strlen($this->group_by)) . $this->group_by;
    $req .= pack('N', $this->max_matches);
    $req .= pack('N', strlen($this->group_sort)) . $this->group_sort;
    $req .= pack('NNN', $this->cutoff, $this->retry_count, $this->retry_delay);
    $req .= pack('N', strlen($this->group_distinct)) . $this->group_distinct;

    // anchor point: a 0/1 presence marker, followed by attribute names and coordinates when set
    if (empty($this->anchor)) {
        $req .= pack('N', 0);
    } else {
        $a =& $this->anchor;
        $req .= pack('N', 1);
        $req .= pack('N', strlen($a['attrlat'])) . $a['attrlat'];
        $req .= pack('N', strlen($a['attrlong'])) . $a['attrlong'];
        $req .= $this->packFloat($a['lat']) . $this->packFloat($a['long']);
    }

    // per-index weights: count, then (name, weight) pairs
    $req .= pack('N', count($this->index_weights));
    foreach ($this->index_weights as $idx => $weight) {
        $req .= pack('N', strlen($idx)) . $idx . pack('N', $weight);
    }

    // max query time
    $req .= pack('N', $this->max_query_time);

    // per-field weights: count, then (name, weight) pairs
    $req .= pack('N', count($this->field_weights));
    foreach ($this->field_weights as $field => $weight) {
        $req .= pack('N', strlen($field)) . $field . pack('N', $weight);
    }

    // comment
    $req .= pack('N', strlen($comment)) . $comment;

    // attribute overrides: count, then per override its attribute name, type and (doc id, value) pairs
    $req .= pack('N', count($this->overrides));
    foreach ($this->overrides as $key => $entry) {
        $req .= pack('N', strlen($entry['attr'])) . $entry['attr'];
        $req .= pack('NN', $entry['type'], count($entry['values']));
        foreach ($entry['values'] as $id => $val) {
            assert(is_numeric($id));
            assert(is_numeric($val));

            $req .= pack64IntUnsigned($id);
            // the value encoding depends on the declared attribute type
            switch ($entry['type']) {
                case self::ATTR_FLOAT:
                    $req .= $this->packFloat($val);
                    break;
                case self::ATTR_BIGINT:
                    $req .= pack64IntSigned($val);
                    break;
                default:
                    $req .= pack('N', $val);
                    break;
            }
        }
    }

    // select-list
    $req .= pack('N', strlen($this->select)) . $this->select;

    // max_predicted_time: written only when a positive budget is configured
    if ($this->predicted_time > 0) {
        $req .= pack('N', (int)$this->predicted_time);
    }

    // outer select: order-by clause, offset, limit and a 0/1 "has outer" marker
    $req .= pack('N', strlen($this->outer_order_by)) . $this->outer_order_by;
    $req .= pack('NN', $this->outer_offset, $this->outer_limit);
    if ($this->has_outer) {
        $req .= pack('N', 1);
    } else {
        $req .= pack('N', 0);
    }

    // mbstring workaround
    $this->mbPop();

    // store request to requests array
    $this->reqs[] = $req;
    return count($this->reqs) - 1;
}
|
1363 | - |
|
/**
 * Send every request queued by addQuery() to searchd as one batch and
 * return the parsed result sets.
 *
 * The queue is cleared once the batch was sent and a response received.
 * On the success path the mbstring state pushed here is popped at the end
 * of parseSearchResponse().
 *
 * @return array|bool array of result sets, or false when the queue is empty,
 *                    the connection fails, or sending/receiving fails
 */
public function runQueries()
{
    if (empty($this->reqs)) {
        $this->error = 'no queries defined, issue AddQuery() first';
        return false;
    }

    // mbstring workaround
    $this->mbPush();

    $fp = $this->connect();
    if ($fp === false) {
        $this->mbPop();
        return false;
    }

    // assemble the packet: command header followed by all queued requests
    $nreqs = count($this->reqs);
    $body = implode('', $this->reqs);
    $len = 8 + strlen($body);
    $packet = pack('nnNNN', self::SEARCHD_COMMAND_SEARCH, self::VER_COMMAND_SEARCH, $len, 0, $nreqs) . $body;

    // send query, get response (the response is only read after a successful send)
    $response = $this->send($fp, $packet, $len + 8)
        ? $this->getResponse($fp, self::VER_COMMAND_SEARCH)
        : false;
    if (!$response) {
        $this->mbPop();
        return false;
    }

    // query sent ok; we can reset reqs now
    $this->reqs = array();

    // parse and return response
    return $this->parseSearchResponse($response, $nreqs);
}
|
1402 | - |
|
/**
 * Parse and return search query (or queries) response.
 *
 * Walks the binary response with a byte cursor ($p) and builds one result
 * array per request. Each result carries 'error', 'warning' and 'status';
 * results that were not rejected additionally carry 'fields', 'attrs',
 * 'total', 'total_found', 'time', and - when present in the response -
 * 'matches' and 'words'.
 *
 * 'matches' is a list of array('id', 'weight', 'attrs') entries when
 * $this->array_result is set, otherwise a map keyed by document id.
 *
 * Calls $this->mbPop() before returning.
 * NOTE(review): presumably this balances the mbPush() done in runQueries() - confirm.
 *
 * @param string $response raw response body
 * @param int    $nreqs    number of result sets expected
 *
 * @return array list of result sets (may hold fewer than $nreqs entries if
 *               the response ends early)
 */
protected function parseSearchResponse($response, $nreqs)
{
    $p = 0; // current position
    $max = strlen($response); // max position for checks, to protect against broken responses

    $results = array();
    for ($ires = 0; $ires < $nreqs && $p < $max; $ires++) {
        $results[] = array();
        // work on the freshly appended entry in place
        $result =& $results[$ires];

        $result['error'] = '';
        $result['warning'] = '';

        // extract status
        list(, $status) = unpack('N*', substr($response, $p, 4));
        $p += 4;
        $result['status'] = $status;
        if ($status != self::SEARCHD_OK) {
            // any non-OK status is followed by a length-prefixed message
            list(, $len) = unpack('N*', substr($response, $p, 4));
            $p += 4;
            $message = substr($response, $p, $len);
            $p += $len;

            if ($status == self::SEARCHD_WARNING) {
                // a warning still carries a full result set, keep parsing
                $result['warning'] = $message;
            } else {
                // anything else: record the message and move on to the next result set
                $result['error'] = $message;
                continue;
            }
        }

        // read schema
        $fields = array();
        $attrs = array();

        // field names: count, then length-prefixed strings
        list(, $nfields) = unpack('N*', substr($response, $p, 4));
        $p += 4;
        while ($nfields --> 0 && $p < $max) {
            list(, $len) = unpack('N*', substr($response, $p, 4));
            $p += 4;
            $fields[] = substr($response, $p, $len);
            $p += $len;
        }
        $result['fields'] = $fields;

        // attributes: count, then (length-prefixed name, type) pairs
        list(, $n_attrs) = unpack('N*', substr($response, $p, 4));
        $p += 4;
        while ($n_attrs --> 0 && $p < $max) {
            list(, $len) = unpack('N*', substr($response, $p, 4));
            $p += 4;
            $attr = substr($response, $p, $len);
            $p += $len;
            list(, $type) = unpack('N*', substr($response, $p, 4));
            $p += 4;
            $attrs[$attr] = $type;
        }
        $result['attrs'] = $attrs;

        // read match count
        list(, $count) = unpack('N*', substr($response, $p, 4));
        $p += 4;
        // flag telling whether document ids are 8 bytes wide
        list(, $id64) = unpack('N*', substr($response, $p, 4));
        $p += 4;

        // read matches
        $idx = -1;
        while ($count --> 0 && $p < $max) {
            // index into result array
            $idx++;

            // parse document id and weight
            if ($id64) {
                $doc = unpack64IntUnsigned(substr($response, $p, 8));
                $p += 8;
                list(,$weight) = unpack('N*', substr($response, $p, 4));
                $p += 4;
            } else {
                // two consecutive 32-bit values: id, then weight
                list($doc, $weight) = array_values(unpack('N*N*', substr($response, $p, 8)));
                $p += 8;
                $doc = fixUInt($doc);
            }
            // format the weight as an unsigned decimal string
            $weight = sprintf('%u', $weight);

            // create match entry
            if ($this->array_result) {
                $result['matches'][$idx] = array('id' => $doc, 'weight' => $weight);
            } else {
                $result['matches'][$doc]['weight'] = $weight;
            }

            // parse and create attributes, in schema order
            $attr_values = array();
            foreach ($attrs as $attr => $type) {
                // handle 64bit int
                if ($type == self::ATTR_BIGINT) {
                    $attr_values[$attr] = unpack64IntSigned(substr($response, $p, 8));
                    $p += 8;
                    continue;
                }

                // handle floats: read the big-endian word, then reinterpret its bits as a machine float
                if ($type == self::ATTR_FLOAT) {
                    list(, $u_value) = unpack('N*', substr($response, $p, 4));
                    $p += 4;
                    list(, $f_value) = unpack('f*', pack('L', $u_value));
                    $attr_values[$attr] = $f_value;
                    continue;
                }

                // handle everything else as unsigned int
                list(, $val) = unpack('N*', substr($response, $p, 4));
                $p += 4;
                if ($type == self::ATTR_MULTI) {
                    // $val is the number of 32-bit values that follow
                    $attr_values[$attr] = array();
                    $n_values = $val;
                    while ($n_values --> 0 && $p < $max) {
                        list(, $val) = unpack('N*', substr($response, $p, 4));
                        $p += 4;
                        $attr_values[$attr][] = fixUInt($val);
                    }
                } elseif ($type == self::ATTR_MULTI64) {
                    // each 64-bit value read consumes 8 bytes and reduces the counter by 2
                    $attr_values[$attr] = array();
                    $n_values = $val;
                    while ($n_values > 0 && $p < $max) {
                        $attr_values[$attr][] = unpack64IntSigned(substr($response, $p, 8));
                        $p += 8;
                        $n_values -= 2;
                    }
                } elseif ($type == self::ATTR_STRING) {
                    // $val is the byte length of the string
                    $attr_values[$attr] = substr($response, $p, $val);
                    $p += $val;
                } elseif ($type == self::ATTR_FACTORS) {
                    // payload is read as $val - 4 bytes
                    // NOTE(review): looks like $val includes its own 4-byte length word - confirm
                    $attr_values[$attr] = substr($response, $p, $val - 4);
                    $p += $val-4;
                } else {
                    $attr_values[$attr] = fixUInt($val);
                }
            }

            if ($this->array_result) {
                $result['matches'][$idx]['attrs'] = $attr_values;
            } else {
                $result['matches'][$doc]['attrs'] = $attr_values;
            }
        }

        // trailer: totals, elapsed time and the number of per-word statistics entries
        list($total, $total_found, $msecs, $words) = array_values(unpack('N*N*N*N*', substr($response, $p, 16)));
        $result['total'] = sprintf('%u', $total);
        $result['total_found'] = sprintf('%u', $total_found);
        // the value read from the response is divided by 1000 and formatted with 3 decimals
        $result['time'] = sprintf('%.3f', $msecs / 1000);
        $p += 16;

        // per-word statistics: length-prefixed word, then docs and hits counters
        while ($words --> 0 && $p < $max) {
            list(, $len) = unpack('N*', substr($response, $p, 4));
            $p += 4;
            $word = substr($response, $p, $len);
            $p += $len;
            list($docs, $hits) = array_values(unpack('N*N*', substr($response, $p, 8)));
            $p += 8;
            $result['words'][$word] = array (
                'docs' => sprintf('%u', $docs),
                'hits' => sprintf('%u', $hits)
            );
        }
    }

    $this->mbPop();
    return $results;
}
|
1580 | - |
|
1581 | - ///////////////////////////////////////////////////////////////////////////// |
|
1582 | - // excerpts generation |
|
1583 | - ///////////////////////////////////////////////////////////////////////////// |
|
1584 | - |
|
/**
 * Connect to searchd and generate excerpts (snippets) of the given
 * documents for the given query.
 *
 * @param array  $docs  document bodies (strings)
 * @param string $index index name
 * @param string $words query words to highlight
 * @param array  $opts  excerpt options; missing keys fall back to the defaults below
 *
 * @return array|bool one snippet per document on success, false on failure
 */
public function buildExcerpts(array $docs, $index, $words, array $opts = array())
{
    assert(is_string($index));
    assert(is_string($words));

    $this->mbPush();

    $fp = $this->connect();
    if ($fp === false) {
        $this->mbPop();
        return false;
    }

    // fill in defaults for every option the caller did not supply
    $defaults = array(
        'before_match' => '<b>',
        'after_match' => '</b>',
        'chunk_separator' => ' ... ',
        'limit' => 256,
        'limit_passages' => 0,
        'limit_words' => 0,
        'around' => 5,
        'exact_phrase' => false,
        'single_passage' => false,
        'use_boundaries' => false,
        'weight_order' => false,
        'query_mode' => false,
        'force_all_words' => false,
        'start_passage_id' => 1,
        'load_files' => false,
        'html_strip_mode' => 'index',
        'allow_empty' => false,
        'passage_boundary' => 'none',
        'emit_zones' => false,
        'load_files_scattered' => false
    );
    $opts = array_merge($defaults, $opts);

    // boolean options and the flag bit each one switches on
    $flag_bits = array(
        'exact_phrase' => 2,
        'single_passage' => 4,
        'use_boundaries' => 8,
        'weight_order' => 16,
        'query_mode' => 32,
        'force_all_words' => 64,
        'load_files' => 128,
        'allow_empty' => 256,
        'emit_zones' => 512,
        'load_files_scattered' => 1024
    );
    $flags = 1; // remove spaces
    foreach ($flag_bits as $option => $bit) {
        if ($opts[$option]) {
            $flags |= $bit;
        }
    }

    // request body (v.1.2): mode=0, flags, index, words
    $req = pack('NN', 0, $flags);
    $req .= pack('N', strlen($index)) . $index;
    $req .= pack('N', strlen($words)) . $words;

    // string options, each length-prefixed
    foreach (array('before_match', 'after_match', 'chunk_separator') as $option) {
        $req .= pack('N', strlen($opts[$option])) . $opts[$option];
    }
    $req .= pack('NN', (int)$opts['limit'], (int)$opts['around']);
    // v.1.2 additions
    $req .= pack('NNN', (int)$opts['limit_passages'], (int)$opts['limit_words'], (int)$opts['start_passage_id']);
    foreach (array('html_strip_mode', 'passage_boundary') as $option) {
        $req .= pack('N', strlen($opts[$option])) . $opts[$option];
    }

    // documents: count, then length-prefixed bodies
    $req .= pack('N', count($docs));
    foreach ($docs as $doc) {
        assert(is_string($doc));
        $req .= pack('N', strlen($doc)) . $doc;
    }

    // prepend the command header, send, and wait for the reply
    $len = strlen($req);
    $req = pack('nnN', self::SEARCHD_COMMAND_EXCERPT, self::VER_COMMAND_EXCERPT, $len) . $req;
    $response = $this->send($fp, $req, $len + 8)
        ? $this->getResponse($fp, self::VER_COMMAND_EXCERPT)
        : false;
    if (!$response) {
        $this->mbPop();
        return false;
    }

    // the reply holds one length-prefixed snippet per submitted document
    $pos = 0;
    $res = array();
    $rlen = strlen($response);
    for ($remaining = count($docs); $remaining > 0; $remaining--) {
        list(, $len) = unpack('N*', substr($response, $pos, 4));
        $pos += 4;

        if ($pos + $len > $rlen) {
            $this->error = 'incomplete reply';
            $this->mbPop();
            return false;
        }
        $res[] = $len ? substr($response, $pos, $len) : '';
        $pos += $len;
    }

    $this->mbPop();
    return $res;
}
|
1727 | - |
|
1728 | - |
|
1729 | - ///////////////////////////////////////////////////////////////////////////// |
|
1730 | - // keyword generation |
|
1731 | - ///////////////////////////////////////////////////////////////////////////// |
|
1732 | - |
|
/**
 * Connect to searchd and generate the keyword list for a given query.
 *
 * @param string $query query text
 * @param string $index index name
 * @param bool   $hits  whether per-keyword 'docs' and 'hits' counters are requested
 *
 * @return array|bool list of array('tokenized', 'normalized'[, 'docs', 'hits'])
 *                    entries on success, false on failure
 */
public function buildKeywords($query, $index, $hits)
{
    assert(is_string($query));
    assert(is_string($index));
    assert(is_bool($hits));

    $this->mbPush();

    $fp = $this->connect();
    if ($fp === false) {
        $this->mbPop();
        return false;
    }

    // request body (v.1.0): query, index, hits flag
    $req = pack('N', strlen($query)) . $query
        . pack('N', strlen($index)) . $index
        . pack('N', (int)$hits);

    // prepend the command header, send, and wait for the reply
    $len = strlen($req);
    $req = pack('nnN', self::SEARCHD_COMMAND_KEYWORDS, self::VER_COMMAND_KEYWORDS, $len) . $req;
    $response = $this->send($fp, $req, $len + 8)
        ? $this->getResponse($fp, self::VER_COMMAND_KEYWORDS)
        : false;
    if (!$response) {
        $this->mbPop();
        return false;
    }

    // reply: keyword count, then per keyword two length-prefixed strings
    // and, when $hits is set, two 32-bit counters
    $pos = 0;
    $res = array();
    $rlen = strlen($response);
    list(, $nwords) = unpack('N*', substr($response, $pos, 4));
    $pos += 4;
    for ($i = 0; $i < $nwords; $i++) {
        list(, $len) = unpack('N*', substr($response, $pos, 4));
        $pos += 4;
        $tokenized = $len ? substr($response, $pos, $len) : '';
        $pos += $len;

        list(, $len) = unpack('N*', substr($response, $pos, 4));
        $pos += 4;
        $normalized = $len ? substr($response, $pos, $len) : '';
        $pos += $len;

        $keyword = array(
            'tokenized' => $tokenized,
            'normalized' => $normalized
        );

        if ($hits) {
            list($ndocs, $nhits) = array_values(unpack('N*N*', substr($response, $pos, 8)));
            $pos += 8;
            $keyword['docs'] = $ndocs;
            $keyword['hits'] = $nhits;
        }
        $res[] = $keyword;

        // the cursor ran past the end of the reply: treat it as truncated
        if ($pos > $rlen) {
            $this->error = 'incomplete reply';
            $this->mbPop();
            return false;
        }
    }

    $this->mbPop();
    return $res;
}
|
1818 | - |
|
/**
 * Backslash-escape the characters listed in the table below.
 *
 * The backslash itself is listed first so that the backslashes added for
 * the other characters are not escaped a second time.
 *
 * @param string $string
 *
 * @return string
 */
public function escapeString($string)
{
    $escapes = array(
        '\\' => '\\\\',
        '(' => '\(',
        ')' => '\)',
        '|' => '\|',
        '-' => '\-',
        '!' => '\!',
        '@' => '\@',
        '~' => '\~',
        '"' => '\"',
        '&' => '\&',
        '/' => '\/',
        '^' => '\^',
        '$' => '\$',
        '=' => '\=',
        '<' => '\<'
    );

    return str_replace(array_keys($escapes), array_values($escapes), $string);
}
|
1831 | - |
|
1832 | - ///////////////////////////////////////////////////////////////////////////// |
|
1833 | - // attribute updates |
|
1834 | - ///////////////////////////////////////////////////////////////////////////// |
|
1835 | - |
|
/**
 * Batch-update the given attributes of the given documents in the given index.
 *
 * @param string $index               index name
 * @param array  $attrs               attribute names
 * @param array  $values              map of document id => list of new values, one per
 *                                    attribute (each value itself a list of ints when $mva is set)
 * @param bool   $mva                 whether the attributes are multi-valued
 * @param bool   $ignore_non_existent flag passed through to searchd in the request
 *
 * @return int number of updated documents as reported by searchd, or -1 on failure
 */
public function updateAttributes($index, array $attrs, array $values, $mva = false, $ignore_non_existent = false)
{
    // argument checks (only evaluated when assertions are enabled)
    assert(is_string($index));
    assert(is_bool($mva));
    assert(is_bool($ignore_non_existent));

    foreach ($attrs as $attr) {
        assert(is_string($attr));
    }

    foreach ($values as $id => $entry) {
        assert(is_numeric($id));
        assert(is_array($entry));
        assert(count($entry) == count($attrs));
        foreach ($entry as $v) {
            if (!$mva) {
                assert(is_int($v));
                continue;
            }
            assert(is_array($v));
            foreach ($v as $vv) {
                assert(is_int($vv));
            }
        }
    }

    // request body: index, attribute count, ignore flag, then (name, mva marker) per attribute
    $this->mbPush();
    $req = pack('N', strlen($index)) . $index;
    $req .= pack('N', count($attrs));
    $req .= pack('N', $ignore_non_existent ? 1 : 0);

    $mva_marker = pack('N', $mva ? 1 : 0);
    foreach ($attrs as $name) {
        $req .= pack('N', strlen($name)) . $name . $mva_marker;
    }

    // rows: count, then per document its 64-bit id followed by the new values
    $req .= pack('N', count($values));
    foreach ($values as $doc_id => $row) {
        $req .= pack64IntUnsigned($doc_id);
        foreach ($row as $cell) {
            if ($mva) {
                // multi-value: element count, then each element
                $req .= pack('N', count($cell));
                foreach ($cell as $item) {
                    $req .= pack('N', $item);
                }
            } else {
                $req .= pack('N', $cell);
            }
        }
    }

    // connect, send query, get response
    $fp = $this->connect();
    if ($fp === false) {
        $this->mbPop();
        return -1;
    }

    $len = strlen($req);
    $req = pack('nnN', self::SEARCHD_COMMAND_UPDATE, self::VER_COMMAND_UPDATE, $len) . $req; // add header
    if (!$this->send($fp, $req, $len + 8)) {
        $this->mbPop();
        return -1;
    }

    $response = $this->getResponse($fp, self::VER_COMMAND_UPDATE);
    if (!$response) {
        $this->mbPop();
        return -1;
    }

    // the first 32-bit word of the reply is the update count
    list(, $updated) = unpack('N*', substr($response, 0, 4));
    $this->mbPop();
    return $updated;
}
|
1922 | - |
|
1923 | - ///////////////////////////////////////////////////////////////////////////// |
|
1924 | - // persistent connections |
|
1925 | - ///////////////////////////////////////////////////////////////////////////// |
|
1926 | - |
|
/**
 * Open a persistent connection: connect, send the PERSIST command and keep
 * the handle in $this->socket.
 *
 * @return bool true on success; false if a persistent connection is already
 *              open, the connection fails, or the command cannot be sent
 */
public function open()
{
    if ($this->socket !== false) {
        $this->error = 'already connected';
        return false;
    }

    $fp = $this->connect();
    if ($fp === false) {
        return false;
    }

    // command, command version = 0, body length = 4, body = 1
    $persist_cmd = pack('nnNN', self::SEARCHD_COMMAND_PERSIST, 0, 4, 1);
    if (!$this->send($fp, $persist_cmd, 12)) {
        return false;
    }

    $this->socket = $fp;
    return true;
}
|
1948 | - |
|
/**
 * Close the persistent connection previously stored by this client.
 *
 * @return bool true when a socket was closed, false (with last error set to
 *              'not connected') when there was nothing to close
 */
public function close()
{
    if ($this->socket !== false) {
        fclose($this->socket);
        $this->socket = false;

        return true;
    }

    $this->error = 'not connected';
    return false;
}
|
1964 | - |
|
1965 | - ////////////////////////////////////////////////////////////////////////// |
|
1966 | - // status |
|
1967 | - ////////////////////////////////////////////////////////////////////////// |
|
1968 | - |
|
/**
 * Send a status command to searchd and decode the reply into a table of strings.
 *
 * @param bool $session selects the flag sent in the request body: 0 when true, 1 when false
 *
 * @return array|bool rows of the status table (each row is a list of string cells),
 *                    or false when connecting, sending or reading the response fails
 */
public function status($session = false)
{
    assert(is_bool($session));

    $this->mbPush();
    if (($fp = $this->connect()) === false) {
        $this->mbPop();
        return false;
    }

    // body length is 4; the body is a single 32-bit flag: 0 when $session is true, 1 otherwise
    $req = pack('nnNN', self::SEARCHD_COMMAND_STATUS, self::VER_COMMAND_STATUS, 4, $session ? 0 : 1);
    if (!$this->send($fp, $req, 12) || !($response = $this->getResponse($fp, self::VER_COMMAND_STATUS))) {
        $this->mbPop();
        return false;
    }

    // the response opens with two 32-bit counters: row count, then column count
    $p = 0;
    list($rows, $cols) = array_values(unpack('N*N*', substr($response, $p, 8)));
    $p += 8;

    // every cell is encoded as a 32-bit byte length followed by that many bytes
    $res = array();
    for ($i = 0; $i < $rows; $i++) {
        for ($j = 0; $j < $cols; $j++) {
            list(, $len) = unpack('N*', substr($response, $p, 4));
            $p += 4;
            $res[$i][] = substr($response, $p, $len);
            $p += $len;
        }
    }

    $this->mbPop();
    return $res;
}
|
2009 | - |
|
2010 | - ////////////////////////////////////////////////////////////////////////// |
|
2011 | - // flush |
|
2012 | - ////////////////////////////////////////////////////////////////////////// |
|
2013 | - |
|
/**
 * Send a flush-attributes command to searchd and return the tag from its reply.
 *
 * @return int the 32-bit tag decoded from a 4-byte reply, or -1 on any failure
 *             (a reply of any other length also records 'unexpected response length')
 */
public function flushAttributes()
{
    $this->mbPush();

    $handle = $this->connect();
    if ($handle === false) {
        $this->mbPop();
        return -1;
    }

    // header only: the declared body length is 0
    $packet = pack('nnN', self::SEARCHD_COMMAND_FLUSH_ATTRS, self::VER_COMMAND_FLUSH_ATTRS, 0);
    $reply = $this->send($handle, $packet, 8)
        ? $this->getResponse($handle, self::VER_COMMAND_FLUSH_ATTRS)
        : false;
    if (!$reply) {
        $this->mbPop();
        return -1;
    }

    if (strlen($reply) == 4) {
        $decoded = unpack('N*', $reply);
        $tag = $decoded[1];
    } else {
        $tag = -1;
        $this->error = 'unexpected response length';
    }

    $this->mbPop();
    return $tag;
}
|
40 | + /** |
|
41 | + * Searchd host |
|
42 | + * |
|
43 | + * @var string |
|
44 | + */ |
|
45 | + protected $host = 'localhost'; |
|
46 | + |
|
47 | + /** |
|
48 | + * Searchd port |
|
49 | + * |
|
50 | + * @var int |
|
51 | + */ |
|
52 | + protected $port = 9312; |
|
53 | + |
|
54 | + /** |
|
55 | + * How many records to seek from result-set start |
|
56 | + * |
|
57 | + * @var int |
|
58 | + */ |
|
59 | + protected $offset = 0; |
|
60 | + |
|
61 | + /** |
|
62 | + * How many records to return from result-set starting at offset |
|
63 | + * |
|
64 | + * @var int |
|
65 | + */ |
|
66 | + protected $limit = 20; |
|
67 | + |
|
68 | + /** |
|
69 | + * Query matching mode |
|
70 | + * |
|
71 | + * @var int |
|
72 | + */ |
|
73 | + protected $mode = self::MATCH_EXTENDED2; |
|
74 | + |
|
75 | + /** |
|
76 | + * Per-field weights (default is 1 for all fields) |
|
77 | + * |
|
78 | + * @var array |
|
79 | + */ |
|
80 | + protected $weights = array(); |
|
81 | + |
|
82 | + /** |
|
83 | + * Match sorting mode |
|
84 | + * |
|
85 | + * @var int |
|
86 | + */ |
|
87 | + protected $sort = self::SORT_RELEVANCE; |
|
88 | + |
|
89 | + /** |
|
90 | + * Attribute to sort by |
|
91 | + * |
|
92 | + * @var string |
|
93 | + */ |
|
94 | + protected $sort_by = ''; |
|
95 | + |
|
96 | + /** |
|
97 | + * Min ID to match (0 means no limit) |
|
98 | + * |
|
99 | + * @var int |
|
100 | + */ |
|
101 | + protected $min_id = 0; |
|
102 | + |
|
103 | + /** |
|
104 | + * Max ID to match (0 means no limit) |
|
105 | + * |
|
106 | + * @var int |
|
107 | + */ |
|
108 | + protected $max_id = 0; |
|
109 | + |
|
110 | + /** |
|
111 | + * Search filters |
|
112 | + * |
|
113 | + * @var array |
|
114 | + */ |
|
115 | + protected $filters = array(); |
|
116 | + |
|
117 | + /** |
|
118 | + * Group-by attribute name |
|
119 | + * |
|
120 | + * @var string |
|
121 | + */ |
|
122 | + protected $group_by = ''; |
|
123 | + |
|
124 | + /** |
|
125 | + * Group-by function (to pre-process group-by attribute value with) |
|
126 | + * |
|
127 | + * @var int |
|
128 | + */ |
|
129 | + protected $group_func = self::GROUP_BY_DAY; |
|
130 | + |
|
131 | + /** |
|
132 | + * Group-by sorting clause (to sort groups in result set with) |
|
133 | + * |
|
134 | + * @var string |
|
135 | + */ |
|
136 | + protected $group_sort = '@group desc'; |
|
137 | + |
|
138 | + /** |
|
139 | + * Group-by count-distinct attribute |
|
140 | + * |
|
141 | + * @var string |
|
142 | + */ |
|
143 | + protected $group_distinct = ''; |
|
144 | + |
|
145 | + /** |
|
146 | + * Max matches to retrieve |
|
147 | + * |
|
148 | + * @var int |
|
149 | + */ |
|
150 | + protected $max_matches = 1000; |
|
151 | + |
|
152 | + /** |
|
153 | + * Cutoff to stop searching at |
|
154 | + * |
|
155 | + * @var int |
|
156 | + */ |
|
157 | + protected $cutoff = 0; |
|
158 | + |
|
159 | + /** |
|
160 | + * Distributed retries count |
|
161 | + * |
|
162 | + * @var int |
|
163 | + */ |
|
164 | + protected $retry_count = 0; |
|
165 | + |
|
166 | + /** |
|
167 | + * Distributed retries delay |
|
168 | + * |
|
169 | + * @var int |
|
170 | + */ |
|
171 | + protected $retry_delay = 0; |
|
172 | + |
|
173 | + /** |
|
174 | + * Geographical anchor point |
|
175 | + * |
|
176 | + * @var array |
|
177 | + */ |
|
178 | + protected $anchor = array(); |
|
179 | + |
|
180 | + /** |
|
181 | + * Per-index weights |
|
182 | + * |
|
183 | + * @var array |
|
184 | + */ |
|
185 | + protected $index_weights = array(); |
|
186 | + |
|
187 | + /** |
|
188 | + * Ranking mode |
|
189 | + * |
|
190 | + * @var int |
|
191 | + */ |
|
192 | + protected $ranker = self::RANK_PROXIMITY_BM25; |
|
193 | + |
|
194 | + /** |
|
195 | + * Ranking mode expression (for self::RANK_EXPR) |
|
196 | + * |
|
197 | + * @var string |
|
198 | + */ |
|
199 | + protected $rank_expr = ''; |
|
200 | + |
|
201 | + /** |
|
202 | + * Max query time, milliseconds (0 means no limit) |
|
203 | + * |
|
204 | + * @var int |
|
205 | + */ |
|
206 | + protected $max_query_time = 0; |
|
207 | + |
|
208 | + /** |
|
209 | + * Per-field-name weights |
|
210 | + * |
|
211 | + * @var array |
|
212 | + */ |
|
213 | + protected $field_weights = array(); |
|
214 | + |
|
215 | + /** |
|
216 | + * Per-query attribute values overrides |
|
217 | + * |
|
218 | + * @var array |
|
219 | + */ |
|
220 | + protected $overrides = array(); |
|
221 | + |
|
222 | + /** |
|
223 | + * Select-list (attributes or expressions, with optional aliases) |
|
224 | + * |
|
225 | + * @var string |
|
226 | + */ |
|
227 | + protected $select = '*'; |
|
228 | + |
|
229 | + /** |
|
230 | + * Per-query various flags |
|
231 | + * |
|
232 | + * @var int |
|
233 | + */ |
|
234 | + protected $query_flags = 0; |
|
235 | + |
|
236 | + /** |
|
237 | + * Per-query max_predicted_time |
|
238 | + * |
|
239 | + * @var int |
|
240 | + */ |
|
241 | + protected $predicted_time = 0; |
|
242 | + |
|
243 | + /** |
|
244 | + * Outer match sort by |
|
245 | + * |
|
246 | + * @var string |
|
247 | + */ |
|
248 | + protected $outer_order_by = ''; |
|
249 | + |
|
250 | + /** |
|
251 | + * Outer offset |
|
252 | + * |
|
253 | + * @var int |
|
254 | + */ |
|
255 | + protected $outer_offset = 0; |
|
256 | + |
|
257 | + /** |
|
258 | + * Outer limit |
|
259 | + * |
|
260 | + * @var int |
|
261 | + */ |
|
262 | + protected $outer_limit = 0; |
|
263 | + |
|
264 | + /** |
|
265 | + * @var bool |
|
266 | + */ |
|
267 | + protected $has_outer = false; |
|
268 | + |
|
269 | + /** |
|
270 | + * Last error message |
|
271 | + * |
|
272 | + * @var string |
|
273 | + */ |
|
274 | + protected $error = ''; |
|
275 | + |
|
276 | + /** |
|
277 | + * Last warning message |
|
278 | + * |
|
279 | + * @var string |
|
280 | + */ |
|
281 | + protected $warning = ''; |
|
282 | + |
|
283 | + /** |
|
284 | + * Connection error vs remote error flag |
|
285 | + * |
|
286 | + * @var bool |
|
287 | + */ |
|
288 | + protected $conn_error = false; |
|
289 | + |
|
290 | + /** |
|
291 | + * Requests array for multi-query |
|
292 | + * |
|
293 | + * @var array |
|
294 | + */ |
|
295 | + protected $reqs = array(); |
|
296 | + |
|
297 | + /** |
|
298 | + * Stored mbstring encoding |
|
299 | + * |
|
300 | + * @var string |
|
301 | + */ |
|
302 | + protected $mbenc = ''; |
|
303 | + |
|
304 | + /** |
|
305 | + * Whether $result['matches'] should be a hash or an array |
|
306 | + * |
|
307 | + * @var bool |
|
308 | + */ |
|
309 | + protected $array_result = false; |
|
310 | + |
|
311 | + /** |
|
312 | + * Connect timeout |
|
313 | + * |
|
314 | + * @var int|float |
|
315 | + */ |
|
316 | + protected $timeout = 0; |
|
317 | + |
|
318 | + /** |
|
319 | + * @var string |
|
320 | + */ |
|
321 | + protected $path = ''; |
|
322 | + |
|
323 | + /** |
|
324 | + * @var resource|bool |
|
325 | + */ |
|
326 | + protected $socket = false; |
|
327 | + |
|
328 | + // known searchd commands |
|
329 | + const SEARCHD_COMMAND_SEARCH = 0; |
|
330 | + const SEARCHD_COMMAND_EXCERPT = 1; |
|
331 | + const SEARCHD_COMMAND_UPDATE = 2; |
|
332 | + const SEARCHD_COMMAND_KEYWORDS = 3; |
|
333 | + const SEARCHD_COMMAND_PERSIST = 4; |
|
334 | + const SEARCHD_COMMAND_STATUS = 5; |
|
335 | + const SEARCHD_COMMAND_FLUSH_ATTRS = 7; |
|
336 | + |
|
337 | + // current client-side command implementation versions |
|
338 | + const VER_COMMAND_SEARCH = 0x11E; |
|
339 | + const VER_COMMAND_EXCERPT = 0x104; |
|
340 | + const VER_COMMAND_UPDATE = 0x103; |
|
341 | + const VER_COMMAND_KEYWORDS = 0x100; |
|
342 | + const VER_COMMAND_STATUS = 0x101; |
|
343 | + const VER_COMMAND_QUERY = 0x100; |
|
344 | + const VER_COMMAND_FLUSH_ATTRS = 0x100; |
|
345 | + |
|
346 | + // known searchd status codes |
|
347 | + const SEARCHD_OK = 0; |
|
348 | + const SEARCHD_ERROR = 1; |
|
349 | + const SEARCHD_RETRY = 2; |
|
350 | + const SEARCHD_WARNING = 3; |
|
351 | + |
|
352 | + // known match modes |
|
353 | + const MATCH_ALL = 0; |
|
354 | + const MATCH_ANY = 1; |
|
355 | + const MATCH_PHRASE = 2; |
|
356 | + const MATCH_BOOLEAN = 3; |
|
357 | + const MATCH_EXTENDED = 4; |
|
358 | + const MATCH_FULL_SCAN = 5; |
|
359 | + const MATCH_EXTENDED2 = 6; // extended engine V2 (TEMPORARY, WILL BE REMOVED) |
|
360 | + |
|
361 | + // known ranking modes (ext2 only) |
|
362 | + const RANK_PROXIMITY_BM25 = 0; // default mode, phrase proximity major factor and BM25 minor one |
|
363 | + const RANK_BM25 = 1; // statistical mode, BM25 ranking only (faster but worse quality) |
|
364 | + const RANK_NONE = 2; // no ranking, all matches get a weight of 1 |
|
365 | + const RANK_WORD_COUNT = 3; // simple word-count weighting, rank is a weighted sum of per-field keyword |
|
366 | + // occurrence counts |
|
367 | + const RANK_PROXIMITY = 4; |
|
368 | + const RANK_MATCH_ANY = 5; |
|
369 | + const RANK_FIELD_MASK = 6; |
|
370 | + const RANK_SPH04 = 7; |
|
371 | + const RANK_EXPR = 8; |
|
372 | + const RANK_TOTAL = 9; |
|
373 | + |
|
374 | + // known sort modes |
|
375 | + const SORT_RELEVANCE = 0; |
|
376 | + const SORT_ATTR_DESC = 1; |
|
377 | + const SORT_ATTR_ASC = 2; |
|
378 | + const SORT_TIME_SEGMENTS = 3; |
|
379 | + const SORT_EXTENDED = 4; |
|
380 | + const SORT_EXPR = 5; |
|
381 | + |
|
382 | + // known filter types |
|
383 | + const FILTER_VALUES = 0; |
|
384 | + const FILTER_RANGE = 1; |
|
385 | + const FILTER_FLOAT_RANGE = 2; |
|
386 | + const FILTER_STRING = 3; |
|
387 | + |
|
388 | + // known attribute types |
|
389 | + const ATTR_INTEGER = 1; |
|
390 | + const ATTR_TIMESTAMP = 2; |
|
391 | + const ATTR_ORDINAL = 3; |
|
392 | + const ATTR_BOOL = 4; |
|
393 | + const ATTR_FLOAT = 5; |
|
394 | + const ATTR_BIGINT = 6; |
|
395 | + const ATTR_STRING = 7; |
|
396 | + const ATTR_FACTORS = 1001; |
|
397 | + const ATTR_MULTI = 0x40000001; |
|
398 | + const ATTR_MULTI64 = 0x40000002; |
|
399 | + |
|
400 | + // known grouping functions |
|
401 | + const GROUP_BY_DAY = 0; |
|
402 | + const GROUP_BY_WEEK = 1; |
|
403 | + const GROUP_BY_MONTH = 2; |
|
404 | + const GROUP_BY_YEAR = 3; |
|
405 | + const GROUP_BY_ATTR = 4; |
|
406 | + const GROUP_BY_ATTR_PAIR = 5; |
|
407 | + |
|
408 | + ///////////////////////////////////////////////////////////////////////////// |
|
409 | + // common stuff |
|
410 | + ///////////////////////////////////////////////////////////////////////////// |
|
411 | + |
|
/**
 * Initialise the per-query flags with bit 6 switched on
 * (the default idf=tfidf_normalized setting).
 */
public function __construct()
{
    $flags = setBit(0, 6, true);

    $this->query_flags = $flags;
}
|
417 | + |
|
/**
 * Close the stored persistent socket, if any, when the client is destroyed.
 */
public function __destruct()
{
    if ($this->socket === false) {
        return;
    }

    fclose($this->socket);
}
|
424 | + |
|
/**
 * Fetch the message recorded for the most recent failure.
 *
 * @return string
 */
public function getLastError()
{
    $message = $this->error;

    return $message;
}
|
432 | + |
|
/**
 * Fetch the most recently recorded warning message.
 *
 * @return string
 */
public function getLastWarning()
{
    $message = $this->warning;

    return $message;
}
|
440 | + |
|
/**
 * Report the connection-error flag, which lets callers tell network
 * connection errors apart from searchd errors or broken responses.
 *
 * @return bool
 */
public function isConnectError()
{
    $flag = $this->conn_error;

    return $flag;
}
|
450 | + |
|
/**
 * Set searchd host name and port, or a UNIX socket path.
 *
 * A host that starts with '/' is stored as a 'unix://' path; a host that already
 * starts with 'unix://' is stored as-is. In both cases host and port are left
 * untouched. Any other value is stored as the TCP host and clears the path.
 *
 * @param string $host host name, absolute socket path, or 'unix://' URL
 * @param int    $port TCP port; 0 selects the default 9312
 */
public function setServer($host, $port = 0)
{
    assert(is_string($host));

    // substr() instead of $host[0]: an empty host must not raise an
    // "Uninitialized string offset" notice/warning
    if (substr($host, 0, 1) === '/') {
        $this->path = 'unix://' . $host;
        return;
    }
    if (substr($host, 0, 7) == 'unix://') {
        $this->path = $host;
        return;
    }

    $this->host = $host;
    $port = intval($port);
    assert(0 <= $port && $port < 65536);
    $this->port = $port == 0 ? 9312 : $port;
    $this->path = '';
}
|
475 | + |
|
/**
 * Store the timeout used when connecting to the server (0 to remove it).
 *
 * @param int|float|string $timeout any numeric value
 */
public function setConnectTimeout($timeout)
{
    assert(is_numeric($timeout));

    $this->timeout = $timeout;
}
|
486 | + |
|
/**
 * Write a request to the given stream.
 *
 * On failure the last error message is set and the connection-error flag is raised.
 *
 * @param resource $handle stream to write to
 * @param string   $data   bytes to send
 * @param int      $length number of bytes expected to be written
 *
 * @return bool true only when the stream is not at EOF and exactly $length bytes were written
 */
protected function send($handle, $data, $length)
{
    // fwrite() is only attempted when the stream has not already hit EOF
    $delivered = !feof($handle) && fwrite($handle, $data, $length) === $length;
    if ($delivered) {
        return true;
    }

    $this->error = 'connection unexpectedly closed (timed out?)';
    $this->conn_error = true;

    return false;
}
|
503 | + |
|
504 | + ///////////////////////////////////////////////////////////////////////////// |
|
505 | + |
|
/**
 * Enter mbstring workaround mode.
 *
 * When bit 2 of the 'mbstring.func_overload' setting is set, the current internal
 * encoding is remembered and the internal encoding is switched to 'latin1'.
 */
protected function mbPush()
{
    $this->mbenc = '';

    if (!(ini_get('mbstring.func_overload') & 2)) {
        return;
    }

    $this->mbenc = mb_internal_encoding();
    mb_internal_encoding('latin1');
}
|
517 | + |
|
/**
 * Leave mbstring workaround mode by restoring the remembered internal encoding, if any.
 */
protected function mbPop()
{
    if (!$this->mbenc) {
        return;
    }

    mb_internal_encoding($this->mbenc);
}
|
527 | + |
|
/**
 * Connect to searchd server.
 *
 * Reuses the stored persistent socket while it is still alive; otherwise opens a
 * new stream (UNIX socket path or host:port), sends the client protocol version
 * and validates the version announced by the server.
 *
 * @return bool|resource connected stream, or false on failure (last error is set)
 */
protected function connect()
{
    if (is_resource($this->socket)) {
        // we are in persistent connection mode, so we have a socket
        // however, need to check whether it's still alive
        if (!feof($this->socket)) {
            return $this->socket;
        }

        // force reopen
        $this->socket = false;
    }

    $errno = 0;
    $errstr = '';
    $this->conn_error = false;

    if ($this->path) {
        $host = $this->path;
        $port = 0;
    } else {
        $host = $this->host;
        $port = $this->port;
    }

    // a non-positive timeout means "use fsockopen's default"
    if ($this->timeout <= 0) {
        $fp = @fsockopen($host, $port, $errno, $errstr);
    } else {
        $fp = @fsockopen($host, $port, $errno, $errstr, $this->timeout);
    }

    if (!is_resource($fp)) {
        if ($this->path) {
            $location = $this->path;
        } else {
            $location = "{$this->host}:{$this->port}";
        }

        $errstr = trim($errstr);
        $this->error = "connection to $location failed (errno=$errno, msg=$errstr)";
        $this->conn_error = true;
        return false;
    }

    // send my version
    // this is a subtle part. we must do it before (!) reading back from searchd.
    // because otherwise under some conditions (reported on FreeBSD for instance)
    // TCP stack could throttle write-write-read pattern because of Nagle.
    if (!$this->send($fp, pack('N', 1), 4)) {
        fclose($fp);
        $this->error = 'failed to send client protocol version';
        return false;
    }

    // check version; a short or failed read is treated as version 0 instead of
    // being passed to unpack(), which would emit warnings on incomplete data
    $raw = fread($fp, 4);
    $v = 0;
    if (is_string($raw) && strlen($raw) === 4) {
        list(, $v) = unpack('N*', $raw);
        $v = (int)$v;
    }
    if ($v < 1) {
        fclose($fp);
        $this->error = "expected searchd protocol version 1+, got version '$v'";
        return false;
    }

    return $fp;
}
|
598 | + |
|
/**
 * Get and check response packet from searchd server.
 *
 * Reads the 8-byte header (status, version, body length), then the body, closes
 * the stream unless a persistent socket is stored on the client, and interprets
 * the status code.
 *
 * @param resource $fp         stream to read from
 * @param int      $client_ver client-side command version, compared with the server's
 *
 * @return bool|string response body (with the warning text stripped for warning
 *                     replies), or false on failure (last error is set)
 */
protected function getResponse($fp, $client_ver)
{
    $response = '';
    $len = 0;
    $status = null;
    $ver = null;

    $header = fread($fp, 8);
    if (strlen($header) == 8) {
        list($status, $ver, $len) = array_values(unpack('n2a/Nb', $header));
        $left = $len;
        while ($left > 0 && !feof($fp)) {
            $chunk = fread($fp, min(8192, $left));
            if ($chunk === false) {
                // read error: stop instead of spinning; reported below as a short read
                break;
            }
            // compare against '' explicitly: a chunk consisting of the single
            // byte "0" is falsy in PHP and must not be dropped
            if ($chunk !== '') {
                $response .= $chunk;
                $left -= strlen($chunk);
            }
        }
    }

    if ($this->socket === false) {
        fclose($fp);
    }

    // check response (by length, so that a body of "0" is not mistaken for empty)
    $read = strlen($response);
    if ($read === 0 || $read != $len) {
        $this->error = $len
            ? "failed to read searchd response (status=$status, ver=$ver, len=$len, read=$read)"
            : 'received zero-sized searchd response';
        return false;
    }

    switch ($status) {
        case self::SEARCHD_WARNING:
            // body layout: 32-bit warning length, warning text, then the actual payload
            list(, $wlen) = unpack('N*', substr($response, 0, 4));
            $this->warning = substr($response, 4, $wlen);
            return substr($response, 4 + $wlen);
        case self::SEARCHD_ERROR:
            $this->error = 'searchd error: ' . substr($response, 4);
            return false;
        case self::SEARCHD_RETRY:
            $this->error = 'temporary searchd error: ' . substr($response, 4);
            return false;
        case self::SEARCHD_OK:
            if ($ver < $client_ver) { // check version
                $this->warning = sprintf(
                    'searchd command v.%d.%d older than client\'s v.%d.%d, some options might not work',
                    $ver >> 8,
                    $ver & 0xff,
                    $client_ver >> 8,
                    $client_ver & 0xff
                );
            }

            return $response;
        default:
            $this->error = "unknown status code '$status'";
            return false;
    }
}
|
666 | + |
|
667 | + ///////////////////////////////////////////////////////////////////////////// |
|
668 | + // searching |
|
669 | + ///////////////////////////////////////////////////////////////////////////// |
|
670 | + |
|
/**
 * Configure the result-set window and, optionally, the max-matches and cutoff limits.
 *
 * @param int $offset number of records to skip from the start of the result set
 * @param int $limit  number of records to return
 * @param int $max    max matches to retrieve; applied only when greater than 0
 * @param int $cutoff cutoff to stop searching at; applied only when greater than 0
 */
public function setLimits($offset, $limit, $max = 0, $cutoff = 0)
{
    assert(is_int($offset));
    assert(is_int($limit));
    assert($offset >= 0);
    assert($limit > 0);
    assert($max >= 0);

    $this->limit = $limit;
    $this->offset = $offset;

    // non-positive values leave the previously configured settings untouched
    if ($cutoff > 0) {
        $this->cutoff = $cutoff;
    }
    if ($max > 0) {
        $this->max_matches = $max;
    }
}
|
695 | + |
|
/**
 * Set the per-index maximum query time in milliseconds; 0 means 'do not limit'.
 *
 * @param int $max non-negative number of milliseconds
 */
public function setMaxQueryTime($max)
{
    assert(is_int($max));
    assert($max >= 0);

    $this->max_query_time = $max;
}
|
707 | + |
|
/**
 * Set the query matching mode.
 *
 * Always raises an E_USER_DEPRECATED notice before storing the mode.
 *
 * @param int $mode one of the self::MATCH_* constants
 */
public function setMatchMode($mode)
{
    trigger_error(
        'DEPRECATED: Do not call this method or, even better, use SphinxQL instead of an API',
        E_USER_DEPRECATED
    );

    assert(in_array($mode, array(
        self::MATCH_ALL,
        self::MATCH_ANY,
        self::MATCH_PHRASE,
        self::MATCH_BOOLEAN,
        self::MATCH_EXTENDED,
        self::MATCH_FULL_SCAN,
        self::MATCH_EXTENDED2
    )));

    $this->mode = $mode;
}
|
730 | + |
|
/**
 * Set the ranking mode and its optional ranking expression.
 *
 * @param int    $ranker    ranker id; asserted to be 0 or within [1, self::RANK_TOTAL)
 * @param string $rank_expr ranking expression (the property it fills is documented
 *                          as being for self::RANK_EXPR)
 */
public function setRankingMode($ranker, $rank_expr='')
{
    assert($ranker === 0 || $ranker >= 1 && $ranker < self::RANK_TOTAL);
    assert(is_string($rank_expr));

    $this->rank_expr = $rank_expr;
    $this->ranker = $ranker;
}
|
744 | + |
|
/**
 * Set the match sorting mode.
 *
 * @param int    $mode    one of the self::SORT_* constants
 * @param string $sort_by sort clause; asserted non-empty for every mode
 *                        except self::SORT_RELEVANCE
 */
public function setSortMode($mode, $sort_by = '')
{
    assert(in_array($mode, array(
        self::SORT_RELEVANCE,
        self::SORT_ATTR_DESC,
        self::SORT_ATTR_ASC,
        self::SORT_TIME_SEGMENTS,
        self::SORT_EXTENDED,
        self::SORT_EXPR
    )));
    assert(is_string($sort_by));
    assert($mode == self::SORT_RELEVANCE || strlen($sort_by) > 0);

    $this->sort_by = $sort_by;
    $this->sort = $mode;
}
|
767 | + |
|
/**
 * Formerly bound per-field weights by order; now always throws.
 *
 * @deprecated use setFieldWeights() instead
 *
 * @throws \RuntimeException on every call
 */
public function setWeights()
{
    $message = 'This method is now deprecated; please use setFieldWeights instead';

    throw new \RuntimeException($message);
}
|
777 | + |
|
/**
 * Bind per-field weights by field name.
 *
 * @param array $weights map of field name (string) to weight (int)
 */
public function setFieldWeights(array $weights)
{
    // validation only: every key must be a string and every value an int
    foreach ($weights as $name => $weight) {
        assert(is_string($name));
        assert(is_int($weight));
    }

    $this->field_weights = $weights;
}
|
791 | + |
|
/**
 * Bind per-index weights by index name.
 *
 * @param array $weights map of index name (string) to weight (int)
 */
public function setIndexWeights(array $weights)
{
    // validation only: every key must be a string and every value an int
    foreach ($weights as $index => $weight) {
        assert(is_string($index));
        assert(is_int($weight));
    }

    $this->index_weights = $weights;
}
|
805 | + |
|
/**
 * Set the document ID range to match: only records whose ID is
 * between $min and $max (inclusive) are matched.
 *
 * @param int $min lower bound (numeric)
 * @param int $max upper bound (numeric), asserted to be no less than $min
 */
public function setIDRange($min, $max)
{
    assert(is_numeric($min));
    assert(is_numeric($max));
    assert($min <= $max);

    $this->max_id = $max;
    $this->min_id = $min;
}
|
821 | + |
|
/**
 * Add a value-set filter: only match records whose $attribute value is in the given set.
 *
 * @param string $attribute attribute name
 * @param array  $values    non-empty list of numeric values
 * @param bool   $exclude   stored with the filter as its 'exclude' flag
 */
public function setFilter($attribute, array $values, $exclude = false)
{
    assert(is_string($attribute));
    assert(count($values));

    foreach ($values as $value) {
        assert(is_numeric($value));
    }

    $filter = array(
        'type' => self::FILTER_VALUES,
        'attr' => $attribute,
        'exclude' => $exclude,
        'values' => $values
    );
    $this->filters[] = $filter;
}
|
845 | + |
|
/**
 * Add a string filter: only match records whose $attribute value is equal to $value.
 *
 * @param string $attribute attribute name
 * @param string $value     value to compare against
 * @param bool   $exclude   stored with the filter as its 'exclude' flag
 */
public function setFilterString($attribute, $value, $exclude = false)
{
    assert(is_string($attribute));
    assert(is_string($value));

    $filter = array(
        'type' => self::FILTER_STRING,
        'attr' => $attribute,
        'exclude' => $exclude,
        'value' => $value
    );
    $this->filters[] = $filter;
}
|
865 | + |
|
/**
 * Add a range filter: only match records whose $attribute value is
 * between $min and $max (inclusive).
 *
 * @param string $attribute attribute name
 * @param int    $min       lower bound (numeric)
 * @param int    $max       upper bound (numeric), asserted to be no less than $min
 * @param bool   $exclude   stored with the filter as its 'exclude' flag
 */
public function setFilterRange($attribute, $min, $max, $exclude = false)
{
    assert(is_string($attribute));
    assert(is_numeric($min));
    assert(is_numeric($max));
    assert($min <= $max);

    $filter = array(
        'type' => self::FILTER_RANGE,
        'attr' => $attribute,
        'exclude' => $exclude,
        'min' => $min,
        'max' => $max
    );
    $this->filters[] = $filter;
}
|
890 | + |
|
/**
 * Add a float range filter: only match records whose $attribute value is
 * between $min and $max (inclusive).
 *
 * @param string $attribute attribute name
 * @param float  $min       lower bound
 * @param float  $max       upper bound, asserted to be no less than $min
 * @param bool   $exclude   stored with the filter as its 'exclude' flag
 */
public function setFilterFloatRange($attribute, $min, $max, $exclude = false)
{
    assert(is_string($attribute));
    assert(is_float($min));
    assert(is_float($max));
    assert($min <= $max);

    $filter = array(
        'type' => self::FILTER_FLOAT_RANGE,
        'attr' => $attribute,
        'exclude' => $exclude,
        'min' => $min,
        'max' => $max
    );
    $this->filters[] = $filter;
}
|
915 | + |
|
/**
 * Set up the anchor point for geosphere distance calculations.
 * Required to use @geodist in filters and sorting.
 * Latitude and longitude must be in radians.
 *
 * @param string $attr_lat  name of the latitude attribute
 * @param string $attr_long name of the longitude attribute
 * @param float  $lat       anchor latitude
 * @param float  $long      anchor longitude
 */
public function setGeoAnchor($attr_lat, $attr_long, $lat, $long)
{
    assert(is_string($attr_lat));
    assert(is_string($attr_long));
    assert(is_float($lat));
    assert(is_float($long));

    $anchor = array(
        'attrlat' => $attr_lat,
        'attrlong' => $attr_long,
        'lat' => $lat,
        'long' => $long
    );
    $this->anchor = $anchor;
}
|
940 | + |
|
/**
 * Set the grouping attribute, the grouping function and the group sorting clause.
 *
 * @param string $attribute  group-by attribute name
 * @param int    $func       one of the self::GROUP_BY_* constants
 * @param string $group_sort clause used to sort groups in the result set
 */
public function setGroupBy($attribute, $func, $group_sort = '@group desc')
{
    assert(is_string($attribute));
    assert(is_string($group_sort));
    assert(in_array($func, array(
        self::GROUP_BY_DAY,
        self::GROUP_BY_WEEK,
        self::GROUP_BY_MONTH,
        self::GROUP_BY_YEAR,
        self::GROUP_BY_ATTR,
        self::GROUP_BY_ATTR_PAIR
    )));

    $this->group_sort = $group_sort;
    $this->group_func = $func;
    $this->group_by = $attribute;
}
|
965 | + |
|
/**
 * Set the count-distinct attribute used by group-by queries.
 *
 * @param string $attribute attribute name
 */
public function setGroupDistinct($attribute)
{
    assert(is_string($attribute));

    $this->group_distinct = $attribute;
}
|
976 | + |
|
/**
 * Set the distributed retries count and the delay between retries.
 *
 * @param int $count non-negative retries count
 * @param int $delay non-negative retries delay
 */
public function setRetries($count, $delay = 0)
{
    assert(is_int($count) && $count >= 0);
    assert(is_int($delay) && $delay >= 0);

    $this->retry_delay = $delay;
    $this->retry_count = $count;
}
|
990 | + |
|
/**
 * Choose the result set format (hash or array; hash by default).
 * PHP specific; needed for group-by-MVA result sets that may contain duplicate IDs.
 *
 * @param bool $array_result flag stored on the client
 */
public function setArrayResult($array_result)
{
    assert(is_bool($array_result));

    $this->array_result = $array_result;
}
|
1002 | + |
|
1003 | + /** |
|
1004 | + * Set attribute values override |
|
1005 | + * There can be only one override per attribute |
|
1006 | + * $values must be a hash that maps document IDs to attribute values |
|
1007 | + * |
|
1008 | + * @deprecated Do not call this method. Use SphinxQL REMAP() function instead. |
|
1009 | + * |
|
1010 | + * @param string $attr_name |
|
1011 | + * @param string $attr_type |
|
1012 | + * @param array $values |
|
1013 | + */ |
|
1014 | + public function setOverride($attr_name, $attr_type, array $values) |
|
1015 | + { |
|
1016 | + trigger_error( |
|
1017 | + 'DEPRECATED: Do not call this method. Use SphinxQL REMAP() function instead.', |
|
1018 | + E_USER_DEPRECATED |
|
1019 | + ); |
|
1020 | + assert(is_string($attr_name)); |
|
1021 | + assert(in_array($attr_type, array( |
|
1022 | + self::ATTR_INTEGER, |
|
1023 | + self::ATTR_TIMESTAMP, |
|
1024 | + self::ATTR_BOOL, |
|
1025 | + self::ATTR_FLOAT, |
|
1026 | + self::ATTR_BIGINT |
|
1027 | + ))); |
|
1028 | + |
|
1029 | + $this->overrides[$attr_name] = array( |
|
1030 | + 'attr' => $attr_name, |
|
1031 | + 'type' => $attr_type, |
|
1032 | + 'values' => $values |
|
1033 | + ); |
|
1034 | + } |
|
1035 | + |
|
1036 | + /** |
|
1037 | + * Set select-list (attributes or expressions), SQL-like syntax |
|
1038 | + * |
|
1039 | + * @param string $select |
|
1040 | + */ |
|
1041 | + public function setSelect($select) |
|
1042 | + { |
|
1043 | + assert(is_string($select)); |
|
1044 | + $this->select = $select; |
|
1045 | + } |
|
1046 | + |
|
1047 | + /** |
|
1048 | + * @param string $flag_name |
|
1049 | + * @param string|int $flag_value |
|
1050 | + */ |
|
1051 | + public function setQueryFlag($flag_name, $flag_value) |
|
1052 | + { |
|
1053 | + $known_names = array( |
|
1054 | + 'reverse_scan', |
|
1055 | + 'sort_method', |
|
1056 | + 'max_predicted_time', |
|
1057 | + 'boolean_simplify', |
|
1058 | + 'idf', |
|
1059 | + 'global_idf', |
|
1060 | + 'low_priority' |
|
1061 | + ); |
|
1062 | + $flags = array ( |
|
1063 | + 'reverse_scan' => array(0, 1), |
|
1064 | + 'sort_method' => array('pq', 'kbuffer'), |
|
1065 | + 'max_predicted_time' => array(0), |
|
1066 | + 'boolean_simplify' => array(true, false), |
|
1067 | + 'idf' => array ('normalized', 'plain', 'tfidf_normalized', 'tfidf_unnormalized'), |
|
1068 | + 'global_idf' => array(true, false), |
|
1069 | + 'low_priority' => array(true, false) |
|
1070 | + ); |
|
1071 | + |
|
1072 | + assert(isset($flag_name, $known_names)); |
|
1073 | + assert( |
|
1074 | + in_array($flag_value, $flags[$flag_name], true) || |
|
1075 | + ($flag_name == 'max_predicted_time' && is_int($flag_value) && $flag_value >= 0) |
|
1076 | + ); |
|
1077 | + |
|
1078 | + switch ($flag_name) { |
|
1079 | + case 'reverse_scan': |
|
1080 | + $this->query_flags = setBit($this->query_flags, 0, $flag_value == 1); |
|
1081 | + break; |
|
1082 | + case 'sort_method': |
|
1083 | + $this->query_flags = setBit($this->query_flags, 1, $flag_value == 'kbuffer'); |
|
1084 | + break; |
|
1085 | + case 'max_predicted_time': |
|
1086 | + $this->query_flags = setBit($this->query_flags, 2, $flag_value > 0); |
|
1087 | + $this->predicted_time = (int)$flag_value; |
|
1088 | + break; |
|
1089 | + case 'boolean_simplify': |
|
1090 | + $this->query_flags = setBit($this->query_flags, 3, $flag_value); |
|
1091 | + break; |
|
1092 | + case 'idf': |
|
1093 | + if ($flag_value == 'normalized' || $flag_value == 'plain') { |
|
1094 | + $this->query_flags = setBit($this->query_flags, 4, $flag_value == 'plain'); |
|
1095 | + } |
|
1096 | + if ($flag_value == 'tfidf_normalized' || $flag_value == 'tfidf_unnormalized') { |
|
1097 | + $this->query_flags = setBit($this->query_flags, 6, $flag_value == 'tfidf_normalized'); |
|
1098 | + } |
|
1099 | + break; |
|
1100 | + case 'global_idf': |
|
1101 | + $this->query_flags = setBit($this->query_flags, 5, $flag_value); |
|
1102 | + break; |
|
1103 | + case 'low_priority': |
|
1104 | + $this->query_flags = setBit($this->query_flags, 8, $flag_value); |
|
1105 | + break; |
|
1106 | + } |
|
1107 | + } |
|
1108 | + |
|
1109 | + /** |
|
1110 | + * Set outer order by parameters |
|
1111 | + * |
|
1112 | + * @param string $order_by |
|
1113 | + * @param int $offset |
|
1114 | + * @param int $limit |
|
1115 | + */ |
|
1116 | + public function setOuterSelect($order_by, $offset, $limit) |
|
1117 | + { |
|
1118 | + assert(is_string($order_by)); |
|
1119 | + assert(is_int($offset)); |
|
1120 | + assert(is_int($limit)); |
|
1121 | + assert($offset >= 0); |
|
1122 | + assert($limit > 0); |
|
1123 | + |
|
1124 | + $this->outer_order_by = $order_by; |
|
1125 | + $this->outer_offset = $offset; |
|
1126 | + $this->outer_limit = $limit; |
|
1127 | + $this->has_outer = true; |
|
1128 | + } |
|
1129 | + |
|
1130 | + |
|
1131 | + ////////////////////////////////////////////////////////////////////////////// |
|
1132 | + |
|
1133 | + /** |
|
1134 | + * Clear all filters (for multi-queries) |
|
1135 | + */ |
|
1136 | + public function resetFilters() |
|
1137 | + { |
|
1138 | + $this->filters = array(); |
|
1139 | + $this->anchor = array(); |
|
1140 | + } |
|
1141 | + |
|
1142 | + /** |
|
1143 | + * Clear groupby settings (for multi-queries) |
|
1144 | + */ |
|
1145 | + public function resetGroupBy() |
|
1146 | + { |
|
1147 | + $this->group_by = ''; |
|
1148 | + $this->group_func = self::GROUP_BY_DAY; |
|
1149 | + $this->group_sort = '@group desc'; |
|
1150 | + $this->group_distinct = ''; |
|
1151 | + } |
|
1152 | + |
|
1153 | + /** |
|
1154 | + * Clear all attribute value overrides (for multi-queries) |
|
1155 | + */ |
|
1156 | + public function resetOverrides() |
|
1157 | + { |
|
1158 | + $this->overrides = array(); |
|
1159 | + } |
|
1160 | + |
|
1161 | + public function resetQueryFlag() |
|
1162 | + { |
|
1163 | + $this->query_flags = setBit(0, 6, true); // default idf=tfidf_normalized |
|
1164 | + $this->predicted_time = 0; |
|
1165 | + } |
|
1166 | + |
|
1167 | + public function resetOuterSelect() |
|
1168 | + { |
|
1169 | + $this->outer_order_by = ''; |
|
1170 | + $this->outer_offset = 0; |
|
1171 | + $this->outer_limit = 0; |
|
1172 | + $this->has_outer = false; |
|
1173 | + } |
|
1174 | + |
|
1175 | + ////////////////////////////////////////////////////////////////////////////// |
|
1176 | + |
|
1177 | + /** |
|
1178 | + * Connect to searchd server, run given search query through given indexes, and return the search results |
|
1179 | + * |
|
1180 | + * @param string $query |
|
1181 | + * @param string $index |
|
1182 | + * @param string $comment |
|
1183 | + * |
|
1184 | + * @return bool |
|
1185 | + */ |
|
1186 | + public function query($query, $index = '*', $comment = '') |
|
1187 | + { |
|
1188 | + assert(empty($this->reqs)); |
|
1189 | + |
|
1190 | + $this->addQuery($query, $index, $comment); |
|
1191 | + $results = $this->runQueries(); |
|
1192 | + $this->reqs = array(); // just in case it failed too early |
|
1193 | + |
|
1194 | + if (!is_array($results)) { |
|
1195 | + return false; // probably network error; error message should be already filled |
|
1196 | + } |
|
1197 | + |
|
1198 | + $this->error = $results[0]['error']; |
|
1199 | + $this->warning = $results[0]['warning']; |
|
1200 | + |
|
1201 | + if ($results[0]['status'] == self::SEARCHD_ERROR) { |
|
1202 | + return false; |
|
1203 | + } else { |
|
1204 | + return $results[0]; |
|
1205 | + } |
|
1206 | + } |
|
1207 | + |
|
1208 | + /** |
|
1209 | + * Helper to pack floats in network byte order |
|
1210 | + * |
|
1211 | + * @param float $float |
|
1212 | + * |
|
1213 | + * @return string |
|
1214 | + */ |
|
1215 | + protected function packFloat($float) |
|
1216 | + { |
|
1217 | + $t1 = pack('f', $float); // machine order |
|
1218 | + list(, $t2) = unpack('L*', $t1); // int in machine order |
|
1219 | + return pack('N', $t2); |
|
1220 | + } |
|
1221 | + |
|
1222 | + /** |
|
1223 | + * Add query to multi-query batch |
|
1224 | + * Returns index into results array from RunQueries() call |
|
1225 | + * |
|
1226 | + * @param string $query |
|
1227 | + * @param string $index |
|
1228 | + * @param string $comment |
|
1229 | + * |
|
1230 | + * @return int |
|
1231 | + */ |
|
1232 | + public function addQuery($query, $index = '*', $comment = '') |
|
1233 | + { |
|
1234 | + // mbstring workaround |
|
1235 | + $this->mbPush(); |
|
1236 | + |
|
1237 | + // build request |
|
1238 | + $req = pack('NNNNN', $this->query_flags, $this->offset, $this->limit, $this->mode, $this->ranker); |
|
1239 | + if ($this->ranker == self::RANK_EXPR) { |
|
1240 | + $req .= pack('N', strlen($this->rank_expr)) . $this->rank_expr; |
|
1241 | + } |
|
1242 | + $req .= pack('N', $this->sort); // (deprecated) sort mode |
|
1243 | + $req .= pack('N', strlen($this->sort_by)) . $this->sort_by; |
|
1244 | + $req .= pack('N', strlen($query)) . $query; // query itself |
|
1245 | + $req .= pack('N', count($this->weights)); // weights |
|
1246 | + foreach ($this->weights as $weight) { |
|
1247 | + $req .= pack('N', (int)$weight); |
|
1248 | + } |
|
1249 | + $req .= pack('N', strlen($index)) . $index; // indexes |
|
1250 | + $req .= pack('N', 1); // id64 range marker |
|
1251 | + $req .= pack64IntUnsigned($this->min_id) . pack64IntUnsigned($this->max_id); // id64 range |
|
1252 | + |
|
1253 | + // filters |
|
1254 | + $req .= pack('N', count($this->filters)); |
|
1255 | + foreach ($this->filters as $filter) { |
|
1256 | + $req .= pack('N', strlen($filter['attr'])) . $filter['attr']; |
|
1257 | + $req .= pack('N', $filter['type']); |
|
1258 | + switch ($filter['type']) { |
|
1259 | + case self::FILTER_VALUES: |
|
1260 | + $req .= pack('N', count($filter['values'])); |
|
1261 | + foreach ($filter['values'] as $value) { |
|
1262 | + $req .= pack64IntSigned($value); |
|
1263 | + } |
|
1264 | + break; |
|
1265 | + case self::FILTER_RANGE: |
|
1266 | + $req .= pack64IntSigned($filter['min']) . pack64IntSigned($filter['max']); |
|
1267 | + break; |
|
1268 | + case self::FILTER_FLOAT_RANGE: |
|
1269 | + $req .= $this->packFloat($filter['min']) . $this->packFloat($filter['max']); |
|
1270 | + break; |
|
1271 | + case self::FILTER_STRING: |
|
1272 | + $req .= pack('N', strlen($filter['value'])) . $filter['value']; |
|
1273 | + break; |
|
1274 | + default: |
|
1275 | + assert(0 && 'internal error: unhandled filter type'); |
|
1276 | + } |
|
1277 | + $req .= pack('N', $filter['exclude']); |
|
1278 | + } |
|
1279 | + |
|
1280 | + // group-by clause, max-matches count, group-sort clause, cutoff count |
|
1281 | + $req .= pack('NN', $this->group_func, strlen($this->group_by)) . $this->group_by; |
|
1282 | + $req .= pack('N', $this->max_matches); |
|
1283 | + $req .= pack('N', strlen($this->group_sort)) . $this->group_sort; |
|
1284 | + $req .= pack('NNN', $this->cutoff, $this->retry_count, $this->retry_delay); |
|
1285 | + $req .= pack('N', strlen($this->group_distinct)) . $this->group_distinct; |
|
1286 | + |
|
1287 | + // anchor point |
|
1288 | + if (empty($this->anchor)) { |
|
1289 | + $req .= pack('N', 0); |
|
1290 | + } else { |
|
1291 | + $a =& $this->anchor; |
|
1292 | + $req .= pack('N', 1); |
|
1293 | + $req .= pack('N', strlen($a['attrlat'])) . $a['attrlat']; |
|
1294 | + $req .= pack('N', strlen($a['attrlong'])) . $a['attrlong']; |
|
1295 | + $req .= $this->packFloat($a['lat']) . $this->packFloat($a['long']); |
|
1296 | + } |
|
1297 | + |
|
1298 | + // per-index weights |
|
1299 | + $req .= pack('N', count($this->index_weights)); |
|
1300 | + foreach ($this->index_weights as $idx => $weight) { |
|
1301 | + $req .= pack('N', strlen($idx)) . $idx . pack('N', $weight); |
|
1302 | + } |
|
1303 | + |
|
1304 | + // max query time |
|
1305 | + $req .= pack('N', $this->max_query_time); |
|
1306 | + |
|
1307 | + // per-field weights |
|
1308 | + $req .= pack('N', count($this->field_weights)); |
|
1309 | + foreach ($this->field_weights as $field => $weight) { |
|
1310 | + $req .= pack('N', strlen($field)) . $field . pack('N', $weight); |
|
1311 | + } |
|
1312 | + |
|
1313 | + // comment |
|
1314 | + $req .= pack('N', strlen($comment)) . $comment; |
|
1315 | + |
|
1316 | + // attribute overrides |
|
1317 | + $req .= pack('N', count($this->overrides)); |
|
1318 | + foreach ($this->overrides as $key => $entry) { |
|
1319 | + $req .= pack('N', strlen($entry['attr'])) . $entry['attr']; |
|
1320 | + $req .= pack('NN', $entry['type'], count($entry['values'])); |
|
1321 | + foreach ($entry['values'] as $id => $val) { |
|
1322 | + assert(is_numeric($id)); |
|
1323 | + assert(is_numeric($val)); |
|
1324 | + |
|
1325 | + $req .= pack64IntUnsigned($id); |
|
1326 | + switch ($entry['type']) { |
|
1327 | + case self::ATTR_FLOAT: |
|
1328 | + $req .= $this->packFloat($val); |
|
1329 | + break; |
|
1330 | + case self::ATTR_BIGINT: |
|
1331 | + $req .= pack64IntSigned($val); |
|
1332 | + break; |
|
1333 | + default: |
|
1334 | + $req .= pack('N', $val); |
|
1335 | + break; |
|
1336 | + } |
|
1337 | + } |
|
1338 | + } |
|
1339 | + |
|
1340 | + // select-list |
|
1341 | + $req .= pack('N', strlen($this->select)) . $this->select; |
|
1342 | + |
|
1343 | + // max_predicted_time |
|
1344 | + if ($this->predicted_time > 0) { |
|
1345 | + $req .= pack('N', (int)$this->predicted_time); |
|
1346 | + } |
|
1347 | + |
|
1348 | + $req .= pack('N', strlen($this->outer_order_by)) . $this->outer_order_by; |
|
1349 | + $req .= pack('NN', $this->outer_offset, $this->outer_limit); |
|
1350 | + if ($this->has_outer) { |
|
1351 | + $req .= pack('N', 1); |
|
1352 | + } else { |
|
1353 | + $req .= pack('N', 0); |
|
1354 | + } |
|
1355 | + |
|
1356 | + // mbstring workaround |
|
1357 | + $this->mbPop(); |
|
1358 | + |
|
1359 | + // store request to requests array |
|
1360 | + $this->reqs[] = $req; |
|
1361 | + return count($this->reqs) - 1; |
|
1362 | + } |
|
1363 | + |
|
1364 | + /** |
|
1365 | + * Connect to searchd, run queries batch, and return an array of result sets |
|
1366 | + * |
|
1367 | + * @return array|bool |
|
1368 | + */ |
|
1369 | + public function runQueries() |
|
1370 | + { |
|
1371 | + if (empty($this->reqs)) { |
|
1372 | + $this->error = 'no queries defined, issue AddQuery() first'; |
|
1373 | + return false; |
|
1374 | + } |
|
1375 | + |
|
1376 | + // mbstring workaround |
|
1377 | + $this->mbPush(); |
|
1378 | + |
|
1379 | + if (($fp = $this->connect()) === false) { |
|
1380 | + $this->mbPop(); |
|
1381 | + return false; |
|
1382 | + } |
|
1383 | + |
|
1384 | + // send query, get response |
|
1385 | + $nreqs = count($this->reqs); |
|
1386 | + $req = join('', $this->reqs); |
|
1387 | + $len = 8 + strlen($req); |
|
1388 | + // add header |
|
1389 | + $req = pack('nnNNN', self::SEARCHD_COMMAND_SEARCH, self::VER_COMMAND_SEARCH, $len, 0, $nreqs) . $req; |
|
1390 | + |
|
1391 | + if (!$this->send($fp, $req, $len + 8) || !($response = $this->getResponse($fp, self::VER_COMMAND_SEARCH))) { |
|
1392 | + $this->mbPop(); |
|
1393 | + return false; |
|
1394 | + } |
|
1395 | + |
|
1396 | + // query sent ok; we can reset reqs now |
|
1397 | + $this->reqs = array(); |
|
1398 | + |
|
1399 | + // parse and return response |
|
1400 | + return $this->parseSearchResponse($response, $nreqs); |
|
1401 | + } |
|
1402 | + |
|
1403 | + /** |
|
1404 | + * Parse and return search query (or queries) response |
|
1405 | + * |
|
1406 | + * @param string $response |
|
1407 | + * @param int $nreqs |
|
1408 | + * |
|
1409 | + * @return array |
|
1410 | + */ |
|
1411 | + protected function parseSearchResponse($response, $nreqs) |
|
1412 | + { |
|
1413 | + $p = 0; // current position |
|
1414 | + $max = strlen($response); // max position for checks, to protect against broken responses |
|
1415 | + |
|
1416 | + $results = array(); |
|
1417 | + for ($ires = 0; $ires < $nreqs && $p < $max; $ires++) { |
|
1418 | + $results[] = array(); |
|
1419 | + $result =& $results[$ires]; |
|
1420 | + |
|
1421 | + $result['error'] = ''; |
|
1422 | + $result['warning'] = ''; |
|
1423 | + |
|
1424 | + // extract status |
|
1425 | + list(, $status) = unpack('N*', substr($response, $p, 4)); |
|
1426 | + $p += 4; |
|
1427 | + $result['status'] = $status; |
|
1428 | + if ($status != self::SEARCHD_OK) { |
|
1429 | + list(, $len) = unpack('N*', substr($response, $p, 4)); |
|
1430 | + $p += 4; |
|
1431 | + $message = substr($response, $p, $len); |
|
1432 | + $p += $len; |
|
1433 | + |
|
1434 | + if ($status == self::SEARCHD_WARNING) { |
|
1435 | + $result['warning'] = $message; |
|
1436 | + } else { |
|
1437 | + $result['error'] = $message; |
|
1438 | + continue; |
|
1439 | + } |
|
1440 | + } |
|
1441 | + |
|
1442 | + // read schema |
|
1443 | + $fields = array(); |
|
1444 | + $attrs = array(); |
|
1445 | + |
|
1446 | + list(, $nfields) = unpack('N*', substr($response, $p, 4)); |
|
1447 | + $p += 4; |
|
1448 | + while ($nfields --> 0 && $p < $max) { |
|
1449 | + list(, $len) = unpack('N*', substr($response, $p, 4)); |
|
1450 | + $p += 4; |
|
1451 | + $fields[] = substr($response, $p, $len); |
|
1452 | + $p += $len; |
|
1453 | + } |
|
1454 | + $result['fields'] = $fields; |
|
1455 | + |
|
1456 | + list(, $n_attrs) = unpack('N*', substr($response, $p, 4)); |
|
1457 | + $p += 4; |
|
1458 | + while ($n_attrs --> 0 && $p < $max) { |
|
1459 | + list(, $len) = unpack('N*', substr($response, $p, 4)); |
|
1460 | + $p += 4; |
|
1461 | + $attr = substr($response, $p, $len); |
|
1462 | + $p += $len; |
|
1463 | + list(, $type) = unpack('N*', substr($response, $p, 4)); |
|
1464 | + $p += 4; |
|
1465 | + $attrs[$attr] = $type; |
|
1466 | + } |
|
1467 | + $result['attrs'] = $attrs; |
|
1468 | + |
|
1469 | + // read match count |
|
1470 | + list(, $count) = unpack('N*', substr($response, $p, 4)); |
|
1471 | + $p += 4; |
|
1472 | + list(, $id64) = unpack('N*', substr($response, $p, 4)); |
|
1473 | + $p += 4; |
|
1474 | + |
|
1475 | + // read matches |
|
1476 | + $idx = -1; |
|
1477 | + while ($count --> 0 && $p < $max) { |
|
1478 | + // index into result array |
|
1479 | + $idx++; |
|
1480 | + |
|
1481 | + // parse document id and weight |
|
1482 | + if ($id64) { |
|
1483 | + $doc = unpack64IntUnsigned(substr($response, $p, 8)); |
|
1484 | + $p += 8; |
|
1485 | + list(,$weight) = unpack('N*', substr($response, $p, 4)); |
|
1486 | + $p += 4; |
|
1487 | + } else { |
|
1488 | + list($doc, $weight) = array_values(unpack('N*N*', substr($response, $p, 8))); |
|
1489 | + $p += 8; |
|
1490 | + $doc = fixUInt($doc); |
|
1491 | + } |
|
1492 | + $weight = sprintf('%u', $weight); |
|
1493 | + |
|
1494 | + // create match entry |
|
1495 | + if ($this->array_result) { |
|
1496 | + $result['matches'][$idx] = array('id' => $doc, 'weight' => $weight); |
|
1497 | + } else { |
|
1498 | + $result['matches'][$doc]['weight'] = $weight; |
|
1499 | + } |
|
1500 | + |
|
1501 | + // parse and create attributes |
|
1502 | + $attr_values = array(); |
|
1503 | + foreach ($attrs as $attr => $type) { |
|
1504 | + // handle 64bit int |
|
1505 | + if ($type == self::ATTR_BIGINT) { |
|
1506 | + $attr_values[$attr] = unpack64IntSigned(substr($response, $p, 8)); |
|
1507 | + $p += 8; |
|
1508 | + continue; |
|
1509 | + } |
|
1510 | + |
|
1511 | + // handle floats |
|
1512 | + if ($type == self::ATTR_FLOAT) { |
|
1513 | + list(, $u_value) = unpack('N*', substr($response, $p, 4)); |
|
1514 | + $p += 4; |
|
1515 | + list(, $f_value) = unpack('f*', pack('L', $u_value)); |
|
1516 | + $attr_values[$attr] = $f_value; |
|
1517 | + continue; |
|
1518 | + } |
|
1519 | + |
|
1520 | + // handle everything else as unsigned int |
|
1521 | + list(, $val) = unpack('N*', substr($response, $p, 4)); |
|
1522 | + $p += 4; |
|
1523 | + if ($type == self::ATTR_MULTI) { |
|
1524 | + $attr_values[$attr] = array(); |
|
1525 | + $n_values = $val; |
|
1526 | + while ($n_values --> 0 && $p < $max) { |
|
1527 | + list(, $val) = unpack('N*', substr($response, $p, 4)); |
|
1528 | + $p += 4; |
|
1529 | + $attr_values[$attr][] = fixUInt($val); |
|
1530 | + } |
|
1531 | + } elseif ($type == self::ATTR_MULTI64) { |
|
1532 | + $attr_values[$attr] = array(); |
|
1533 | + $n_values = $val; |
|
1534 | + while ($n_values > 0 && $p < $max) { |
|
1535 | + $attr_values[$attr][] = unpack64IntSigned(substr($response, $p, 8)); |
|
1536 | + $p += 8; |
|
1537 | + $n_values -= 2; |
|
1538 | + } |
|
1539 | + } elseif ($type == self::ATTR_STRING) { |
|
1540 | + $attr_values[$attr] = substr($response, $p, $val); |
|
1541 | + $p += $val; |
|
1542 | + } elseif ($type == self::ATTR_FACTORS) { |
|
1543 | + $attr_values[$attr] = substr($response, $p, $val - 4); |
|
1544 | + $p += $val-4; |
|
1545 | + } else { |
|
1546 | + $attr_values[$attr] = fixUInt($val); |
|
1547 | + } |
|
1548 | + } |
|
1549 | + |
|
1550 | + if ($this->array_result) { |
|
1551 | + $result['matches'][$idx]['attrs'] = $attr_values; |
|
1552 | + } else { |
|
1553 | + $result['matches'][$doc]['attrs'] = $attr_values; |
|
1554 | + } |
|
1555 | + } |
|
1556 | + |
|
1557 | + list($total, $total_found, $msecs, $words) = array_values(unpack('N*N*N*N*', substr($response, $p, 16))); |
|
1558 | + $result['total'] = sprintf('%u', $total); |
|
1559 | + $result['total_found'] = sprintf('%u', $total_found); |
|
1560 | + $result['time'] = sprintf('%.3f', $msecs / 1000); |
|
1561 | + $p += 16; |
|
1562 | + |
|
1563 | + while ($words --> 0 && $p < $max) { |
|
1564 | + list(, $len) = unpack('N*', substr($response, $p, 4)); |
|
1565 | + $p += 4; |
|
1566 | + $word = substr($response, $p, $len); |
|
1567 | + $p += $len; |
|
1568 | + list($docs, $hits) = array_values(unpack('N*N*', substr($response, $p, 8))); |
|
1569 | + $p += 8; |
|
1570 | + $result['words'][$word] = array ( |
|
1571 | + 'docs' => sprintf('%u', $docs), |
|
1572 | + 'hits' => sprintf('%u', $hits) |
|
1573 | + ); |
|
1574 | + } |
|
1575 | + } |
|
1576 | + |
|
1577 | + $this->mbPop(); |
|
1578 | + return $results; |
|
1579 | + } |
|
1580 | + |
|
1581 | + ///////////////////////////////////////////////////////////////////////////// |
|
1582 | + // excerpts generation |
|
1583 | + ///////////////////////////////////////////////////////////////////////////// |
|
1584 | + |
|
1585 | + /** |
|
1586 | + * Connect to searchd server, and generate exceprts (snippets) of given documents for given query. |
|
1587 | + * Returns false on failure, an array of snippets on success |
|
1588 | + * |
|
1589 | + * @param array $docs |
|
1590 | + * @param string $index |
|
1591 | + * @param string $words |
|
1592 | + * @param array $opts |
|
1593 | + * |
|
1594 | + * @return array|bool |
|
1595 | + */ |
|
1596 | + public function buildExcerpts(array $docs, $index, $words, array $opts = array()) |
|
1597 | + { |
|
1598 | + assert(is_string($index)); |
|
1599 | + assert(is_string($words)); |
|
1600 | + |
|
1601 | + $this->mbPush(); |
|
1602 | + |
|
1603 | + if (($fp = $this->connect()) === false) { |
|
1604 | + $this->mbPop(); |
|
1605 | + return false; |
|
1606 | + } |
|
1607 | + |
|
1608 | + ///////////////// |
|
1609 | + // fixup options |
|
1610 | + ///////////////// |
|
1611 | + |
|
1612 | + $opts = array_merge(array( |
|
1613 | + 'before_match' => '<b>', |
|
1614 | + 'after_match' => '</b>', |
|
1615 | + 'chunk_separator' => ' ... ', |
|
1616 | + 'limit' => 256, |
|
1617 | + 'limit_passages' => 0, |
|
1618 | + 'limit_words' => 0, |
|
1619 | + 'around' => 5, |
|
1620 | + 'exact_phrase' => false, |
|
1621 | + 'single_passage' => false, |
|
1622 | + 'use_boundaries' => false, |
|
1623 | + 'weight_order' => false, |
|
1624 | + 'query_mode' => false, |
|
1625 | + 'force_all_words' => false, |
|
1626 | + 'start_passage_id' => 1, |
|
1627 | + 'load_files' => false, |
|
1628 | + 'html_strip_mode' => 'index', |
|
1629 | + 'allow_empty' => false, |
|
1630 | + 'passage_boundary' => 'none', |
|
1631 | + 'emit_zones' => false, |
|
1632 | + 'load_files_scattered' => false |
|
1633 | + ), $opts); |
|
1634 | + |
|
1635 | + ///////////////// |
|
1636 | + // build request |
|
1637 | + ///////////////// |
|
1638 | + |
|
1639 | + // v.1.2 req |
|
1640 | + $flags = 1; // remove spaces |
|
1641 | + if ($opts['exact_phrase']) { |
|
1642 | + $flags |= 2; |
|
1643 | + } |
|
1644 | + if ($opts['single_passage']) { |
|
1645 | + $flags |= 4; |
|
1646 | + } |
|
1647 | + if ($opts['use_boundaries']) { |
|
1648 | + $flags |= 8; |
|
1649 | + } |
|
1650 | + if ($opts['weight_order']) { |
|
1651 | + $flags |= 16; |
|
1652 | + } |
|
1653 | + if ($opts['query_mode']) { |
|
1654 | + $flags |= 32; |
|
1655 | + } |
|
1656 | + if ($opts['force_all_words']) { |
|
1657 | + $flags |= 64; |
|
1658 | + } |
|
1659 | + if ($opts['load_files']) { |
|
1660 | + $flags |= 128; |
|
1661 | + } |
|
1662 | + if ($opts['allow_empty']) { |
|
1663 | + $flags |= 256; |
|
1664 | + } |
|
1665 | + if ($opts['emit_zones']) { |
|
1666 | + $flags |= 512; |
|
1667 | + } |
|
1668 | + if ($opts['load_files_scattered']) { |
|
1669 | + $flags |= 1024; |
|
1670 | + } |
|
1671 | + $req = pack('NN', 0, $flags); // mode=0, flags=$flags |
|
1672 | + $req .= pack('N', strlen($index)) . $index; // req index |
|
1673 | + $req .= pack('N', strlen($words)) . $words; // req words |
|
1674 | + |
|
1675 | + // options |
|
1676 | + $req .= pack('N', strlen($opts['before_match'])) . $opts['before_match']; |
|
1677 | + $req .= pack('N', strlen($opts['after_match'])) . $opts['after_match']; |
|
1678 | + $req .= pack('N', strlen($opts['chunk_separator'])) . $opts['chunk_separator']; |
|
1679 | + $req .= pack('NN', (int)$opts['limit'], (int)$opts['around']); |
|
1680 | + // v.1.2 |
|
1681 | + $req .= pack('NNN', (int)$opts['limit_passages'], (int)$opts['limit_words'], (int)$opts['start_passage_id']); |
|
1682 | + $req .= pack('N', strlen($opts['html_strip_mode'])) . $opts['html_strip_mode']; |
|
1683 | + $req .= pack('N', strlen($opts['passage_boundary'])) . $opts['passage_boundary']; |
|
1684 | + |
|
1685 | + // documents |
|
1686 | + $req .= pack('N', count($docs)); |
|
1687 | + foreach ($docs as $doc) { |
|
1688 | + assert(is_string($doc)); |
|
1689 | + $req .= pack('N', strlen($doc)) . $doc; |
|
1690 | + } |
|
1691 | + |
|
1692 | + //////////////////////////// |
|
1693 | + // send query, get response |
|
1694 | + //////////////////////////// |
|
1695 | + |
|
1696 | + $len = strlen($req); |
|
1697 | + $req = pack('nnN', self::SEARCHD_COMMAND_EXCERPT, self::VER_COMMAND_EXCERPT, $len) . $req; // add header |
|
1698 | + if (!$this->send($fp, $req, $len + 8) || !($response = $this->getResponse($fp, self::VER_COMMAND_EXCERPT))) { |
|
1699 | + $this->mbPop(); |
|
1700 | + return false; |
|
1701 | + } |
|
1702 | + |
|
1703 | + ////////////////// |
|
1704 | + // parse response |
|
1705 | + ////////////////// |
|
1706 | + |
|
1707 | + $pos = 0; |
|
1708 | + $res = array(); |
|
1709 | + $rlen = strlen($response); |
|
1710 | + $count = count($docs); |
|
1711 | + while ($count--) { |
|
1712 | + list(, $len) = unpack('N*', substr($response, $pos, 4)); |
|
1713 | + $pos += 4; |
|
1714 | + |
|
1715 | + if ($pos + $len > $rlen) { |
|
1716 | + $this->error = 'incomplete reply'; |
|
1717 | + $this->mbPop(); |
|
1718 | + return false; |
|
1719 | + } |
|
1720 | + $res[] = $len ? substr($response, $pos, $len) : ''; |
|
1721 | + $pos += $len; |
|
1722 | + } |
|
1723 | + |
|
1724 | + $this->mbPop(); |
|
1725 | + return $res; |
|
1726 | + } |
|
1727 | + |
|
1728 | + |
|
1729 | + ///////////////////////////////////////////////////////////////////////////// |
|
1730 | + // keyword generation |
|
1731 | + ///////////////////////////////////////////////////////////////////////////// |
|
1732 | + |
|
1733 | + /** |
|
1734 | + * Connect to searchd server, and generate keyword list for a given query returns false on failure, |
|
1735 | + * an array of words on success |
|
1736 | + * |
|
1737 | + * @param string $query |
|
1738 | + * @param string $index |
|
1739 | + * @param bool $hits |
|
1740 | + * |
|
1741 | + * @return array|bool |
|
1742 | + */ |
|
1743 | + public function buildKeywords($query, $index, $hits) |
|
1744 | + { |
|
1745 | + assert(is_string($query)); |
|
1746 | + assert(is_string($index)); |
|
1747 | + assert(is_bool($hits)); |
|
1748 | + |
|
1749 | + $this->mbPush(); |
|
1750 | + |
|
1751 | + if (($fp = $this->connect()) === false) { |
|
1752 | + $this->mbPop(); |
|
1753 | + return false; |
|
1754 | + } |
|
1755 | + |
|
1756 | + ///////////////// |
|
1757 | + // build request |
|
1758 | + ///////////////// |
|
1759 | + |
|
1760 | + // v.1.0 req |
|
1761 | + $req = pack('N', strlen($query)) . $query; // req query |
|
1762 | + $req .= pack('N', strlen($index)) . $index; // req index |
|
1763 | + $req .= pack('N', (int)$hits); |
|
1764 | + |
|
1765 | + //////////////////////////// |
|
1766 | + // send query, get response |
|
1767 | + //////////////////////////// |
|
1768 | + |
|
1769 | + $len = strlen($req); |
|
1770 | + $req = pack('nnN', self::SEARCHD_COMMAND_KEYWORDS, self::VER_COMMAND_KEYWORDS, $len) . $req; // add header |
|
1771 | + if (!$this->send($fp, $req, $len + 8) || !($response = $this->getResponse($fp, self::VER_COMMAND_KEYWORDS))) { |
|
1772 | + $this->mbPop(); |
|
1773 | + return false; |
|
1774 | + } |
|
1775 | + |
|
1776 | + ////////////////// |
|
1777 | + // parse response |
|
1778 | + ////////////////// |
|
1779 | + |
|
1780 | + $pos = 0; |
|
1781 | + $res = array(); |
|
1782 | + $rlen = strlen($response); |
|
1783 | + list(, $nwords) = unpack('N*', substr($response, $pos, 4)); |
|
1784 | + $pos += 4; |
|
1785 | + for ($i = 0; $i < $nwords; $i++) { |
|
1786 | + list(, $len) = unpack('N*', substr($response, $pos, 4)); |
|
1787 | + $pos += 4; |
|
1788 | + $tokenized = $len ? substr($response, $pos, $len) : ''; |
|
1789 | + $pos += $len; |
|
1790 | + |
|
1791 | + list(, $len) = unpack('N*', substr($response, $pos, 4)); |
|
1792 | + $pos += 4; |
|
1793 | + $normalized = $len ? substr($response, $pos, $len) : ''; |
|
1794 | + $pos += $len; |
|
1795 | + |
|
1796 | + $res[] = array( |
|
1797 | + 'tokenized' => $tokenized, |
|
1798 | + 'normalized' => $normalized |
|
1799 | + ); |
|
1800 | + |
|
1801 | + if ($hits) { |
|
1802 | + list($ndocs, $nhits) = array_values(unpack('N*N*', substr($response, $pos, 8))); |
|
1803 | + $pos += 8; |
|
1804 | + $res[$i]['docs'] = $ndocs; |
|
1805 | + $res[$i]['hits'] = $nhits; |
|
1806 | + } |
|
1807 | + |
|
1808 | + if ($pos > $rlen) { |
|
1809 | + $this->error = 'incomplete reply'; |
|
1810 | + $this->mbPop(); |
|
1811 | + return false; |
|
1812 | + } |
|
1813 | + } |
|
1814 | + |
|
1815 | + $this->mbPop(); |
|
1816 | + return $res; |
|
1817 | + } |
|
1818 | + |
|
1819 | + /** |
|
1820 | + * @param string $string |
|
1821 | + * |
|
1822 | + * @return string |
|
1823 | + */ |
|
1824 | + public function escapeString($string) |
|
1825 | + { |
|
1826 | + $from = array('\\', '(',')','|','-','!','@','~','"','&', '/', '^', '$', '=', '<'); |
|
1827 | + $to = array('\\\\', '\(','\)','\|','\-','\!','\@','\~','\"', '\&', '\/', '\^', '\$', '\=', '\<'); |
|
1828 | + |
|
1829 | + return str_replace($from, $to, $string); |
|
1830 | + } |
|
1831 | + |
|
1832 | + ///////////////////////////////////////////////////////////////////////////// |
|
1833 | + // attribute updates |
|
1834 | + ///////////////////////////////////////////////////////////////////////////// |
|
1835 | + |
|
/**
 * Batch update given attributes in given rows in given indexes
 * Returns amount of updated documents (0 or more) on success, or -1 on failure
 *
 * The arguments are serialised into a single UPDATE command, sent over the
 * connection returned by connect(), and the first four bytes of the reply
 * are decoded as the result.
 *
 * @param string $index               Index name, sent as-is
 * @param array  $attrs               Attribute names (strings)
 * @param array  $values              Map of document ID => list of new values, one per entry of $attrs;
 *                                    each value is an int, or an array of ints when $mva is true
 * @param bool   $mva                 Send every value as a counted list of ints instead of a single int
 * @param bool   $ignore_non_existent Flag forwarded in the request (presumably makes the daemon skip
 *                                    attributes an index does not have; confirm against the daemon docs)
 *
 * @return int Number decoded from the reply, or -1 when connecting, sending or reading the reply fails
 */
public function updateAttributes($index, array $attrs, array $values, $mva = false, $ignore_non_existent = false)
{
    // verify everything (assertions only: they can be disabled at runtime)
    assert(is_string($index));
    assert(is_bool($mva));
    assert(is_bool($ignore_non_existent));

    foreach ($attrs as $attr) {
        assert(is_string($attr));
    }

    foreach ($values as $id => $entry) {
        assert(is_numeric($id));
        assert(is_array($entry));
        assert(count($entry) == count($attrs));
        foreach ($entry as $v) {
            if ($mva) {
                assert(is_array($v));
                foreach ($v as $vv) {
                    assert(is_int($vv));
                }
            } else {
                assert(is_int($v));
            }
        }
    }

    // build request: index name, attribute count, ignore flag, then one
    // (name, mva flag) pair per attribute
    $this->mbPush();
    $req = pack('N', strlen($index)) . $index;

    $req .= pack('N', count($attrs));
    $req .= pack('N', $ignore_non_existent ? 1 : 0);
    foreach ($attrs as $attr) {
        $req .= pack('N', strlen($attr)) . $attr;
        $req .= pack('N', $mva ? 1 : 0);
    }

    // row count, then per row the 64-bit document ID followed by its values
    $req .= pack('N', count($values));
    foreach ($values as $id => $entry) {
        $req .= pack64IntUnsigned($id);
        foreach ($entry as $v) {
            // for MVA the leading dword is the element count, otherwise the value itself
            $req .= pack('N', $mva ? count($v) : $v);
            if ($mva) {
                foreach ($v as $vv) {
                    $req .= pack('N', $vv);
                }
            }
        }
    }

    // connect, send query, get response
    if (($fp = $this->connect()) === false) {
        $this->mbPop();
        return -1;
    }

    $len = strlen($req);
    $req = pack('nnN', self::SEARCHD_COMMAND_UPDATE, self::VER_COMMAND_UPDATE, $len) . $req; // add header
    if (!$this->send($fp, $req, $len + 8)) {
        $this->mbPop();
        return -1;
    }

    if (!($response = $this->getResponse($fp, self::VER_COMMAND_UPDATE))) {
        $this->mbPop();
        return -1;
    }

    // parse response; a reply shorter than one dword cannot be decoded, so
    // report it as a failure instead of letting unpack() warn and yield null
    if (strlen($response) < 4) {
        $this->error = 'incomplete reply';
        $this->mbPop();
        return -1;
    }

    list(, $updated) = unpack('N*', substr($response, 0, 4));
    $this->mbPop();
    return $updated;
}
|
1922 | + |
|
1923 | + ///////////////////////////////////////////////////////////////////////////// |
|
1924 | + // persistent connections |
|
1925 | + ///////////////////////////////////////////////////////////////////////////// |
|
1926 | + |
|
/**
 * Open a persistent connection.
 *
 * Connects, sends the PERSIST command and, when that succeeds, keeps the
 * socket in $this->socket. Refuses to run while a socket is already stored.
 *
 * @return bool True when the socket was stored; false when already connected
 *              or when connecting or sending failed
 */
public function open()
{
    if ($this->socket !== false) {
        $this->error = 'already connected';
        return false;
    }

    $fp = $this->connect();
    if ($fp === false) {
        return false;
    }

    // header: command, command version = 0, body length = 4; body: the value 1
    $request = pack('nnNN', self::SEARCHD_COMMAND_PERSIST, 0, 4, 1);
    $sent = $this->send($fp, $request, 12);
    if (!$sent) {
        return false;
    }

    $this->socket = $fp;

    return true;
}
|
1948 | + |
|
/**
 * Close the stored socket.
 *
 * @return bool True after the stored socket was closed and reset to false;
 *              false (with the error set) when no socket is stored
 */
public function close()
{
    $handle = $this->socket;

    if ($handle === false) {
        $this->error = 'not connected';
        return false;
    }

    fclose($handle);
    // false is the "no stored socket" marker checked above and in open()
    $this->socket = false;

    return true;
}
|
1964 | + |
|
1965 | + ////////////////////////////////////////////////////////////////////////// |
|
1966 | + // status |
|
1967 | + ////////////////////////////////////////////////////////////////////////// |
|
1968 | + |
|
/**
 * Request the status table and decode it into rows of strings.
 *
 * The reply is read as two 32-bit counters (rows, columns) followed by
 * rows * columns length-prefixed strings.
 *
 * @param bool $session Selects the request body: 0 is sent when true, 1 when false
 *                      (presumably session-level vs. global counters; confirm
 *                      against the daemon documentation)
 *
 * @return array|bool List of rows, each a list of column strings; false when
 *                    connecting, sending or reading fails, or when the reply is
 *                    shorter than its own counters announce
 */
public function status($session = false)
{
    assert(is_bool($session));

    $this->mbPush();
    if (($fp = $this->connect()) === false) {
        $this->mbPop();
        return false;
    }

    // header: command, version, body length = 4; body: 0 when $session is true, else 1
    $req = pack('nnNN', self::SEARCHD_COMMAND_STATUS, self::VER_COMMAND_STATUS, 4, $session ? 0 : 1);
    if (!$this->send($fp, $req, 12) || !($response = $this->getResponse($fp, self::VER_COMMAND_STATUS))) {
        $this->mbPop();
        return false;
    }

    $rlen = strlen($response);

    // the table starts with two dwords: row count and column count
    if ($rlen < 8) {
        $this->error = 'incomplete reply';
        $this->mbPop();
        return false;
    }
    list($rows, $cols) = array_values(unpack('N2', substr($response, 0, 8)));
    $p = 8;

    $res = array();
    for ($i = 0; $i < $rows; $i++) {
        for ($j = 0; $j < $cols; $j++) {
            // every cell is a dword length followed by that many bytes;
            // stop as soon as the reply cannot hold what it announces
            if ($p + 4 > $rlen) {
                $this->error = 'incomplete reply';
                $this->mbPop();
                return false;
            }
            list(, $len) = unpack('N*', substr($response, $p, 4));
            $p += 4;

            if ($p + $len > $rlen) {
                $this->error = 'incomplete reply';
                $this->mbPop();
                return false;
            }
            $res[$i][] = substr($response, $p, $len);
            $p += $len;
        }
    }

    $this->mbPop();
    return $res;
}
|
2009 | + |
|
2010 | + ////////////////////////////////////////////////////////////////////////// |
|
2011 | + // flush |
|
2012 | + ////////////////////////////////////////////////////////////////////////// |
|
2013 | + |
|
/**
 * Send the FLUSH_ATTRS command and return the number found in the reply.
 *
 * @return int The 32-bit value decoded from a 4-byte reply; -1 when
 *             connecting, sending or reading fails, or when the reply has
 *             any other length (the error is then set as well)
 */
public function flushAttributes()
{
    $this->mbPush();

    $fp = $this->connect();
    if ($fp === false) {
        $this->mbPop();
        return -1;
    }

    // header only: the body length field is 0
    $request = pack('nnN', self::SEARCHD_COMMAND_FLUSH_ATTRS, self::VER_COMMAND_FLUSH_ATTRS, 0);

    // the reply is only read when the request went out
    $response = $this->send($fp, $request, 8)
        ? $this->getResponse($fp, self::VER_COMMAND_FLUSH_ATTRS)
        : false;
    if (!$response) {
        $this->mbPop();
        return -1;
    }

    if (strlen($response) != 4) {
        $this->error = 'unexpected response length';
        $this->mbPop();
        return -1;
    }

    // unpack() numbers unnamed elements from 1
    $decoded = unpack('N*', $response);
    $this->mbPop();

    return $decoded[1];
}
|
2041 | 2041 | } |