Zend_Search_Lucene_Index_SegmentInfo::termFreqs()   D
last analyzed

Complexity

Conditions 21
Paths 10

Size

Total Lines 117
Code Lines 74

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 17
CRAP Score 204.3069
Metric Value
cc 21
eloc 74
nc 10
nop 3
dl 0
loc 117
ccs 17
cts 67
cp 0.2537
crap 204.3069
rs 4.6955

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
<?php
2
/**
3
 * Zend Framework
4
 *
5
 * LICENSE
6
 *
7
 * This source file is subject to the new BSD license that is bundled
8
 * with this package in the file LICENSE.txt.
9
 * It is also available through the world-wide-web at this URL:
10
 * http://framework.zend.com/license/new-bsd
11
 * If you did not receive a copy of the license and are unable to
12
 * obtain it through the world-wide-web, please send an email
13
 * to license@zend.com so we can send you a copy immediately.
14
 *
15
 * @category   Zend
16
 * @package    Zend_Search_Lucene
17
 * @subpackage Index
18
 * @copyright  Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com)
19
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
20
 * @version    $Id: SegmentInfo.php 24593 2012-01-05 20:35:02Z matthew $
21
 */
22
23
/** Zend_Search_Lucene_Index_TermsStream_Interface */
24 1
require_once 'Zend/Search/Lucene/Index/TermsStream/Interface.php';
25
26
27
/** Zend_Search_Lucene_Search_Similarity */
28 1
require_once 'Zend/Search/Lucene/Search/Similarity.php';
29
30
/** Zend_Search_Lucene_Index_FieldInfo */
31 1
require_once 'Zend/Search/Lucene/Index/FieldInfo.php';
32
33
/** Zend_Search_Lucene_Index_Term */
34 1
require_once 'Zend/Search/Lucene/Index/Term.php';
35
36
/** Zend_Search_Lucene_Index_TermInfo */
37 1
require_once 'Zend/Search/Lucene/Index/TermInfo.php';
38
39
/**
40
 * @category   Zend
41
 * @package    Zend_Search_Lucene
42
 * @subpackage Index
43
 * @copyright  Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com)
44
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
45
 */
46
class Zend_Search_Lucene_Index_SegmentInfo implements Zend_Search_Lucene_Index_TermsStream_Interface
47
{
48
    /**
49
     * "Full scan vs fetch" boundary.
50
     *
51
     * If filter selectivity is less than this value, then full scan is performed
52
     * (since term entries fetching has some additional overhead).
53
     */
54
    const FULL_SCAN_VS_FETCH_BOUNDARY = 5;
55
56
    /**
57
     * Number of docs in a segment
58
     *
59
     * @var integer
60
     */
61
    private $_docCount;
62
63
    /**
64
     * Segment name
65
     *
66
     * @var string
67
     */
68
    private $_name;
69
70
    /**
71
     * Term Dictionary Index
72
     *
73
     * Array of arrays (Zend_Search_Lucene_Index_Term objects are represented as arrays because
74
     * of performance considerations)
75
     * [0] -> $termValue
76
     * [1] -> $termFieldNum
77
     *
78
     * Corresponding Zend_Search_Lucene_Index_TermInfo object stored in the $_termDictionaryInfos
79
     *
80
     * @var array
81
     */
82
    private $_termDictionary;
83
84
    /**
85
     * Term Dictionary Index TermInfos
86
     *
87
     * Array of arrays (Zend_Search_Lucene_Index_TermInfo objects are represented as arrays because
88
     * of performance considerations)
89
     * [0] -> $docFreq
90
     * [1] -> $freqPointer
91
     * [2] -> $proxPointer
92
     * [3] -> $skipOffset
93
     * [4] -> $indexPointer
94
     *
95
     * @var array
96
     */
97
    private $_termDictionaryInfos;
98
99
    /**
100
     * Segment fields. Array of Zend_Search_Lucene_Index_FieldInfo objects for this segment
101
     *
102
     * @var array
103
     */
104
    private $_fields;
105
106
    /**
107
     * Field positions in a dictionary.
108
     * (Term dictionary contains fields ordered by names)
109
     *
110
     * @var array
111
     */
112
    private $_fieldsDicPositions;
113
114
115
    /**
116
     * Associative array where the key is the file name and the value is data offset
117
     * in a compound segment file (.cfs).
118
     *
119
     * @var array
120
     */
121
    private $_segFiles;
122
123
    /**
124
     * Associative array where the key is the file name and the value is the file size
     * within the compound segment file (.cfs).
125
     *
126
     * @var array
127
     */
128
    private $_segFileSizes;
129
130
    /**
131
     * Delete file generation number
132
     *
133
     * -2 means autodetect latest delete generation
134
     * -1 means 'there is no delete file'
135
     *  0 means pre-2.1 format delete file
136
     *  X specifies used delete file
137
     *
138
     * @var integer
139
     */
140
    private $_delGen;
141
142
    /**
143
     * Segment has single norms file
144
     *
145
     * If true then one .nrm file is used for all fields
146
     * Otherwise .fN files are used
147
     *
148
     * @var boolean
149
     */
150
    private $_hasSingleNormFile;
151
152
    /**
153
     * Use compound segment file (*.cfs) to collect all other segment files
154
     * (excluding .del files)
155
     *
156
     * @var boolean
157
     */
158
    private $_isCompound;
159
160
161
    /**
162
     * File system adapter.
163
     *
164
     * @var Zend_Search_Lucene_Storage_Directory
165
     */
166
    private $_directory;
167
168
    /**
169
     * Normalization factors.
170
     * An array fieldName => normVector
171
     * normVector is a binary string.
172
     * Each byte corresponds to an indexed document in a segment and
173
     * encodes normalization factor (float value, encoded by
174
     * Zend_Search_Lucene_Search_Similarity::encodeNorm())
175
     *
176
     * @var array
177
     */
178
    private $_norms = array();
179
180
    /**
181
     * List of deleted documents.
182
     * bitset if bitset extension is loaded or array otherwise.
183
     *
184
     * @var mixed
185
     */
186
    private $_deleted = null;
187
188
    /**
189
     * $this->_deleted update flag
190
     *
191
     * @var boolean
192
     */
193
    private $_deletedDirty = false;
194
195
    /**
196
     * True if segment uses shared doc store
197
     *
198
     * @var boolean
199
     */
200
    private $_usesSharedDocStore;
201
202
    /*
203
     * Shared doc store options.
204
     * It's an associative array with the following items:
205
     * - 'offset'     => $docStoreOffset           The starting document in the shared doc store files where this segment's documents begin
206
     * - 'segment'    => $docStoreSegment          The name of the segment that has the shared doc store files.
207
     * - 'isCompound' => $docStoreIsCompoundFile   True, if compound file format is used for the shared doc store files (.cfx file).
208
     */
209
    private $_sharedDocStoreOptions;
210
211
212
    /**
213
     * Zend_Search_Lucene_Index_SegmentInfo constructor
214
     *
215
     * @param Zend_Search_Lucene_Storage_Directory $directory
216
     * @param string     $name
217
     * @param integer    $docCount
218
     * @param integer    $delGen
219
     * @param array|null $docStoreOptions
220
     * @param boolean    $hasSingleNormFile
221
     * @param boolean    $isCompound
222
     */
223 43
    public function __construct(Zend_Search_Lucene_Storage_Directory $directory, $name, $docCount, $delGen = 0, $docStoreOptions = null, $hasSingleNormFile = false, $isCompound = null)
    {
        // Builds the in-memory description of one segment: compound file
        // tables, field infos, field dictionary positions and deletions.
        $this->_directory = $directory;
        $this->_name      = $name;
        $this->_docCount  = $docCount;

        // Keep the flag a real boolean; previously it stayed null when no
        // shared doc store options were passed.
        $this->_usesSharedDocStore = ($docStoreOptions !== null);

        if ($this->_usesSharedDocStore) {
            $this->_sharedDocStoreOptions = $docStoreOptions;

            if ($docStoreOptions['isCompound']) {
                // Index the files packed into the shared doc store compound file
                list($cfxFiles, $cfxFileSizes) = $this->_readCompoundFileTable($docStoreOptions['segment'] . '.cfx');

                $this->_sharedDocStoreOptions['files']     = $cfxFiles;
                $this->_sharedDocStoreOptions['fileSizes'] = $cfxFileSizes;
            }
        }

        $this->_hasSingleNormFile = $hasSingleNormFile;
        $this->_delGen            = $delGen;
        $this->_termDictionary    = null;

        if ($isCompound !== null) {
            $this->_isCompound = $isCompound;
        } else {
            // It's a pre-2.1 segment or isCompound is set to 'unknown'
            // Detect if segment uses compound file
            require_once 'Zend/Search/Lucene/Exception.php';
            try {
                // Try to open compound file
                $this->_directory->getFileObject($name . '.cfs');

                // Compound file is found
                $this->_isCompound = true;
            } catch (Zend_Search_Lucene_Exception $e) {
                if (strpos($e->getMessage(), 'is not readable') !== false) {
                    // Compound file is not found or is not readable
                    $this->_isCompound = false;
                } else {
                    throw new Zend_Search_Lucene_Exception($e->getMessage(), $e->getCode(), $e);
                }
            }
        }

        // Both tables are always arrays, also for non-compound segments
        // (the sizes table used to stay null in that case).
        $this->_segFiles     = array();
        $this->_segFileSizes = array();
        if ($this->_isCompound) {
            list($segFiles, $segFileSizes) = $this->_readCompoundFileTable($name . '.cfs');

            $this->_segFiles     = $segFiles;
            $this->_segFileSizes = $segFileSizes;
        }

        // Read field infos (.fnm): a VInt count followed by name/flags pairs
        $fnmFile     = $this->openCompoundFile('.fnm');
        $fieldsCount = $fnmFile->readVInt();
        $fieldNames  = array();
        $fieldNums   = array();
        $this->_fields = array();

        for ($count = 0; $count < $fieldsCount; $count++) {
            $fieldName = $fnmFile->readString();
            $fieldBits = $fnmFile->readByte();
            $this->_fields[$count] = new Zend_Search_Lucene_Index_FieldInfo($fieldName,
                                                                            $fieldBits & 0x01 /* field is indexed */,
                                                                            $count,
                                                                            $fieldBits & 0x02 /* termvectors are stored */,
                                                                            $fieldBits & 0x10 /* norms are omitted */,
                                                                            $fieldBits & 0x20 /* payloads are stored */);
            if ($fieldBits & 0x10) {
                // Norms are omitted for the field: use the encoded norm of 1.0 for every document
                $this->_norms[$count] = str_repeat(chr(Zend_Search_Lucene_Search_Similarity::encodeNorm(1.0)), $docCount);
            }

            $fieldNums[$count]  = $count;
            $fieldNames[$count] = $fieldName;
        }

        // Sort field numbers by field name, then flip the result so that
        // a field number maps to its position in the name-ordered list.
        array_multisort($fieldNames, SORT_ASC, SORT_REGULAR, $fieldNums);
        $this->_fieldsDicPositions = array_flip($fieldNums);

        if ($this->_delGen == -2) {
            // SegmentInfo constructor is invoked from index writer
            // Autodetect current delete file generation number
            $this->_delGen = $this->_detectLatestDelGen();
        }

        // Load deletions
        $this->_deleted = $this->_loadDelFile();
    }

    /**
     * Reads the file table stored at the beginning of a compound file.
     *
     * The table is a VInt entry count followed by (long data offset, string
     * file name) pairs. The size of an entry is the distance to the next
     * entry's offset; the last entry extends to the end of the compound file.
     *
     * @param string $compoundFileName  Compound file name within the directory
     * @return array  array(0 => fileName => dataOffset, 1 => fileName => size)
     */
    private function _readCompoundFileTable($compoundFileName)
    {
        $compoundFile = $this->_directory->getFileObject($compoundFileName);
        $filesCount   = $compoundFile->readVInt();

        $offsets = array();
        $sizes   = array();

        $previousName   = null;
        $previousOffset = 0;

        for ($count = 0; $count < $filesCount; $count++) {
            $dataOffset = $compoundFile->readLong();
            if ($previousName !== null) {
                $sizes[$previousName] = $dataOffset - $previousOffset;
            }

            $previousName           = $compoundFile->readString();
            $previousOffset         = $dataOffset;
            $offsets[$previousName] = $dataOffset;
        }

        if ($previousName !== null) {
            // The last packed file runs up to the end of the compound file
            $sizes[$previousName] = $this->_directory->fileLength($compoundFileName) - $previousOffset;
        }

        return array($offsets, $sizes);
    }
337
338
    /**
339
     * Load deletions file
340
     *
341
     * Returns bitset or an array depending on bitset extension availability
342
     *
343
     * @return mixed
344
     * @throws Zend_Search_Lucene_Exception
345
     */
346 43
    private function _loadDelFile()
    {
        // Dispatch on the delete file generation number.
        // switch compares loosely, exactly like the '==' checks it replaces.
        switch ($this->_delGen) {
            case -1:
                // No delete file exists for this segment
                return null;

            case 0:
                // Pre-2.1 format delete file: try to load it
                return $this->_loadPre21DelFile();

            default:
                // Any other generation is a 2.1+ format deletions file
                return $this->_load21DelFile();
        }
    }
360
361
    /**
362
     * Load pre-2.1 deletions file
363
     *
364
     * Returns bitset or an array depending on bitset extension availability
365
     *
366
     * @return mixed
367
     * @throws Zend_Search_Lucene_Exception
368
     */
369
    private function _loadPre21DelFile()
    {
        // Loads "<segment>.del". If that file is not readable the segment is
        // treated as having no deletions: _delGen becomes -1 and null is returned.
        require_once 'Zend/Search/Lucene/Exception.php';
        try {
            // '.del' files always stored in a separate file
            // Segment compound is not used
            $delFile = $this->_directory->getFileObject($this->_name . '.del');

            // Header: the first int divided by 8 (rounded up) gives the bitmap
            // length in bytes; the second int is the bit count.
            $byteCount = $delFile->readInt();
            $byteCount = ceil($byteCount/8);
            $bitCount  = $delFile->readInt();

            if ($bitCount == 0) {
                // Nothing is marked as deleted, the bitmap is not read
                $delBytes = '';
            } else {
                $delBytes = $delFile->readBytes($byteCount);
            }

            if (extension_loaded('bitset')) {
                // The raw byte string is used as the bitset
                return $delBytes;
            }

            // Decode only the bytes which were actually read. Iterating up to
            // $byteCount indexed past the end of $delBytes when the bit count
            // was 0 (empty string) and raised "uninitialized string offset".
            $availableBytes = min($byteCount, strlen($delBytes));

            $deletions = array();
            for ($count = 0; $count < $availableBytes; $count++) {
                $byte = ord($delBytes[$count]);
                for ($bit = 0; $bit < 8; $bit++) {
                    if ($byte & (1<<$bit)) {
                        $deletions[$count*8 + $bit] = 1;
                    }
                }
            }

            // NOTE(review): unlike _load21DelFile() an empty array (not null)
            // is returned when nothing is deleted - kept as is, confirm with callers.
            return $deletions;
        } catch(Zend_Search_Lucene_Exception $e) {
            if (strpos($e->getMessage(), 'is not readable') === false) {
                throw new Zend_Search_Lucene_Exception($e->getMessage(), $e->getCode(), $e);
            }
            // There is no deletion file
            $this->_delGen = -1;

            return null;
        }
    }
412
413
    /**
414
     * Load 2.1+ format deletions file
415
     *
416
     * Returns bitset or an array depending on bitset extension availability
417
     *
418
     * @return mixed
419
     */
420 40
    private function _load21DelFile()
    {
        // Loads "<segment>_<generation in base 36>.del" and decodes it into
        // a bitset (bitset extension loaded) or an array docId => 1.
        $delFileName = $this->_name . '_' . base_convert($this->_delGen, 10, 36) . '.del';
        $delFile     = $this->_directory->getFileObject($delFileName);

        $format = $delFile->readInt();

        if ($format == (int)0xFFFFFFFF) {
            // Sparse format: a list of (VInt byte gap, non-zero byte) entries
            $useBitset = extension_loaded('bitset');
            $deletions = $useBitset ? bitset_empty() : array();

            // Two header ints follow the format marker. Their values are not
            // needed, but they have to be consumed to reach the entries.
            $delFile->readInt();
            $delFile->readInt();

            $delFileSize = $this->_directory->fileLength($delFileName);
            $byteNum     = 0;

            // Decode ALL entries up to the end of the file. The result used to
            // be returned from inside the loop body, so only the first entry
            // was ever decoded. A pre-checked loop also avoids reading past
            // the end of a file which has no entries at all.
            while ($delFile->tell() < $delFileSize) {
                $dgap        = $delFile->readVInt();
                $nonZeroByte = $delFile->readByte();

                // Gaps are relative to the previously decoded byte number
                $byteNum += $dgap;

                for ($bit = 0; $bit < 8; $bit++) {
                    if ($nonZeroByte & (1<<$bit)) {
                        if ($useBitset) {
                            bitset_incl($deletions, $byteNum*8 + $bit);
                        } else {
                            $deletions[$byteNum*8 + $bit] = 1;
                        }
                    }
                }
            }

            if ($useBitset) {
                return $deletions;
            }

            return (count($deletions) > 0) ? $deletions : null;
        }

        // Dense format: $format is actually the first header int; divided by 8
        // and rounded up it gives the bitmap length in bytes.
        $byteCount = ceil($format/8);
        $bitCount  = $delFile->readInt();

        if ($bitCount == 0) {
            // Nothing is marked as deleted, the bitmap is not read
            $delBytes = '';
        } else {
            $delBytes = $delFile->readBytes($byteCount);
        }

        if (extension_loaded('bitset')) {
            // The raw byte string is used as the bitset
            return $delBytes;
        }

        // Decode only the bytes which were actually read (an empty or short
        // string must not be indexed up to $byteCount).
        $availableBytes = min($byteCount, strlen($delBytes));

        $deletions = array();
        for ($count = 0; $count < $availableBytes; $count++) {
            $byte = ord($delBytes[$count]);
            for ($bit = 0; $bit < 8; $bit++) {
                if ($byte & (1<<$bit)) {
                    $deletions[$count*8 + $bit] = 1;
                }
            }
        }

        return (count($deletions) > 0) ? $deletions : null;
    }
491
492
    /**
493
     * Opens index file stored within compound index file
494
     *
495
     * @param string $extension
496
     * @param boolean $shareHandler
497
     * @throws Zend_Search_Lucene_Exception
498
     * @return Zend_Search_Lucene_Storage_File
499
     */
500 43
    public function openCompoundFile($extension, $shareHandler = true)
    {
        // Returns a file object positioned at the start of the requested
        // segment file, wherever that file is physically stored.

        // Stored fields files ('.fdx'/'.fdt') are taken from the shared doc store if it's used
        if (($extension == '.fdx'  || $extension == '.fdt')  &&  $this->_usesSharedDocStore) {
            $fdxFName = $this->_sharedDocStoreOptions['segment'] . '.fdx';
            $fdtFName = $this->_sharedDocStoreOptions['segment'] . '.fdt';

            if (!$this->_sharedDocStoreOptions['isCompound']) {
                // Shared doc store files are plain files in the directory
                $fdxFile = $this->_directory->getFileObject($fdxFName, $shareHandler);
                // Skip index entries of preceding documents; entries are read
                // with readLong(), presumably 8 bytes each
                $fdxFile->seek($this->_sharedDocStoreOptions['offset']*8, SEEK_CUR);

                if ($extension == '.fdx') {
                    // '.fdx' file is requested
                    return $fdxFile;
                }

                // '.fdt' file is requested: the '.fdx' entry holds the '.fdt' start offset
                $fdtStartOffset = $fdxFile->readLong();

                $fdtFile = $this->_directory->getFileObject($fdtFName, $shareHandler);
                $fdtFile->seek($fdtStartOffset, SEEK_CUR);

                return $fdtFile;
            }

            if( !isset($this->_sharedDocStoreOptions['files'][$fdxFName]) ) {
                require_once 'Zend/Search/Lucene/Exception.php';
                throw new Zend_Search_Lucene_Exception('Shared doc storage segment compound file doesn\'t contain '
                                       . $fdxFName . ' file.' );
            }
            if( !isset($this->_sharedDocStoreOptions['files'][$fdtFName]) ) {
                require_once 'Zend/Search/Lucene/Exception.php';
                throw new Zend_Search_Lucene_Exception('Shared doc storage segment compound file doesn\'t contain '
                                       . $fdtFName . ' file.' );
            }

            // Open shared docstore segment file
            $cfxFile = $this->_directory->getFileObject($this->_sharedDocStoreOptions['segment'] . '.cfx', $shareHandler);
            // Seek to the start of '.fdx' file within compound file
            $cfxFile->seek($this->_sharedDocStoreOptions['files'][$fdxFName]);
            // Seek to the start of current segment documents section
            $cfxFile->seek($this->_sharedDocStoreOptions['offset']*8, SEEK_CUR);

            if ($extension == '.fdx') {
                // '.fdx' file is requested
                return $cfxFile;
            }

            // '.fdt' file is requested
            $fdtStartOffset = $cfxFile->readLong();

            // Seek to the start of '.fdt' file within compound file
            $cfxFile->seek($this->_sharedDocStoreOptions['files'][$fdtFName]);
            // Seek to the start of current segment documents section
            $cfxFile->seek($fdtStartOffset, SEEK_CUR);

            // The '.fdt' data lives inside the compound file, so the positioned
            // compound file object is the result (an undefined $fdtFile
            // variable used to be returned here).
            return $cfxFile;
        }

        $filename = $this->_name . $extension;

        if (!$this->_isCompound) {
            // Segment files are stored as separate files
            return $this->_directory->getFileObject($filename, $shareHandler);
        }

        if( !isset($this->_segFiles[$filename]) ) {
            require_once 'Zend/Search/Lucene/Exception.php';
            throw new Zend_Search_Lucene_Exception('Segment compound file doesn\'t contain '
                                       . $filename . ' file.' );
        }

        // Position the compound file at the data offset of the requested file
        $file = $this->_directory->getFileObject($this->_name . '.cfs', $shareHandler);
        $file->seek($this->_segFiles[$filename]);
        return $file;
    }
574
575
    /**
576
     * Get compound file length
577
     *
578
     * @param string $extension
579
     * @return integer
580
     */
581 18
    public function compoundFileLength($extension)
    {
        // Length of a segment file, looked up in the shared doc store, the
        // directory itself or the compound file sizes table.
        $isStoredFieldsFile = ($extension == '.fdx' || $extension == '.fdt');

        if ($isStoredFieldsFile && $this->_usesSharedDocStore) {
            $docStoreFile = $this->_sharedDocStoreOptions['segment'] . $extension;

            if ($this->_sharedDocStoreOptions['isCompound']) {
                // Size is recorded in the shared doc store compound file table
                if (isset($this->_sharedDocStoreOptions['fileSizes'][$docStoreFile])) {
                    return $this->_sharedDocStoreOptions['fileSizes'][$docStoreFile];
                }

                require_once 'Zend/Search/Lucene/Exception.php';
                throw new Zend_Search_Lucene_Exception('Shared doc store compound file doesn\'t contain '
                                           . $docStoreFile . ' file.' );
            }

            // Shared doc store file is a plain file in the directory
            return $this->_directory->fileLength($docStoreFile);
        }

        $segmentFile = $this->_name . $extension;

        // A standalone file in the directory takes precedence
        if ($this->_directory->fileExists($segmentFile)) {
            return $this->_directory->fileLength($segmentFile);
        }

        // Otherwise the size has to be known from the compound file table
        if (isset($this->_segFileSizes[$segmentFile])) {
            return $this->_segFileSizes[$segmentFile];
        }

        require_once 'Zend/Search/Lucene/Exception.php';
        throw new Zend_Search_Lucene_Exception('Index compound file doesn\'t contain '
                                   . $segmentFile . ' file.' );
    }
615
616
    /**
617
     * Returns field index or -1 if field is not found
618
     *
619
     * @param string $fieldName
620
     * @return integer
621
     */
622 42
    public function getFieldNum($fieldName)
    {
        // Linear search through the field infos; -1 means "not found"
        $fieldNum = -1;

        foreach ($this->_fields as $fieldInfo) {
            if ($fieldInfo->name == $fieldName) {
                $fieldNum = $fieldInfo->number;
                break;
            }
        }

        return $fieldNum;
    }
632
633
    /**
634
     * Returns field info for specified field
635
     *
636
     * @param integer $fieldNum
637
     * @return Zend_Search_Lucene_Index_FieldInfo
638
     */
639
    public function getField($fieldNum)
    {
        // Field infos are stored under their field number
        $fieldInfo = $this->_fields[$fieldNum];

        return $fieldInfo;
    }
643
644
    /**
645
     * Returns array of fields.
646
     * if $indexed parameter is true, then returns only indexed fields.
647
     *
648
     * @param boolean $indexed
649
     * @return array
650
     */
651
    public function getFields($indexed = false)
    {
        // Collects field names as a name => name map
        $fieldNames = array();

        foreach ($this->_fields as $fieldInfo) {
            if ($indexed && !$fieldInfo->isIndexed) {
                // Only indexed fields are requested
                continue;
            }

            $fieldNames[$fieldInfo->name] = $fieldInfo->name;
        }

        return $fieldNames;
    }
661
662
    /**
663
     * Returns array of FieldInfo objects.
664
     *
665
     * @return array
666
     */
667
    public function getFieldInfos()
    {
        // The list is built by the constructor from the '.fnm' file
        $fieldInfos = $this->_fields;

        return $fieldInfos;
    }
671
672
    /**
673
     * Returns actual deletions file generation number.
674
     *
675
     * @return integer
676
     */
677 20
    public function getDelGen()
    {
        // Current delete file generation number of this segment
        $delGen = $this->_delGen;

        return $delGen;
    }
681
682
    /**
683
     * Returns the total number of documents in this segment (including deleted documents).
684
     *
685
     * @return integer
686
     */
687 23
    public function count()
    {
        // Document count as passed to the constructor
        $docCount = $this->_docCount;

        return $docCount;
    }
691
692
    /**
693
     * Returns number of deleted documents.
694
     *
695
     * @return integer
696
     */
697
    private function _deletedCount()
    {
        // No deletions are loaded for this segment
        if ($this->_deleted === null) {
            return 0;
        }

        // With the bitset extension the deletions have to be expanded into
        // an array first; otherwise they already are an array
        $deletedDocs = extension_loaded('bitset') ? bitset_to_array($this->_deleted)
                                                  : $this->_deleted;

        return count($deletedDocs);
    }
709
710
    /**
711
     * Returns the total number of non-deleted documents in this segment.
712
     *
713
     * @return integer
714
     */
715
    public function numDocs()
    {
        // Without deletions every document of the segment counts
        if (!$this->hasDeletions()) {
            return $this->_docCount;
        }

        $deletedDocs = $this->_deletedCount();

        return $this->_docCount - $deletedDocs;
    }
723
724
    /**
725
     * Get field position in a fields dictionary
726
     *
727
     * @param integer $fieldNum
728
     * @return integer
729
     */
730 42
    private function _getFieldPosition($fieldNum) {
        // Field numbers known to the translation table map to their dictionary position
        if (isset($this->_fieldsDicPositions[$fieldNum])) {
            return $this->_fieldsDicPositions[$fieldNum];
        }

        // Any other value is treated as a 'direct value'
        return $fieldNum;
    }
735
736
    /**
737
     * Return segment name
738
     *
739
     * @return string
740
     */
741 18
    public function getName()
    {
        // Segment name as passed to the constructor
        $segmentName = $this->_name;

        return $segmentName;
    }
745
746
747
    /**
748
     * TermInfo cache
749
     *
750
     * Size is 1024.
751
     * Numbers are used instead of class constants because of performance considerations
752
     *
753
     * @var array
754
     */
755
    private $_termInfoCache = array();
756
757
    private function _cleanUpTermInfoCache()
    {
        // Drops entries from the front of the cache, one at a time,
        // and stops as soon as exactly 768 entries are left.
        $cachedKeys = array_keys($this->_termInfoCache);

        foreach ($cachedKeys as $cacheKey) {
            unset($this->_termInfoCache[$cacheKey]);

            if (count($this->_termInfoCache) == 768) {
                return;
            }
        }
    }
769
770
    /**
771
     * Load terms dictionary index
772
     *
773
     * @throws Zend_Search_Lucene_Exception
774
     */
775 42
    private function _loadDictionaryIndex()
    {
        $stiFileName = $this->_name . '.sti';

        // Check, if index is already serialized
        if ($this->_directory->fileExists($stiFileName)) {
            // Load serialized dictionary index data
            $stiFile     = $this->_directory->getFileObject($stiFileName);
            $stiFileData = $stiFile->readBytes($this->_directory->fileLength($stiFileName));

            // NOTE(review): unserialize() is applied to raw index file contents.
            // That is only safe while the index directory is not writable by
            // untrusted parties - confirm for your deployment.
            $unserializedData = @unserialize($stiFileData);

            // Accept the cached copy only if it has the expected shape
            // (array($termDictionary, $termDictionaryInfos)). A truncated or
            // foreign .sti file falls through and is rebuilt from the .tii file
            // instead of leaving the dictionaries in an unusable state.
            if (is_array($unserializedData)
                && isset($unserializedData[0], $unserializedData[1])
                && is_array($unserializedData[0])
                && is_array($unserializedData[1])
            ) {
                list($this->_termDictionary, $this->_termDictionaryInfos) = $unserializedData;
                return;
            }
        }

        // Load data from .tii file and generate .sti file

        // Prefetch dictionary index data
        $tiiFile     = $this->openCompoundFile('.tii');
        $tiiFileData = $tiiFile->readBytes($this->compoundFileLength('.tii'));

        /** Zend_Search_Lucene_Index_DictionaryLoader */
        require_once 'Zend/Search/Lucene/Index/DictionaryLoader.php';

        // Load dictionary index data
        list($this->_termDictionary, $this->_termDictionaryInfos) =
                    Zend_Search_Lucene_Index_DictionaryLoader::load($tiiFileData);

        // Store the parsed dictionary so that the next load can skip .tii parsing
        $stiFileData = serialize(array($this->_termDictionary, $this->_termDictionaryInfos));
        $stiFile     = $this->_directory->createFile($stiFileName);
        $stiFile->writeBytes($stiFileData);
    }
807
808
    /**
809
     * Scans terms dictionary and returns term info
810
     *
811
     * @param Zend_Search_Lucene_Index_Term $term
812
     * @return Zend_Search_Lucene_Index_TermInfo|null
813
     */
814 42
    public function getTermInfo(Zend_Search_Lucene_Index_Term $term)
    {
        // Returns the term info for $term, or null when the term (or its field)
        // is not found in this segment. Results are kept in $_termInfoCache.
        $termKey = $term->key();
        if (isset($this->_termInfoCache[$termKey])) {
            $termInfo = $this->_termInfoCache[$termKey];

            // Move termInfo to the end of cache (most recently used position)
            unset($this->_termInfoCache[$termKey]);
            $this->_termInfoCache[$termKey] = $termInfo;

            return $termInfo;
        }

        if ($this->_termDictionary === null) {
            $this->_loadDictionaryIndex();
        }

        $searchField = $this->getFieldNum($term->field);

        if ($searchField == -1) {
            // getFieldNum() signalled -1 (presumably: field not present in this segment)
            return null;
        }
        $searchDicField = $this->_getFieldPosition($searchField);

        // Binary search in the in-memory dictionary index. Entries are compared
        // by field dictionary position first and by term text second.
        $lowIndex  = 0;
        $highIndex = count($this->_termDictionary)-1;
        while ($highIndex >= $lowIndex) {
            $mid     = ($highIndex + $lowIndex) >> 1;
            $midTerm = $this->_termDictionary[$mid];

            $fieldNum = $this->_getFieldPosition($midTerm[0] /* field */);
            $delta    = $searchDicField - $fieldNum;
            if ($delta == 0) {
                $delta = strcmp($term->text, $midTerm[1] /* text */);
            }

            if ($delta < 0) {
                $highIndex = $mid-1;
            } elseif ($delta > 0) {
                $lowIndex  = $mid+1;
            } else {
                // Exact hit in the dictionary index, no need to scan the .tis file
                $a = $this->_termDictionaryInfos[$mid];
                $termInfo = new Zend_Search_Lucene_Index_TermInfo($a[0], $a[1], $a[2], $a[3], $a[4]);

                // Put loaded termInfo into cache and keep the cache bounded on this path too
                $this->_termInfoCache[$termKey] = $termInfo;

                if (count($this->_termInfoCache) >= 1024) {
                    $this->_cleanUpTermInfoCache();
                }

                return $termInfo;
            }
        }

        if ($highIndex == -1) {
            // Term is out of the dictionary range
            return null;
        }

        // $highIndex now points at the closest dictionary index entry which
        // precedes the searched term; the sequential scan starts from there.
        $prevPosition = $highIndex;
        $prevTerm     = $this->_termDictionary[$prevPosition];
        $prevTermInfo = $this->_termDictionaryInfos[$prevPosition];

        $tisFile   = $this->openCompoundFile('.tis');
        $tiVersion = $tisFile->readInt();
        if ($tiVersion != (int)0xFFFFFFFE /* pre-2.1 format */  &&
            $tiVersion != (int)0xFFFFFFFD /* 2.1+ format    */) {
            require_once 'Zend/Search/Lucene/Exception.php';
            throw new Zend_Search_Lucene_Exception('Wrong TermInfoFile file format');
        }

        $termCount     = $tisFile->readLong();
        $indexInterval = $tisFile->readInt();
        $skipInterval  = $tisFile->readInt();
        if ($tiVersion == (int)0xFFFFFFFD /* 2.1+ format */) {
            // maxSkipLevels: the value is not used, but it has to be consumed
            // so that the file position matches the 24 byte header size below
            $tisFile->readInt();
        }

        // The seek is relative to the current position, so the header bytes
        // which have already been read are subtracted from the pointer
        $tisFile->seek($prevTermInfo[4] /* indexPointer */ - (($tiVersion == (int)0xFFFFFFFD)? 24 : 20) /* header size*/, SEEK_CUR);

        $termValue    = $prevTerm[1] /* text */;
        $termFieldNum = $prevTerm[0] /* field */;
        $freqPointer  = $prevTermInfo[1] /* freqPointer */;
        $proxPointer  = $prevTermInfo[2] /* proxPointer */;

        // Read entries sequentially while they sort before the searched term.
        // Term text is prefix-compressed against the previous term, and the
        // freq/prox pointers are stored as deltas.
        for ($count = $prevPosition*$indexInterval + 1;
             $count <= $termCount &&
             ( $this->_getFieldPosition($termFieldNum) < $searchDicField ||
              ($this->_getFieldPosition($termFieldNum) == $searchDicField &&
               strcmp($termValue, $term->text) < 0) );
             $count++) {
            $termPrefixLength = $tisFile->readVInt();
            $termSuffix       = $tisFile->readString();
            $termFieldNum     = $tisFile->readVInt();
            $termValue        = Zend_Search_Lucene_Index_Term::getPrefix($termValue, $termPrefixLength) . $termSuffix;

            $docFreq      = $tisFile->readVInt();
            $freqPointer += $tisFile->readVInt();
            $proxPointer += $tisFile->readVInt();
            if( $docFreq >= $skipInterval ) {
                $skipOffset = $tisFile->readVInt();
            } else {
                $skipOffset = 0;
            }
        }

        // strcmp() instead of '==': loose comparison treats numeric strings as
        // numbers ("1e3" == "1000"), which could report a different term as a match
        if ($termFieldNum == $searchField && strcmp($termValue, $term->text) == 0) {
            $termInfo = new Zend_Search_Lucene_Index_TermInfo($docFreq, $freqPointer, $proxPointer, $skipOffset);
        } else {
            $termInfo = null;
        }

        // Put loaded termInfo into cache
        // NOTE(review): a null result is stored as well, but the isset() lookup at
        // the top of this method cannot see null values, so negative results are
        // never served from the cache.
        $this->_termInfoCache[$termKey] = $termInfo;

        // '>=' rather than '==': the size is not guaranteed to land exactly on 1024
        if (count($this->_termInfoCache) >= 1024) {
            $this->_cleanUpTermInfoCache();
        }

        return $termInfo;
    }
935
936
    /**
937
     * Returns IDs of all the documents containing term.
938
     *
939
     * @param Zend_Search_Lucene_Index_Term $term
940
     * @param integer $shift
941
     * @param Zend_Search_Lucene_Index_DocsFilter|null $docsFilter
942
     * @return array
943
     */
944 3
    public function termDocs(Zend_Search_Lucene_Index_Term $term, $shift = 0, $docsFilter = null)
    {
        // Returns a list of ($shift + docId) values for the documents containing
        // $term. When a documents filter is given, its data for this segment is
        // replaced by the set of matched document ids (docId => 1).
        $termInfo = $this->getTermInfo($term);

        if (!$termInfo instanceof Zend_Search_Lucene_Index_TermInfo) {
            // Term is not present: nothing can match within this segment
            if ($docsFilter !== null  &&  $docsFilter instanceof Zend_Search_Lucene_Index_DocsFilter) {
                $docsFilter->segmentFilters[$this->_name] = array();
            }
            return array();
        }

        $frqFile = $this->openCompoundFile('.frq');
        $frqFile->seek($termInfo->freqPointer,SEEK_CUR);

        // Existing filter data for this segment (docId => 1), or null when the
        // result must not be restricted
        $restrictTo = null;

        if ($docsFilter !== null) {
            if (!$docsFilter instanceof Zend_Search_Lucene_Index_DocsFilter) {
                require_once 'Zend/Search/Lucene/Exception.php';
                throw new Zend_Search_Lucene_Exception('Documents filter must be an instance of Zend_Search_Lucene_Index_DocsFilter or null.');
            }

            if (isset($docsFilter->segmentFilters[$this->_name])) {
                // Filter already has some data for the current segment
                $restrictTo = $docsFilter->segmentFilters[$this->_name];

                // An empty filter can't match anything
                if (count($restrictTo) == 0) {
                    return array();
                }
            }
        }

        $docId   = 0;
        $result  = array();
        $matched = array();

        for ($count = 0; $count < $termInfo->docFreq; $count++) {
            // Doc ids are stored as deltas; the lowest bit of the stored value
            // tells whether an explicit frequency follows (odd: no, even: yes)
            $docDelta = $frqFile->readVInt();
            if ($docDelta % 2 == 1) {
                $docId += ($docDelta-1)/2;
            } else {
                $docId += $docDelta/2;
                // read freq (the value is not needed, but it has to be skipped)
                $frqFile->readVInt();
            }

            if ($restrictTo === null  ||  isset($restrictTo[$docId])) {
                $result[] = $shift + $docId;

                if ($docsFilter !== null) {
                    $matched[$docId] = 1; // 1 is just some constant value
                }
            }
        }

        if ($docsFilter !== null) {
            $docsFilter->segmentFilters[$this->_name] = $matched;
        }

        return $result;
    }
1055
1056
    /**
1057
     * Returns term freqs array.
1058
     * Result array structure: array(docId => freq, ...)
1059
     *
1060
     * @param Zend_Search_Lucene_Index_Term $term
1061
     * @param integer $shift
1062
     * @param Zend_Search_Lucene_Index_DocsFilter|null $docsFilter
1063
     * @return array
1064
     */
1065 3
    public function termFreqs(Zend_Search_Lucene_Index_Term $term, $shift = 0, $docsFilter = null)
    {
        // Returns array(($shift + docId) => freq, ...) for the documents
        // containing $term. When a documents filter is given, its data for this
        // segment is replaced by the set of matched document ids (docId => 1).
        $termInfo = $this->getTermInfo($term);

        if (!$termInfo instanceof Zend_Search_Lucene_Index_TermInfo) {
            // Term is not present: nothing can match within this segment
            if ($docsFilter !== null  &&  $docsFilter instanceof Zend_Search_Lucene_Index_DocsFilter) {
                $docsFilter->segmentFilters[$this->_name] = array();
            }
            return array();
        }

        $frqFile = $this->openCompoundFile('.frq');
        $frqFile->seek($termInfo->freqPointer,SEEK_CUR);

        // Existing filter data for this segment (docId => 1), or null when the
        // result must not be restricted
        $restrictTo = null;

        if ($docsFilter !== null) {
            if (!$docsFilter instanceof Zend_Search_Lucene_Index_DocsFilter) {
                require_once 'Zend/Search/Lucene/Exception.php';
                throw new Zend_Search_Lucene_Exception('Documents filter must be an instance of Zend_Search_Lucene_Index_DocsFilter or null.');
            }

            if (isset($docsFilter->segmentFilters[$this->_name])) {
                // Filter already has some data for the current segment
                $restrictTo = $docsFilter->segmentFilters[$this->_name];

                // An empty filter can't match anything
                if (count($restrictTo) == 0) {
                    return array();
                }
            }
        }

        $docId   = 0;
        $result  = array();
        $matched = array();

        for ($count = 0; $count < $termInfo->docFreq; $count++) {
            // Doc ids are stored as deltas; an odd stored value means the
            // frequency is 1, an even one means an explicit frequency follows
            $docDelta = $frqFile->readVInt();
            if ($docDelta % 2 == 1) {
                $docId += ($docDelta-1)/2;
                $freq   = 1;
            } else {
                $docId += $docDelta/2;
                $freq   = $frqFile->readVInt();
            }

            if ($restrictTo === null  ||  isset($restrictTo[$docId])) {
                $result[$shift + $docId] = $freq;

                if ($docsFilter !== null) {
                    $matched[$docId] = 1; // 1 is just some constant value
                }
            }
        }

        if ($docsFilter !== null) {
            $docsFilter->segmentFilters[$this->_name] = $matched;
        }

        return $result;
    }
1182
1183
    /**
1184
     * Returns term positions array.
1185
     * Result array structure: array(docId => array(pos1, pos2, ...), ...)
1186
     *
1187
     * @param Zend_Search_Lucene_Index_Term $term
1188
     * @param integer $shift
1189
     * @param Zend_Search_Lucene_Index_DocsFilter|null $docsFilter
1190
     * @return array
1191
     */
1192
    public function termPositions(Zend_Search_Lucene_Index_Term $term, $shift = 0, $docsFilter = null)
    {
        // Returns array(($shift + docId) => array(pos1, pos2, ...), ...) for the
        // documents containing $term. When a documents filter is given, its data
        // for this segment is replaced by the set of matched document ids
        // (docId => 1).
        $termInfo = $this->getTermInfo($term);

        if (!$termInfo instanceof Zend_Search_Lucene_Index_TermInfo) {
            // Term is not present: nothing can match within this segment
            if ($docsFilter !== null  &&  $docsFilter instanceof Zend_Search_Lucene_Index_DocsFilter) {
                $docsFilter->segmentFilters[$this->_name] = array();
            }
            return array();
        }

        $frqFile = $this->openCompoundFile('.frq');
        $frqFile->seek($termInfo->freqPointer,SEEK_CUR);

        // Existing filter data for this segment (docId => 1), or null when the
        // result must not be restricted
        $restrictTo = null;

        if ($docsFilter !== null) {
            if (!$docsFilter instanceof Zend_Search_Lucene_Index_DocsFilter) {
                require_once 'Zend/Search/Lucene/Exception.php';
                throw new Zend_Search_Lucene_Exception('Documents filter must be an instance of Zend_Search_Lucene_Index_DocsFilter or null.');
            }

            if (isset($docsFilter->segmentFilters[$this->_name])) {
                // Filter already has some data for the current segment
                $restrictTo = $docsFilter->segmentFilters[$this->_name];

                // An empty filter can't match anything
                if (count($restrictTo) == 0) {
                    return array();
                }
            }
        }

        // Step 1: collect docId => freq for every document containing the term
        $docId = 0;
        $freqs = array();

        for ($count = 0; $count < $termInfo->docFreq; $count++) {
            // Doc ids are stored as deltas; an odd stored value means the
            // frequency is 1, an even one means an explicit frequency follows
            $docDelta = $frqFile->readVInt();
            if ($docDelta % 2 == 1) {
                $docId += ($docDelta-1)/2;
                $freqs[$docId] = 1;
            } else {
                $docId += $docDelta/2;
                $freqs[$docId] = $frqFile->readVInt();
            }
        }

        // Step 2: read the positions of each document from the .prx file
        $matched = array();
        $result  = array();
        $prxFile = $this->openCompoundFile('.prx');
        $prxFile->seek($termInfo->proxPointer, SEEK_CUR);

        foreach ($freqs as $docId => $freq) {
            $termPosition = 0;
            $positions    = array();

            // we have to read .prx file to get right position for next doc
            // even if the filter doesn't match the current document
            for ($count = 0; $count < $freq; $count++) {
                // positions are stored as deltas within a document
                $termPosition += $prxFile->readVInt();
                $positions[]   = $termPosition;
            }

            // Include into updated filter and into result only if doc is matched by filter
            if ($restrictTo === null  ||  isset($restrictTo[$docId])) {
                $result[$shift + $docId] = $positions;

                if ($docsFilter !== null) {
                    $matched[$docId] = 1; // 1 is just some constant value
                }
            }
        }

        if ($docsFilter !== null) {
            $docsFilter->segmentFilters[$this->_name] = $matched;
        }

        return $result;
    }
1365
1366
    /**
1367
     * Load normalization factors from an index file
1368
     *
1369
     * @param integer $fieldNum
1370
     * @throws Zend_Search_Lucene_Exception
1371
     */
1372 3
    private function _loadNorm($fieldNum)
    {
        if (!$this->_hasSingleNormFile) {
            // One norms file per field ('.f<fieldNum>'): load the requested field only
            $fieldNormsFile = $this->openCompoundFile('.f' . $fieldNum);
            $this->_norms[$fieldNum] = $fieldNormsFile->readBytes($this->_docCount);

            return;
        }

        // Single '.nrm' file: a 3 byte signature and a format version byte,
        // followed by the norms of the indexed fields
        $normsFile     = $this->openCompoundFile('.nrm');
        $signature     = $normsFile->readBytes(3);
        $formatVersion = $normsFile->readByte();

        $headerIsValid = ($signature == 'NRM'  &&  $formatVersion == (int)0xFF);
        if (!$headerIsValid) {
            require_once 'Zend/Search/Lucene/Exception.php';
            throw new  Zend_Search_Lucene_Exception('Wrong norms file format.');
        }

        // Norms of all indexed fields are loaded at once, one byte per document
        foreach ($this->_fields as $fNum => $fieldInfo) {
            if (!$fieldInfo->isIndexed) {
                continue;
            }

            $this->_norms[$fNum] = $normsFile->readBytes($this->_docCount);
        }
    }
1395
1396
    /**
1397
     * Returns normalization factor for specified documents
1398
     *
1399
     * @param integer $id
1400
     * @param string $fieldName
1401
     * @return float|null
1402
     */
1403 3
    public function norm($id, $fieldName)
    {
        // Returns the decoded normalization factor of document $id for the
        // given field, or null when no norms are available for that field.
        $fieldNum = $this->getFieldNum($fieldName);

        // -1 is the 'no such field' value which is also checked by normVector()
        // and getTermInfo(); it must not be used as an index into $_fields.
        if ($fieldNum == -1  ||  !($this->_fields[$fieldNum]->isIndexed)) {
            return null;
        }

        // Norms are loaded on first use
        if (!isset($this->_norms[$fieldNum])) {
            $this->_loadNorm($fieldNum);
        }

        // Norms are kept as a byte string with one encoded byte per document
        return Zend_Search_Lucene_Search_Similarity::decodeNorm( ord($this->_norms[$fieldNum][$id]) );
    }
1417
1418
    /**
1419
     * Returns norm vector, encoded in a byte string
1420
     *
1421
     * @param string $fieldName
1422
     * @return string
1423
     */
1424
    public function normVector($fieldName)
    {
        $fieldNum = $this->getFieldNum($fieldName);
        $hasNorms = ($fieldNum != -1  &&  $this->_fields[$fieldNum]->isIndexed);

        if (!$hasNorms) {
            // Unknown or non-indexed field: build a vector which repeats the
            // encoded default norm (lengthNorm() for zero terms) for every document
            $similarity  = Zend_Search_Lucene_Search_Similarity::getDefault();
            $defaultNorm = chr($similarity->encodeNorm( $similarity->lengthNorm($fieldName, 0) ));

            return str_repeat($defaultNorm, $this->_docCount);
        }

        // Norms are loaded on first use
        if (!isset($this->_norms[$fieldNum])) {
            $this->_loadNorm($fieldNum);
        }

        return $this->_norms[$fieldNum];
    }
1441
1442
1443
    /**
1444
     * Returns true if any documents have been deleted from this index segment.
1445
     *
1446
     * @return boolean
1447
     */
1448
    public function hasDeletions()
    {
        // null means that no deletions data is held for this segment
        $noDeletionsData = ($this->_deleted === null);

        return !$noDeletionsData;
    }
1452
1453
1454
    /**
1455
     * Returns true if segment has single norms file.
1456
     *
1457
     * @return boolean
1458
     */
1459 18
    public function hasSingleNormFile()
    {
        // Normalize the stored flag to a strict boolean
        return (bool) $this->_hasSingleNormFile;
    }
1463
1464
    /**
1465
     * Returns true if segment is stored using compound segment file.
1466
     *
1467
     * @return boolean
1468
     */
1469 18
    public function isCompound()
    {
        // The flag is returned as stored (no boolean cast is applied here)
        $compoundFlag = $this->_isCompound;

        return $compoundFlag;
    }
1473
1474
    /**
     * Marks a document of this index segment as deleted.
     *
     * The change is kept in memory only and the segment is flagged as dirty.
     * The deletions set is a bitset when the 'bitset' extension is loaded
     * and an array keyed by document id otherwise.
     *
     * @param integer $id  internal document id
     */
    public function delete($id)
    {
        $this->_deletedDirty = true;

        if (!extension_loaded('bitset')) {
            // Plain PHP representation: array keys are deleted document ids
            if ($this->_deleted === null) {
                $this->_deleted = array();
            }
            $this->_deleted[$id] = 1;

            return;
        }

        // Bitset representation: create the set on first deletion
        if ($this->_deleted === null) {
            $this->_deleted = bitset_empty($id);
        }
        bitset_incl($this->_deleted, $id);
    }
1497
1498
    /**
     * Checks whether a document is marked as deleted.
     *
     * @param integer $id  internal document id
     * @return boolean
     */
    public function isDeleted($id)
    {
        // No deletions set at all, so nothing can be deleted
        if ($this->_deleted === null) {
            return false;
        }

        // The lookup depends on how the deletions set is represented
        return extension_loaded('bitset')
             ? bitset_in($this->_deleted, $id)
             : isset($this->_deleted[$id]);
    }
1516
1517
    /**
     * Detect latest delete generation
     *
     * Scans the directory file list for the deletions files of this segment:
     *   <segment_name>.del      - counted as generation 0
     *   <segment_name>_NNN.del  - NNN is the generation number in base 36
     *
     * Is actually used from writeChanges() method or from the constructor if it's invoked from
     * Index writer. In both cases index write lock is already obtained, so we shouldn't care
     * about it
     *
     * @return integer  the highest generation found or -1 if the segment has no deletions file
     */
    private function _detectLatestDelGen()
    {
        // The segment name is quoted to be matched literally, whatever characters it contains.
        // The pattern doesn't depend on the file, so it's built once.
        $genFilePattern = '/^' . preg_quote($this->_name, '/') . '_([a-zA-Z0-9]+)\.del$/i';

        $delFileList = array();
        foreach ($this->_directory->fileList() as $file) {
            if ($file == $this->_name . '.del') {
                // Matches <segment_name>.del file name
                $delFileList[] = 0;
            } else if (preg_match($genFilePattern, $file, $matches)) {
                // Matches <segment_name>_NNN.del file names
                $delFileList[] = (int)base_convert($matches[1], 36, 10);
            }
        }

        if (count($delFileList) == 0) {
            // There is no deletions file for current segment in the directory
            // Report it with the special generation number -1
            return -1;
        }

        // There are some deletions files for current segment in the directory
        // Take the highest generation number
        return max($delFileList);
    }
1549
1550
    /**
     * Write changes if it's necessary.
     *
     * If the current process made no deletions, the deletions set is only
     * reloaded when a newer deletions file generation is found in the directory.
     * Otherwise the in-memory deletions (merged with a newer deletions file, if
     * any) are written into a deletions file of the next generation.
     *
     * This method must be invoked only from the Writer _updateSegments() method,
     * so index Write lock has to be already obtained.
     *
     * @internal
     * @throws Zend_Search_Lucene_Exception
     */
    public function writeChanges()
    {
        // Get the latest generation number present in the directory
        $latestDelGen = $this->_detectLatestDelGen();

        if (!$this->_deletedDirty) {
            // There were no deletions made by current process

            if ($latestDelGen == $this->_delGen) {
                // Delete file hasn't been updated by any concurrent process
                return;
            }

            if ($latestDelGen > $this->_delGen) {
                // Delete file has been updated by some concurrent process
                // Reload deletions file
                $this->_delGen  = $latestDelGen;
                $this->_deleted = $this->_loadDelFile();

                return;
            }

            // Known generation is newer than anything present in the directory
            require_once 'Zend/Search/Lucene/Exception.php';
            throw new Zend_Search_Lucene_Exception('Delete file processing workflow is corrupted for the segment \'' . $this->_name . '\'.');
        }

        $useBitset = extension_loaded('bitset');

        if ($latestDelGen > $this->_delGen) {
            // Merge current deletions with latest deletions file
            $this->_delGen = $latestDelGen;

            $concurrentDeletions = $this->_loadDelFile();

            if ($useBitset) {
                $this->_deleted = bitset_union($this->_deleted, $concurrentDeletions);
            } else {
                $this->_deleted += $concurrentDeletions;
            }
        }

        if ($useBitset) {
            // The bitset is written as is
            $delBytes = $this->_deleted;
            $bitCount = count(bitset_to_array($delBytes));
        } else {
            // Pack deleted document ids into a bit vector, lowest bit first within each byte
            $byteCount = floor($this->_docCount/8)+1;
            $delBytes  = '';
            for ($byteNum = 0; $byteNum < $byteCount; $byteNum++) {
                $packedByte = 0;
                for ($bit = 0; $bit < 8; $bit++) {
                    if (isset($this->_deleted[$byteNum*8 + $bit])) {
                        $packedByte |= (1<<$bit);
                    }
                }
                $delBytes .= chr($packedByte);
            }
            $bitCount = count($this->_deleted);
        }

        if ($this->_delGen != -1) {
            // Increase delete file generation number by 1
            $this->_delGen++;
        } else {
            // There was no deletions file yet, start with generation 1
            $this->_delGen = 1;
        }

        $delFileName = $this->_name . '_' . base_convert($this->_delGen, 10, 36) . '.del';

        $delFile = $this->_directory->createFile($delFileName);
        $delFile->writeInt($this->_docCount);
        $delFile->writeInt($bitCount);
        $delFile->writeBytes($delBytes);

        $this->_deletedDirty = false;
    }
1629
1630
1631
    /**
1632
     * Term Dictionary File object for stream like terms reading
1633
     *
1634
     * @var Zend_Search_Lucene_Storage_File
1635
     */
1636
    private $_tisFile = null;
1637
1638
    /**
1639
     * Actual offset of the .tis file data
1640
     *
1641
     * @var integer
1642
     */
1643
    private $_tisFileOffset;
1644
1645
    /**
1646
     * Frequencies File object for stream like terms reading
1647
     *
1648
     * @var Zend_Search_Lucene_Storage_File
1649
     */
1650
    private $_frqFile = null;
1651
1652
    /**
1653
     * Actual offset of the .frq file data
1654
     *
1655
     * @var integer
1656
     */
1657
    private $_frqFileOffset;
1658
1659
    /**
1660
     * Positions File object for stream like terms reading
1661
     *
1662
     * @var Zend_Search_Lucene_Storage_File
1663
     */
1664
    private $_prxFile = null;
1665
1666
    /**
1667
     * Actual offset of the .prx file in the compound file
1668
     *
1669
     * @var integer
1670
     */
1671
    private $_prxFileOffset;
1672
1673
1674
    /**
1675
     * Actual number of terms in term stream
1676
     *
1677
     * @var integer
1678
     */
1679
    private $_termCount = 0;
1680
1681
    /**
1682
     * Overall number of terms in term stream
1683
     *
1684
     * @var integer
1685
     */
1686
    private $_termNum = 0;
1687
1688
    /**
1689
     * Segment index interval
1690
     *
1691
     * @var integer
1692
     */
1693
    private $_indexInterval;
1694
1695
    /**
1696
     * Segment skip interval
1697
     *
1698
     * @var integer
1699
     */
1700
    private $_skipInterval;
1701
1702
    /**
1703
     * Last TermInfo in a terms stream
1704
     *
1705
     * @var Zend_Search_Lucene_Index_TermInfo
1706
     */
1707
    private $_lastTermInfo = null;
1708
1709
    /**
1710
     * Last Term in a terms stream
1711
     *
1712
     * @var Zend_Search_Lucene_Index_Term
1713
     */
1714
    private $_lastTerm = null;
1715
1716
    /**
1717
     * Map of the document IDs
1718
     * Used to get new docID after removing deleted documents.
1719
     * It's not very effective from memory usage point of view,
1720
     * but much faster than other methods
1721
     *
1722
     * @var array|null
1723
     */
1724
    private $_docMap = null;
1725
1726
    /**
1727
     * An array of all term positions in the documents.
1728
     * Array structure: array( docId => array( pos1, pos2, ...), ...)
1729
     *
1730
     * Is set to null if term positions loading has to be skipped
1731
     *
1732
     * @var array|null
1733
     */
1734
    private $_lastTermPositions;
1735
1736
1737
    /**
1738
     * Terms scan mode
1739
     *
1740
     * Values:
1741
     *
1742
     * self::SM_TERMS_ONLY - terms are scanned, no additional info is retrieved
1743
     * self::SM_FULL_INFO  - terms are scanned, frequency and position info is retrieved
1744
     * self::SM_MERGE_INFO - terms are scanned, frequency and position info is retrieved
1745
     *                       document numbers are compacted (shifted if segment has deleted documents)
1746
     *
1747
     * @var integer
1748
     */
1749
    private $_termsScanMode;
1750
1751
    /** Scan modes */
1752
    const SM_TERMS_ONLY = 0;    // terms are scanned, no additional info is retrieved
1753
    const SM_FULL_INFO  = 1;    // terms are scanned, frequency and position info is retrieved
1754
    const SM_MERGE_INFO = 2;    // terms are scanned, frequency and position info is retrieved
1755
                                // document numbers are compacted (shifted if segment contains deleted documents)
1756
1757
    /**
     * Reset terms stream
     *
     * The method takes two optional positional arguments. They are not declared
     * in the signature and are retrieved through func_get_args():
     *   $startId - id for the first document (default value is 0)
     *   $mode    - terms scan mode, one of the self::SM_* constants
     *              (default value is self::SM_TERMS_ONLY)
     *
     * The terms dictionary file is reopened and its header is read. For the
     * SM_FULL_INFO and SM_MERGE_INFO modes frequencies and positions files are
     * opened as well and the document ids map is built for non-deleted documents
     * (ids are compacted in the SM_MERGE_INFO mode). Then the stream is moved
     * to the first term.
     *
     * Returns start document id for the next segment
     *
     * @throws Zend_Search_Lucene_Exception  if more than two arguments are passed, the terms
     *                                       dictionary file has an unknown format or the scan
     *                                       mode is unknown
     * @return integer
     */
    public function resetTermsStream(/** $startId = 0, $mode = self::SM_TERMS_ONLY */)
    {
        /**
         * SegmentInfo->resetTermsStream() method actually takes two optional parameters:
         *   $startId (default value is 0)
         *   $mode (default value is self::SM_TERMS_ONLY)
         */
        $argList  = func_get_args();
        $argCount = count($argList);
        if ($argCount > 2) {
            require_once 'Zend/Search/Lucene/Exception.php';
            throw new Zend_Search_Lucene_Exception('Wrong number of arguments');
        }
        $startId = ($argCount >= 1) ? $argList[0] : 0;
        $mode    = ($argCount == 2) ? $argList[1] : self::SM_TERMS_ONLY;

        // Drop the previously used file object (if any) before the file is opened again
        $this->_tisFile = null;

        $this->_tisFile = $this->openCompoundFile('.tis', false);
        $this->_tisFileOffset = $this->_tisFile->tell();

        $tiVersion = $this->_tisFile->readInt();
        if ($tiVersion != (int)0xFFFFFFFE /* pre-2.1 format */  &&
            $tiVersion != (int)0xFFFFFFFD /* 2.1+ format    */) {
            require_once 'Zend/Search/Lucene/Exception.php';
            throw new Zend_Search_Lucene_Exception('Wrong TermInfoFile file format');
        }

        $this->_termCount     =
              $this->_termNum = $this->_tisFile->readLong(); // Read terms count
        $this->_indexInterval = $this->_tisFile->readInt();  // Read Index interval
        $this->_skipInterval  = $this->_tisFile->readInt();  // Read skip interval
        if ($tiVersion == (int)0xFFFFFFFD /* 2.1+ format */) {
            // Max skip levels value isn't used, but it has to be read
            // to move the file pointer to the first term entry
            $this->_tisFile->readInt();
        }

        // Frequencies and positions files are reopened below only if the mode needs them
        $this->_frqFile = null;
        $this->_prxFile = null;
        $this->_docMap  = array();

        $this->_lastTerm          = new Zend_Search_Lucene_Index_Term('', -1);
        $this->_lastTermInfo      = new Zend_Search_Lucene_Index_TermInfo(0, 0, 0, 0);
        $this->_lastTermPositions = null;

        $this->_termsScanMode = $mode;

        switch ($mode) {
            case self::SM_TERMS_ONLY:
                // Do nothing
                break;

            case self::SM_FULL_INFO:
                // break intentionally omitted
            case self::SM_MERGE_INFO:
                $this->_frqFile = $this->openCompoundFile('.frq', false);
                $this->_frqFileOffset = $this->_frqFile->tell();

                $this->_prxFile = $this->openCompoundFile('.prx', false);
                $this->_prxFileOffset = $this->_prxFile->tell();

                // Map ids of non-deleted documents to the resulting ids:
                // compacted (sequential) for SM_MERGE_INFO, only shifted by $startId otherwise
                for ($count = 0; $count < $this->_docCount; $count++) {
                    if (!$this->isDeleted($count)) {
                        $this->_docMap[$count] = $startId + (($mode == self::SM_MERGE_INFO) ? count($this->_docMap) : $count);
                    }
                }
                break;

            default:
                require_once 'Zend/Search/Lucene/Exception.php';
                throw new Zend_Search_Lucene_Exception('Wrong terms scaning mode specified.');
        }

        // Calculate next segment start id (since $this->_docMap structure may be cleaned by $this->nextTerm() call)
        $nextSegmentStartId = $startId + (($mode == self::SM_MERGE_INFO) ? count($this->_docMap) : $this->_docCount);
        $this->nextTerm();

        return $nextSegmentStartId;
    }
1861
1862
1863
    /**
     * Skip terms stream up to the specified term prefix.
     *
     * Prefix contains fully specified field info and portion of searched term
     *
     * The dictionary index is searched for the closest entry which is not greater
     * than the prefix, the terms dictionary file is positioned at that entry and
     * the stream is scanned forward until the current term matches or is greater
     * than the prefix. If there is no such term (or the field is not presented
     * in this segment), the stream is closed and the current term is null.
     *
     * @param Zend_Search_Lucene_Index_Term $prefix
     * @throws Zend_Search_Lucene_Exception
     */
    public function skipTo(Zend_Search_Lucene_Index_Term $prefix)
    {
        if ($this->_termDictionary === null) {
            $this->_loadDictionaryIndex();
        }

        $searchField = $this->getFieldNum($prefix->field);

        if ($searchField == -1) {
            /**
             * Field is not presented in this segment
             * Go to the end of dictionary
             */
            $this->_tisFile = null;
            $this->_frqFile = null;
            $this->_prxFile = null;

            $this->_lastTerm          = null;
            $this->_lastTermInfo      = null;
            $this->_lastTermPositions = null;

            return;
        }
        $searchDicField = $this->_getFieldPosition($searchField);

        // Binary search for appropriate value in dictionary.
        // Entries are compared by field position first and by term text then.
        $lowIndex = 0;
        $highIndex = count($this->_termDictionary)-1;
        while ($highIndex >= $lowIndex) {
            $mid = ($highIndex + $lowIndex) >> 1;
            $midTerm = $this->_termDictionary[$mid];

            $fieldNum = $this->_getFieldPosition($midTerm[0] /* field */);
            $delta = $searchDicField - $fieldNum;
            if ($delta == 0) {
                $delta = strcmp($prefix->text, $midTerm[1] /* text */);
            }

            if ($delta < 0) {
                $highIndex = $mid-1;
            } elseif ($delta > 0) {
                $lowIndex  = $mid+1;
            } else {
                // We have reached term we are looking for.
                // $highIndex is used as the start position below, so it has to point
                // to the matched entry. Otherwise reading would start from some later
                // entry and the requested term would be skipped.
                $highIndex = $mid;
                break;
            }
        }

        if ($highIndex == -1) {
            // Term is out of the dictionary range
            $this->_tisFile = null;
            $this->_frqFile = null;
            $this->_prxFile = null;

            $this->_lastTerm          = null;
            $this->_lastTermInfo      = null;
            $this->_lastTermPositions = null;

            return;
        }

        // Closest dictionary index entry which is not greater than the prefix
        $prevPosition = $highIndex;
        $prevTerm = $this->_termDictionary[$prevPosition];
        $prevTermInfo = $this->_termDictionaryInfos[$prevPosition];

        if ($this->_tisFile === null) {
            // The end of terms stream is reached and terms dictionary file is closed
            // Perform mini-reset operation
            $this->_tisFile = $this->openCompoundFile('.tis', false);

            if ($this->_termsScanMode == self::SM_FULL_INFO  ||  $this->_termsScanMode == self::SM_MERGE_INFO) {
                $this->_frqFile = $this->openCompoundFile('.frq', false);
                $this->_prxFile = $this->openCompoundFile('.prx', false);
            }
        }
        $this->_tisFile->seek($this->_tisFileOffset + $prevTermInfo[4], SEEK_SET);

        $this->_lastTerm     = new Zend_Search_Lucene_Index_Term($prevTerm[1] /* text */,
                                                                 ($prevTerm[0] == -1) ? '' : $this->_fields[$prevTerm[0] /* field */]->name);
        $this->_lastTermInfo = new Zend_Search_Lucene_Index_TermInfo($prevTermInfo[0] /* docFreq */,
                                                                     $prevTermInfo[1] /* freqPointer */,
                                                                     $prevTermInfo[2] /* proxPointer */,
                                                                     $prevTermInfo[3] /* skipOffset */);
        $this->_termCount  =  $this->_termNum - $prevPosition*$this->_indexInterval;

        if ($highIndex == 0) {
            // skip start entry
            $this->nextTerm();
        } else if ($prefix->field == $this->_lastTerm->field  &&  $prefix->text  == $this->_lastTerm->text) {
            // We got exact match in the dictionary index

            if ($this->_termsScanMode == self::SM_FULL_INFO  ||  $this->_termsScanMode == self::SM_MERGE_INFO) {
                $this->_lastTermPositions = array();

                // Read document frequencies: array( docId => freq, ...)
                $this->_frqFile->seek($this->_lastTermInfo->freqPointer + $this->_frqFileOffset, SEEK_SET);
                $freqs = array();   $docId = 0;
                for( $count = 0; $count < $this->_lastTermInfo->docFreq; $count++ ) {
                    $docDelta = $this->_frqFile->readVInt();
                    if( $docDelta % 2 == 1 ) {
                        // Odd value: frequency is 1 and is not stored separately
                        $docId += ($docDelta-1)/2;
                        $freqs[ $docId ] = 1;
                    } else {
                        // Even value: frequency follows as a separate value
                        $docId += $docDelta/2;
                        $freqs[ $docId ] = $this->_frqFile->readVInt();
                    }
                }

                // Read term positions. They are read for every document to keep
                // the file pointer correct, but stored only for mapped documents.
                $this->_prxFile->seek($this->_lastTermInfo->proxPointer + $this->_prxFileOffset, SEEK_SET);
                foreach ($freqs as $docId => $freq) {
                    $termPosition = 0;  $positions = array();

                    for ($count = 0; $count < $freq; $count++ ) {
                        $termPosition += $this->_prxFile->readVInt();
                        $positions[] = $termPosition;
                    }

                    if (isset($this->_docMap[$docId])) {
                        $this->_lastTermPositions[$this->_docMap[$docId]] = $positions;
                    }
                }
            }

            return;
        }

        // Search term matching specified prefix
        while ($this->_lastTerm !== null) {
            if ( strcmp($this->_lastTerm->field, $prefix->field) > 0  ||
                 ($prefix->field == $this->_lastTerm->field  &&  strcmp($this->_lastTerm->text, $prefix->text) >= 0) ) {
                    // Current term matches or is greater than the pattern
                    return;
            }

            $this->nextTerm();
        }
    }
2008
2009
2010
    /**
     * Scans terms dictionary and returns next term
     *
     * In the SM_FULL_INFO and SM_MERGE_INFO scan modes positions of the term
     * are loaded as well (see currentTermPositions()).
     * When the stream is exhausted, all stream files are released and
     * null is returned.
     *
     * @return Zend_Search_Lucene_Index_Term|null
     */
    public function nextTerm()
    {
        if ($this->_tisFile === null  ||  $this->_termCount == 0) {
            // Nothing to read: clean up the stream state
            $this->_lastTerm          = null;
            $this->_lastTermInfo      = null;
            $this->_lastTermPositions = null;
            $this->_docMap            = null;

            // may be necessary for "empty" segment
            $this->_tisFile = null;
            $this->_frqFile = null;
            $this->_prxFile = null;

            return null;
        }

        // Term text is stored as a length of the prefix shared with the previous term plus a suffix
        $sharedPrefixLength = $this->_tisFile->readVInt();
        $suffix             = $this->_tisFile->readString();
        $fieldNum           = $this->_tisFile->readVInt();
        $text               = Zend_Search_Lucene_Index_Term::getPrefix($this->_lastTerm->text, $sharedPrefixLength) . $suffix;

        $this->_lastTerm = new Zend_Search_Lucene_Index_Term($text, $this->_fields[$fieldNum]->name);

        // Pointers are stored as deltas to the values of the previous term
        $docFreq     = $this->_tisFile->readVInt();
        $freqPointer = $this->_lastTermInfo->freqPointer + $this->_tisFile->readVInt();
        $proxPointer = $this->_lastTermInfo->proxPointer + $this->_tisFile->readVInt();
        // Skip offset is present only for terms with at least "skip interval" documents
        $skipOffset  = ($docFreq >= $this->_skipInterval) ? $this->_tisFile->readVInt() : 0;

        $this->_lastTermInfo = new Zend_Search_Lucene_Index_TermInfo($docFreq, $freqPointer, $proxPointer, $skipOffset);

        $loadPositions = ($this->_termsScanMode == self::SM_FULL_INFO  ||  $this->_termsScanMode == self::SM_MERGE_INFO);
        if ($loadPositions) {
            $this->_lastTermPositions = array();

            // Read document frequencies: array( docId => freq, ...)
            $this->_frqFile->seek($this->_lastTermInfo->freqPointer + $this->_frqFileOffset, SEEK_SET);
            $freqs = array();
            $docId = 0;
            for ($docNum = 0; $docNum < $this->_lastTermInfo->docFreq; $docNum++) {
                $docDelta = $this->_frqFile->readVInt();

                // Odd value: frequency is 1 and is not stored separately
                $isSingleOccurrence = ($docDelta % 2 == 1);

                $docId         += $isSingleOccurrence ? ($docDelta-1)/2 : $docDelta/2;
                $freqs[$docId]  = $isSingleOccurrence ? 1 : $this->_frqFile->readVInt();
            }

            // Read term positions. They are read for every document to keep
            // the file pointer correct, but stored only for mapped documents.
            $this->_prxFile->seek($this->_lastTermInfo->proxPointer + $this->_prxFileOffset, SEEK_SET);
            foreach ($freqs as $docId => $freq) {
                $positions = array();
                $position  = 0;

                for ($occurrence = 0; $occurrence < $freq; $occurrence++) {
                    // Positions are stored as deltas
                    $position   += $this->_prxFile->readVInt();
                    $positions[] = $position;
                }

                if (isset($this->_docMap[$docId])) {
                    $this->_lastTermPositions[$this->_docMap[$docId]] = $positions;
                }
            }
        }

        if (--$this->_termCount == 0) {
            // The last term has been read, files are not necessary any more
            $this->_tisFile = null;
            $this->_frqFile = null;
            $this->_prxFile = null;
        }

        return $this->_lastTerm;
    }
2090
2091
    /**
     * Close terms stream
     *
     * Drops stream files, current term data and the document ids map.
     * Should be used for resources clean up if stream is not read up to the end
     */
    public function closeTermsStream()
    {
        // Current term data
        $this->_lastTermPositions = null;
        $this->_lastTermInfo      = null;
        $this->_lastTerm          = null;

        // Document ids map
        $this->_docMap            = null;

        // Stream files
        $this->_prxFile = null;
        $this->_frqFile = null;
        $this->_tisFile = null;
    }
2108
2109
2110
    /**
     * Returns term in current position
     *
     * It's the term most recently set by the terms stream methods,
     * or null if there is no current term.
     *
     * @return Zend_Search_Lucene_Index_Term|null
     */
    public function currentTerm()
    {
        return $this->_lastTerm;
    }
2119
2120
2121
    /**
     * Returns an array of all positions of the current term in the documents.
     * Return array structure: array( docId => array( pos1, pos2, ...), ...)
     *
     * Null is returned if positions have not been loaded for the current term.
     *
     * @return array|null
     */
    public function currentTermPositions()
    {
        return $this->_lastTermPositions;
    }
2131
}
2132
2133