Failed Conditions
Pull Request — master (#2943)
by Andreas
03:32
created

inc/Search/AbstractIndex.php (2 issues)

Upgrade to new PHP Analysis Engine

These results are based on our legacy PHP analysis; consider migrating to our new PHP analysis engine instead. Learn more

1
<?php
2
3
namespace dokuwiki\Search;
4
5
use dokuwiki\Search\Exception\IndexLockException;
6
use dokuwiki\Search\Exception\IndexWriteException;
7
use dokuwiki\Utf8;
8
9
/**
10
 * Abstract Class DokuWiki Index
11
 *
12
 * @license    GPL 2 (http://www.gnu.org/licenses/gpl.html)
13
 * @author     Andreas Gohr <[email protected]>
14
 * @author Tom N Harris <[email protected]>
15
 */
16
abstract class AbstractIndex
17
{
18
    /* pages will be marked as deleted in page.idx */
19
    const INDEX_MARK_DELETED = '#deleted:';
20
21
    /** @var array $pidCache Cache for getPID() */
22
    protected static $pidCache = array();
23
24
    /**
25
     * AbstractIndex constructor
26
     * extended classes should be Singleton, prevent direct object creation
27
     */
28
    // Intentionally empty; the protected visibility is what stops code outside the class hierarchy from calling `new`.
    protected function __construct() {}
29
30
    /**
     * Turn a key name into a string that is safe to use as a file name.
     *
     * The name is cast to string, trimmed and romanized. Runs of the separator
     * characters matched by the first pattern are then collapsed into a single
     * underscore, every remaining character outside A-Z, a-z, 0-9 and
     * underscore is dropped, and the result is lower-cased.
     *
     * @author Tom N Harris
     *
     * @param string $name raw key name
     * @return string cleaned, lower-case name
     */
    protected function cleanName($name)
    {
        $romanized = Utf8\Clean::romanize(trim((string)$name));
        // preg_replace() applies the pattern/replacement pairs one after another
        $cleaned = preg_replace(
            array('#[ \./\\:-]+#', '/[^A-Za-z0-9_]/'),
            array('_', ''),
            $romanized
        );
        return strtolower($cleaned);
    }
48
49
    /**
     * Get the numeric PID of a page, registering the page if it is unknown
     *
     * The PID is the line number of the page in page.idx. A page that is not
     * listed yet takes over the line of an entry previously marked as deleted
     * for the same name, or is appended as a new last line; in both cases the
     * page index is written back.
     *
     * Warning: The page may not exist in the filesystem.
     *
     * @param string $page The page to get the PID for
     * @return int|false  The page id; false only if lock() reports failure without throwing
     * @throws IndexWriteException when the page index cannot be written
     * @throws IndexLockException when the indexer lock cannot be acquired or released
     */
    public function getPID($page)
    {
        // serve the most recently requested pages from the cache to avoid
        // locking and scanning the whole page index again
        if (isset(static::$pidCache[$page])) return static::$pidCache[$page];

        if (!$this->lock()) return false;

        try {
            $index = $this->getIndex('page', '');
            $pid = array_search($page, $index, true);
            if ($pid === false) {
                // search old page entry that had been marked as deleted and
                // reuse its line, otherwise append a new line
                $pid = array_search(self::INDEX_MARK_DELETED.$page, $index, true);
                if ($pid === false) {
                    $pid = count($index);
                }
                $index[$pid] = $page;

                if (!$this->saveIndex('page', '', $index)) {
                    throw new IndexWriteException('Indexer: Failed to write page index');
                }
            }
            // array_search() is typed int|string; the documented return is int
            $pid = (int)$pid;

            // limit cache to 10 entries by discarding the oldest element.
            // array_shift() is not usable here: it renumbers integer keys, and
            // numeric page names are stored as integer keys by PHP
            if (count(static::$pidCache) >= 10) {
                reset(static::$pidCache);
                unset(static::$pidCache[key(static::$pidCache)]);
            }
            static::$pidCache[$page] = $pid;
        } finally {
            // release the lock on every path, including a failed index write;
            // an exception from unlock() supersedes one already in flight
            $this->unlock();
        }
        return $pid;
    }
97
98
    /**
     * Empty the static PID cache so that later getPID() calls consult the page index again
     */
    protected function resetPIDCache()
    {
        static::$pidCache = [];
    }
105
106
    /**
     * Look up the page id stored for a numeric PID
     *
     * Reads line $pid of the page index through getIndexKey().
     *
     * @param int $pid The PID to get the page id for
     * @return string The page id
     */
    public function getPageFromPID($pid)
    {
        $pageId = $this->getIndexKey('page', '', $pid);
        return $pageId;
    }
116
117
    /**
     * Return a list of all pages
     *
     * Entries whose first character equals the first character of
     * INDEX_MARK_DELETED are left out. array_filter() preserves keys, so each
     * remaining key is still the line number (PID) of that page.
     *
     * Warning: pages may not exist in the filesystem.
     *
     * @return array            list of page names, keyed by PID
     */
    public function getPages()
    {
        $marker = self::INDEX_MARK_DELETED[0];
        return array_filter(
            $this->getIndex('page', ''),
            function ($entry) use ($marker) {
                // an empty line has no offset 0 to read; it carries no marker
                // and is kept, exactly as before
                return $entry === '' || $entry[0] !== $marker;
            }
        );
    }
131
132
    /**
     * Lock the indexer
     *
     * The lock is a directory named _indexer.lock inside $conf['lockdir'];
     * creating it succeeds for one process only. While the directory exists
     * the call polls for roughly five seconds. A lock directory whose
     * modification time is more than five minutes old is treated as stale
     * and removed.
     *
     * @return true
     * @throws IndexLockException when a stale lock cannot be removed or the wait times out
     * @author Tom N Harris
     */
    protected function lock()
    {
        global $conf;
        // usleep() takes microseconds: 1000 attempts x 5 ms is the intended
        // five second wait (a 50 microsecond pause gave up after ~50 ms)
        $pollIntervalUs = 5000;
        $maxAttempts = 1000;
        $staleAfter = 60 * 5;

        $attempts = 0;
        $lock = $conf['lockdir'].'/_indexer.lock';
        while (!@mkdir($lock, $conf['dmode'])) {
            usleep($pollIntervalUs);
            if (is_dir($lock) && time() - @filemtime($lock) > $staleAfter) {
                // looks like a stale lock - remove it and try again
                if (!@rmdir($lock)) {
                    throw new IndexLockException('Indexer: removing the stale lock failed');
                }
            } elseif ($attempts++ == $maxAttempts) {
                // we waited 5 seconds for that lock
                throw new IndexLockException('Indexer: time out to aquire lock');
            }
        }
        if (!empty($conf['dperm'])) {
            chmod($lock, $conf['dperm']);
        }
        return true;
    }
162
163
    /**
     * Release the indexer lock
     *
     * Removes the _indexer.lock directory inside $conf['lockdir'].
     *
     * @return true
     * @throws IndexLockException when the lock directory cannot be removed
     * @author Tom N Harris
     */
    protected function unlock()
    {
        global $conf;
        $lockDir = $conf['lockdir'].'/_indexer.lock';
        $removed = @rmdir($lockDir);
        if ($removed) {
            return true;
        }
        throw new IndexLockException('Indexer: unlock failed');
    }
179
180
    /**
     * Retrieve the entire index
     *
     * The $suffix argument is for an index that is split into multiple parts.
     * Different index files should use different base names.
     *
     * A missing or unreadable index file yields an empty array.
     *
     * @param string    $idx    name of the index
     * @param string    $suffix subpart identifier
     * @return array            list of lines without CR or LF
     *
     * @author Tom N Harris
     */
    public function getIndex($idx, $suffix)
    {
        global $conf;
        $fn = $conf['indexdir'].'/'.$idx.$suffix.'.idx';
        if (!file_exists($fn)) return array();
        $lines = file($fn, FILE_IGNORE_NEW_LINES);
        // file() returns false when reading fails (e.g. the file vanished after
        // the check above); callers count() and search the result, so always
        // hand back an array
        return ($lines === false) ? array() : $lines;
    }
199
200
    /**
     * Replace the contents of the index with an array
     *
     * The lines are written to a temporary file next to the index, which then
     * replaces the .idx file via io_rename(). If the temporary file cannot be
     * written completely it is removed and the existing index is left alone.
     *
     * @param string    $idx    name of the index
     * @param string    $suffix subpart identifier
     * @param array     $lines  list of lines without LF
     * @return bool             If saving succeeded
     *
     * @author Tom N Harris
     */
    protected function saveIndex($idx, $suffix, $lines)
    {
        global $conf;
        $fn = $conf['indexdir'].'/'.$idx.$suffix;
        $tmp = $fn.'.tmp';

        // every line, including the last one, is terminated by a line feed
        $data = implode("\n", $lines);
        if (!empty($lines)) {
            $data .= "\n";
        }

        $fh = @fopen($tmp, 'w');
        if (!$fh) return false;
        // a short or failed write must never replace the existing index
        $complete = (fwrite($fh, $data) === strlen($data));
        $complete = fclose($fh) && $complete;
        if (!$complete) {
            @unlink($tmp);
            return false;
        }

        if (!empty($conf['fperm'])) {
            chmod($tmp, $conf['fperm']);
        }
        // NOTE(review): io_rename() is defined elsewhere; if it signals failure
        // by returning false that is passed on, any other result counts as success
        return io_rename($tmp, $fn.'.idx') !== false;
    }
227
228
    /**
     * Retrieve or insert a value in the index
     *
     * Returns the line number of $value. When the value is not in the index
     * yet it is appended as a new last line and the index is saved.
     *
     * @param string $idx name of the index
     * @param string $suffix subpart identifier
     * @param string $value line to find in the index
     * @return int      line number of the value in the index
     *
     * @throws IndexWriteException when the extended index cannot be saved
     * @author Tom N Harris
     */
    protected function addIndexKey($idx, $suffix, $value)
    {
        $entries = $this->getIndex($idx, $suffix);
        $position = array_search($value, $entries, true);
        if ($position !== false) {
            return (int) $position;
        }

        // not listed yet: the new entry becomes the last line
        $position = count($entries);
        $entries[$position] = $value;
        if ($this->saveIndex($idx, $suffix, $entries)) {
            return (int) $position;
        }
        throw new IndexWriteException("Failed to write {$idx}{$suffix} index");
    }
252
253
    /**
     * Write a line into the index
     *
     * The index is copied line by line into a temporary file with line $id
     * replaced. When $id lies beyond the last existing line (or no index file
     * can be opened) the gap is filled with empty lines and $line is appended.
     * The temporary file then replaces the .idx file via io_rename(). If the
     * temporary file cannot be written completely it is removed and the
     * existing index is left alone.
     *
     * @param string    $idx    name of the index
     * @param string    $suffix subpart identifier
     * @param int       $id     the line number
     * @param string    $line   line to write
     * @return bool             If saving succeeded
     *
     * @author Tom N Harris
     */
    protected function saveIndexKey($idx, $suffix, $id, $line)
    {
        global $conf;
        if (substr($line, -1) !== "\n") {
            $line .= "\n";
        }
        $fn = $conf['indexdir'].'/'.$idx.$suffix;
        $tmp = $fn.'.tmp';
        $fh = @fopen($tmp, 'w');
        if (!$fh) return false;

        $ok = true;
        $ln = -1; // number of the last line copied so far
        $ih = @fopen($fn.'.idx', 'r');
        $hadIndex = (bool)$ih;
        if ($hadIndex) {
            while ($ok && ($curline = fgets($ih)) !== false) {
                if (++$ln == $id) {
                    $out = $line;
                } else {
                    $out = $curline;
                    // an unterminated last line would otherwise run into
                    // whatever gets appended after it
                    if (substr($out, -1) !== "\n") {
                        $out .= "\n";
                    }
                }
                $ok = (fwrite($fh, $out) === strlen($out));
            }
            fclose($ih);
        }

        if ($ok && (!$hadIndex || $id > $ln)) {
            // pad with empty lines up to the requested line number
            $out = '';
            while ($id > ++$ln) {
                $out .= "\n";
            }
            $out .= $line;
            $ok = (fwrite($fh, $out) === strlen($out));
        }

        $ok = fclose($fh) && $ok;
        if (!$ok) {
            // a short or failed write must never replace the existing index
            @unlink($tmp);
            return false;
        }

        if (!empty($conf['fperm'])) {
            chmod($tmp, $conf['fperm']);
        }
        // NOTE(review): io_rename() is defined elsewhere; if it signals failure
        // by returning false that is passed on, any other result counts as success
        return io_rename($tmp, $fn.'.idx') !== false;
    }
300
301
    /**
     * Retrieve a line from the index
     *
     * Returns an empty string when the index file does not exist, cannot be
     * opened, or has no line with the requested number.
     *
     * @param string    $idx    name of the index
     * @param string    $suffix subpart identifier
     * @param int       $id     the line number
     * @return string           a line with trailing whitespace removed
     *
     * @author Tom N Harris
     */
    protected function getIndexKey($idx, $suffix, $id)
    {
        global $conf;
        $fn = $conf['indexdir'].'/'.$idx.$suffix.'.idx';
        if (!file_exists($fn)) return '';
        $fh = @fopen($fn, 'r');
        if (!$fh) return '';

        // stays false unless the requested line number is reached
        $found = false;
        for ($ln = 0; ($line = fgets($fh)) !== false; $ln++) {
            if ($ln == $id) {
                $found = $line;
                break;
            }
        }
        fclose($fh);
        return rtrim((string)$found);
    }
325
326
    /**
     * Insert or replace a tuple in a line
     *
     * A line is a colon separated list of "id*count" tuples. Any tuple for
     * $id is removed from the line first. With a non-zero $count a fresh
     * tuple is put in front of what is left; otherwise the id simply
     * disappears from the line.
     *
     * @author Tom N Harris
     *
     * @param string     $line
     * @param int|string $id
     * @param int        $count
     * @return string
     */
    protected function updateTuple($line, $id, $count)
    {
        $remaining = $line;
        if ($remaining != '') {
            $pattern = '/(^|:)'.preg_quote($id,'/').'\*\d*/';
            $remaining = preg_replace($pattern, '', $remaining);
        }
        // removing a tuple can leave a separator at either end
        $remaining = trim($remaining, ':');

        if (!$count) {
            return $remaining;
        }
        $tuple = $id.'*'.$count;
        return $remaining ? $tuple.':'.$remaining : $tuple;
    }
351
352
    /**
     * Split a line into an array of tuples
     *
     * A line is a colon separated list of "key*count" tuples. Each key is
     * translated through $keys and the result maps the translated key to its
     * count. Tuples are skipped when they are empty, have no or a zero count,
     * or when $keys has no usable entry (missing, null or false) for the key.
     *
     * @author Tom N Harris
     * @author Andreas Gohr
     *
     * @param array      $keys  translation table from tuple key to result key
     * @param string     $line
     * @return array            translated key => count (as found in the line)
     */
    protected function parseTuples($keys, $line)
    {
        $result = array();
        if ($line == '') return $result;
        foreach (explode(':', $line) as $tuple) {
            if ($tuple === '') continue;
            // pad so that a tuple without '*' gives a null count instead of
            // reading an undefined offset
            list($key, $cnt) = array_pad(explode('*', $tuple), 2, null);
            if (!$cnt) continue;
            // isset() is false for missing and for null entries alike
            if (!isset($keys[$key]) || $keys[$key] === false) continue;
            $result[$keys[$key]] = $cnt;
        }
        return $result;
    }
377
378
    /**
     * Sum the counts in a list of tuples
     *
     * A line is a colon separated list of "id*count" tuples. Empty tuples
     * and tuples without a count part add nothing to the sum.
     *
     * @author Tom N Harris
     *
     * @param string     $line
     * @return int
     */
    protected function countTuples($line)
    {
        $freq = 0;
        foreach (explode(':', $line) as $tuple) {
            if ($tuple === '') continue;
            $parts = explode('*', $tuple);
            // a tuple without '*' has no count part; skip it rather than
            // read an undefined offset
            if (isset($parts[1])) {
                $freq += (int)$parts[1];
            }
        }
        return $freq;
    }
397
398
    /**
399
     * Clear the whole index
400
     *
401
     * @return bool  If the index has been cleared successfully
402
     */
403
    // No implementation here: every concrete index class has to provide its own clear().
    abstract public function clear();
404
}
405