Failed Conditions
Pull Request — master (#2943)
by Andreas
03:19
created

inc/Search/AbstractIndex.php (2 issues)

Upgrade to new PHP Analysis Engine

These results are based on our legacy PHP analysis; consider migrating to our new PHP analysis engine instead. Learn more

1
<?php
2
3
namespace dokuwiki\Search;
4
5
use dokuwiki\Utf8;
6
7
/**
8
 * Abstract Class DokuWiki Index
9
 *
10
 * @license    GPL 2 (http://www.gnu.org/licenses/gpl.html)
11
 * @author     Andreas Gohr <[email protected]>
12
 * @author Tom N Harris <[email protected]>
13
 */
14
abstract class AbstractIndex
15
{
16
    /* pages will be marked as deleted in page.idx */
17
    const INDEX_MARK_DELETED = '#deleted:';
18
19
    /** @var array $pidCache Cache for getPID() */
20
    protected static $pidCache = array();
21
22
    /**
     * AbstractIndex constructor
     *
     * Declared protected so the class cannot be instantiated with `new` from
     * outside the class hierarchy; extending classes should be Singletons.
     */
    protected function __construct()
    {
    }
27
28
    /**
     * Turn the name of a key into a string usable as a file name.
     *
     * The name is cast to string, trimmed and passed through
     * Utf8\Clean::romanize(). Runs of spaces, dots, slashes, colons and
     * hyphens are then collapsed into a single underscore, every remaining
     * character that is not an ASCII letter, digit or underscore is dropped,
     * and the result is lower-cased.
     *
     * @author Tom N Harris <[email protected]>
     *
     * @param string $name
     * @return string
     */
    protected function cleanName($name)
    {
        $romanized = Utf8\Clean::romanize(trim((string)$name));

        // Both patterns are applied in order by a single preg_replace() call.
        // NOTE(review): inside the single-quoted literal "\\:" reaches PCRE as
        // "\:" (an escaped colon), so a literal backslash is not part of the
        // first character class; backslashes are removed by the second pattern.
        $patterns = array('#[ \./\\:-]+#', '/[^A-Za-z0-9_]/');
        $replacements = array('_', '');

        return strtolower(preg_replace($patterns, $replacements, $romanized));
    }
46
47
    /**
     * Get the numeric PID of a page
     *
     * The page is looked up in page.idx. When it is not listed yet, an entry
     * that was marked as deleted for the same page is reactivated; otherwise
     * the page is appended to the index. In both cases page.idx is rewritten.
     *
     * Warning: The page may not exist in the filesystem.
     *
     * @param string $page The page to get the PID for
     * @return int|false  The page id on success, false when the indexer lock
     *                    could not be acquired or page.idx could not be
     *                    read or written
     */
    public function getPID($page)
    {
        // serve the most recently requested pages from the cache; this avoids
        // taking the lock and reading the whole page index
        if (isset(static::$pidCache[$page])) return static::$pidCache[$page];

        if (!$this->lock()) return false;

        $index = $this->getIndex('page', '');
        if (!is_array($index)) {
            // getIndex() hands back the result of file(), which is false when
            // the index could not be read; never hold the lock past a failure
            $this->unlock();
            return false;
        }

        $pid = array_search($page, $index, true);
        if ($pid === false) {
            // look for an old entry of this page that had been marked as deleted
            $pid = array_search(self::INDEX_MARK_DELETED.$page, $index, true);
            if ($pid === false) {
                // unknown page: append it at the end of the index
                $pid = count($index);
            }
            $index[$pid] = $page;

            if (!$this->saveIndex('page', '', $index)) {
                // release the lock before raising the error, which may end the script
                $this->unlock();
                trigger_error("Indexer: Failed to write page index", E_USER_ERROR);
                return false;
            }
        }
        $this->unlock();

        // the index is a list of lines, so its keys are line numbers
        $pid = (int) $pid;

        // limit the cache to 10 entries by discarding the oldest element, as in
        // DokuWiki usually only the most recently added item is requested again.
        // array_slice() with preserve_keys is used instead of array_shift(),
        // because array_shift() renumbers integer-like keys (numeric page ids)
        // and would make such entries answer for the wrong page.
        if (count(static::$pidCache) >= 10) {
            static::$pidCache = array_slice(static::$pidCache, 1, null, true);
        }
        static::$pidCache[$page] = $pid;

        return $pid;
    }
94
95
    /**
     * Reset pidCache
     *
     * Empties the static cache of page => PID pairs that getPID() fills.
     */
    protected function resetPIDCache()
    {
        static::$pidCache = [];
    }
102
103
    /**
     * Get the page id of a numeric PID
     *
     * Reads the line numbered $pid from the page index via getIndexKey().
     *
     * @param int $pid The PID to get the page id for
     * @return string The page id
     */
    public function getPageFromPID($pid)
    {
        $page = $this->getIndexKey('page', '', $pid);

        return $page;
    }
113
114
    /**
     * Return a list of all pages
     *
     * Entries of the page index whose first character equals the first
     * character of INDEX_MARK_DELETED are left out. The array keys of
     * getIndex() are preserved by array_filter(), so the result may have gaps.
     *
     * Warning: pages may not exist in the filesystem.
     *
     * @return array            list of page names
     */
    public function getPages()
    {
        $deletedFlag = self::INDEX_MARK_DELETED[0];

        return array_filter(
            $this->getIndex('page', ''),
            static function ($v) use ($deletedFlag) {
                // an empty line has no first character; test for it first so
                // that no "Uninitialized string offset" warning is raised
                // (empty lines are kept, exactly as before)
                return $v === '' || $v[0] !== $deletedFlag;
            }
        );
    }
128
129
    /**
     * Lock the indexer
     *
     * The lock is the directory "_indexer.lock" inside $conf['lockdir']; it is
     * held once mkdir() on it succeeds. A lock directory whose modification
     * time is more than five minutes in the past is treated as stale and removed.
     *
     * @author Tom N Harris <[email protected]>
     *
     * @return bool true when the lock was acquired, false otherwise
     */
    protected function lock()
    {
        global $conf;
        $lockDir = $conf['lockdir'].'/_indexer.lock';
        $attempts = 0;

        while (true) {
            if (@mkdir($lockDir, $conf['dmode'])) break;

            usleep(50);

            // filemtime() is only consulted when the lock directory exists
            $isStale = is_dir($lockDir) && (time() - @filemtime($lockDir) > 60*5);
            if ($isStale) {
                // looks like a stale lock - remove it and try again
                if (!@rmdir($lockDir)) {
                    trigger_error("Indexer: removing the stale lock failed", E_USER_ERROR);
                    return false;
                }
                continue;
            }

            // give up once more than 1000 attempts were unsuccessful
            // NOTE(review): with usleep(50) the sleeps add up to roughly 50ms,
            // not the "5 seconds" an earlier comment claimed - confirm the intended timeout
            if ($attempts++ == 1000) {
                trigger_error("Indexer: time out to aquire lock", E_USER_ERROR);
                return false;
            }
        }

        if (!empty($conf['dperm'])) {
            chmod($lockDir, $conf['dperm']);
        }
        return true;
    }
160
161
    /**
     * Release the indexer lock
     *
     * Removes the "_indexer.lock" directory inside $conf['lockdir'] and raises
     * a warning when that fails.
     *
     * @author Tom N Harris <[email protected]>
     *
     * @return bool true when the lock directory was removed
     */
    protected function unlock()
    {
        global $conf;

        $released = @rmdir($conf['lockdir'].'/_indexer.lock');
        if ($released) {
            return true;
        }

        trigger_error("Indexer: unlock failed", E_USER_WARNING);
        return false;
    }
177
178
    /**
     * Retrieve the entire index
     *
     * The index is the file "<idx><suffix>.idx" inside $conf['indexdir'].
     * The $suffix argument is for an index that is split into multiple parts.
     * Different index files should use different base names.
     *
     * A missing file yields an empty array. Otherwise the result of file() is
     * returned as is (one entry per line, line endings removed by
     * FILE_IGNORE_NEW_LINES).
     *
     * @param string    $idx    name of the index
     * @param string    $suffix subpart identifier
     * @return array            list of lines
     *
     * @author Tom N Harris <[email protected]>
     */
    public function getIndex($idx, $suffix)
    {
        global $conf;
        $path = "{$conf['indexdir']}/{$idx}{$suffix}.idx";

        return file_exists($path) ? file($path, FILE_IGNORE_NEW_LINES) : array();
    }
197
198
    /**
     * Replace the contents of the index with an array
     *
     * The lines are written to "<idx><suffix>.tmp" first and that file is then
     * moved over "<idx><suffix>.idx" with io_rename(). When writing the
     * temporary file fails it is removed and the existing index is left alone.
     *
     * @param string    $idx    name of the index
     * @param string    $suffix subpart identifier
     * @param array     $lines  list of lines without LF
     * @return bool             If saving succeeded
     *
     * @author Tom N Harris <[email protected]>
     */
    protected function saveIndex($idx, $suffix, $lines)
    {
        global $conf;
        $fn = $conf['indexdir'].'/'.$idx.$suffix;
        $tmp = $fn.'.tmp';

        // every line, including the last one, is terminated by a newline;
        // an empty list produces an empty file
        $data = implode("\n", $lines);
        if (!empty($lines)) {
            $data .= "\n";
        }

        $fh = @fopen($tmp, 'w');
        if (!$fh) return false;
        $written = fwrite($fh, $data);
        $closed = fclose($fh);
        if ($written !== strlen($data) || !$closed) {
            // never let an incomplete temporary file replace the index
            @unlink($tmp);
            return false;
        }

        if (!empty($conf['fperm'])) {
            chmod($tmp, $conf['fperm']);
        }

        // only an explicit false from io_rename() is treated as a failure
        if (io_rename($tmp, $fn.'.idx') === false) {
            return false;
        }
        return true;
    }
225
226
    /**
     * Retrieve or insert a value in the index
     *
     * Returns the line number of $value when the index already contains it.
     * Otherwise the value is appended as a new last line and the whole index
     * is saved. No lock is taken here.
     *
     * @param string    $idx    name of the index
     * @param string    $suffix subpart identifier
     * @param string    $value  line to find in the index
     * @return int|false        line number of the value in the index
     *                          or false if writing the index failed
     *
     * @author Tom N Harris <[email protected]>
     */
    protected function addIndexKey($idx, $suffix, $value)
    {
        $lines = $this->getIndex($idx, $suffix);

        $pos = array_search($value, $lines, true);
        if ($pos !== false) {
            return (int) $pos;
        }

        // not present yet: the new entry goes to the end of the list
        $pos = count($lines);
        $lines[$pos] = $value;
        if ($this->saveIndex($idx, $suffix, $lines)) {
            return (int) $pos;
        }

        trigger_error("Failed to write {$idx}{$suffix} index", E_USER_ERROR);
        return false;
    }
251
252
    /**
     * Write a line into the index
     *
     * The existing index is copied line by line into "<idx><suffix>.tmp" with
     * line number $id replaced by $line. When $id lies beyond the last line,
     * empty lines are added until $id is reached. The temporary file is then
     * moved over the index with io_rename(). When writing the temporary file
     * fails it is removed and the existing index is left alone.
     *
     * @param string    $idx    name of the index
     * @param string    $suffix subpart identifier
     * @param int       $id     the line number
     * @param string    $line   line to write
     * @return bool             If saving succeeded
     *
     * @author Tom N Harris <[email protected]>
     */
    protected function saveIndexKey($idx, $suffix, $id, $line)
    {
        global $conf;
        if (substr($line, -1) !== "\n") {
            $line .= "\n";
        }
        $fn = $conf['indexdir'].'/'.$idx.$suffix;
        $tmp = $fn.'.tmp';
        $fh = @fopen($tmp, 'w');
        if (!$fh) return false;

        $ok = true;  // turns false as soon as one write is incomplete
        $ln = -1;    // number of the line handled last
        $ih = @fopen($fn.'.idx', 'r');
        if ($ih) {
            while (($curline = fgets($ih)) !== false) {
                if (++$ln == $id) {
                    $out = $line;
                } else {
                    $out = $curline;
                    // a last line without newline must not get glued to
                    // whatever is written after it
                    if (substr($out, -1) !== "\n") {
                        $out .= "\n";
                    }
                }
                $ok = (fwrite($fh, $out) === strlen($out)) && $ok;
            }
            fclose($ih);
        }

        // no readable index, or $id lies beyond its last line:
        // pad with empty lines, then append the new line
        if (!$ih || $id > $ln) {
            while ($id > ++$ln) {
                $ok = (fwrite($fh, "\n") === 1) && $ok;
            }
            $ok = (fwrite($fh, $line) === strlen($line)) && $ok;
        }

        $ok = fclose($fh) && $ok;
        if (!$ok) {
            // never let an incomplete temporary file replace the index
            @unlink($tmp);
            return false;
        }

        if (!empty($conf['fperm'])) {
            chmod($tmp, $conf['fperm']);
        }

        // only an explicit false from io_rename() is treated as a failure
        if (io_rename($tmp, $fn.'.idx') === false) {
            return false;
        }
        return true;
    }
299
300
    /**
     * Retrieve a line from the index
     *
     * Scans "<idx><suffix>.idx" inside $conf['indexdir'] from the top until
     * the line numbered $id (counting from 0) is reached. An empty string is
     * returned when the file is missing, cannot be opened or has no such line.
     *
     * @param string    $idx    name of the index
     * @param string    $suffix subpart identifier
     * @param int       $id     the line number
     * @return string           a line with trailing whitespace removed
     *
     * @author Tom N Harris <[email protected]>
     */
    protected function getIndexKey($idx, $suffix, $id)
    {
        global $conf;
        $path = $conf['indexdir'].'/'.$idx.$suffix.'.idx';
        if (!file_exists($path)) {
            return '';
        }

        $handle = @fopen($path, 'r');
        if (!$handle) {
            return '';
        }

        $found = '';
        $current = 0;
        while (($row = fgets($handle)) !== false) {
            if ($current++ == $id) {
                $found = $row;
                break;
            }
        }
        fclose($handle);

        return rtrim($found);
    }
324
325
    /**
     * Insert or replace a tuple in a line
     *
     * A line is a colon separated list of "id*count" tuples. A tuple that
     * already exists for $id is removed from the line. When $count is truthy,
     * a fresh "id*count" tuple is put at the front of the remaining line;
     * otherwise the remaining line is returned unchanged.
     *
     * @author Tom N Harris <[email protected]>
     *
     * @param string     $line
     * @param int|string $id
     * @param int        $count
     * @return string
     */
    protected function updateTuple($line, $id, $count)
    {
        if ($line != '') {
            // the id has to follow the start of the line or a colon, so that
            // ids merely ending in $id are not touched
            $pattern = '/(^|:)'.preg_quote($id,'/').'\*\d*/';
            $line = preg_replace($pattern, '', $line);
        }
        $rest = trim($line, ':');

        if (!$count) {
            return $rest;
        }

        $tuple = $id.'*'.$count;
        return $rest ? $tuple.':'.$rest : $tuple;
    }
350
351
    /**
     * Split a line into an array of tuples
     *
     * A line is a colon separated list of "key*count" tuples. Each key is
     * translated through $keys and the count is stored under the translated
     * key. Tuples are skipped when they are empty, have no "*count" part,
     * have a falsy count, or when their key is missing from $keys or mapped
     * to false or null.
     *
     * @author Tom N Harris <[email protected]>
     * @author Andreas Gohr <[email protected]>
     *
     * @param array      $keys  translation of the tuple keys
     * @param string     $line
     * @return array            counts (as found in the line) by translated key
     */
    protected function parseTuples($keys, $line)
    {
        $result = array();
        if ($line == '') return $result;

        foreach (explode(':', $line) as $tuple) {
            if ($tuple === '') continue;

            $fields = explode('*', $tuple);
            // a malformed tuple without "*" has no count at all
            if (!isset($fields[1]) || !$fields[1]) continue;
            list($key, $cnt) = $fields;

            // isset() is false for missing keys and for null values alike,
            // so unknown keys are skipped without an "undefined key" warning
            if (!isset($keys[$key]) || $keys[$key] === false) continue;

            $result[$keys[$key]] = $cnt;
        }
        return $result;
    }
376
377
    /**
     * Sum the counts in a list of tuples
     *
     * A line is a colon separated list of "id*count" tuples. Empty tuples and
     * tuples without a "*count" part add nothing to the sum.
     *
     * @author Tom N Harris <[email protected]>
     *
     * @param string     $line
     * @return int
     */
    protected function countTuples($line)
    {
        $freq = 0;
        foreach (explode(':', $line) as $tuple) {
            if ($tuple === '') continue;

            $fields = explode('*', $tuple);
            // only the count is of interest; skip malformed tuples without one
            if (isset($fields[1])) {
                $freq += (int) $fields[1];
            }
        }
        return $freq;
    }
396
397
    /**
398
     * Clear the whole index
399
     *
400
     * @return bool  If the index has been cleared successfully
401
     */
402
    abstract public function clear();
403
}
404