Failed Conditions
Pull Request — master (#2943)
by
unknown
03:12
created

AbstractIndex::lock()   B

Complexity

Conditions 7
Paths 8

Size

Total Lines 22

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 7
nc 8
nop 0
dl 0
loc 22
rs 8.6346
c 0
b 0
f 0
1
<?php
2
3
namespace dokuwiki\Search;
4
5
use dokuwiki\Search\Exception\IndexAccessException;
6
use dokuwiki\Search\Exception\IndexLockException;
7
use dokuwiki\Search\Exception\IndexWriteException;
8
use dokuwiki\Utf8;
9
10
/**
 * Abstract Class DokuWiki Index
 *
 * Shared low-level plumbing for index implementations: a directory based
 * lock, line oriented access to the "*.idx" files below $conf['indexdir'],
 * the page <-> PID mapping stored in page.idx, and helpers for the
 * "id*count:id*count" tuple lines used inside index files.
 *
 * @license    GPL 2 (http://www.gnu.org/licenses/gpl.html)
 * @author     Andreas Gohr <[email protected]>
 * @author Tom N Harris <[email protected]>
 */
abstract class AbstractIndex
{
    /* pages will be marked as deleted in page.idx */
    const INDEX_MARK_DELETED = '#deleted:';

    /** @var array $pidCache Cache for getPID(), maps page name => PID */
    protected static $pidCache = [];

    /**
     * Clean a name of a key for use as a file name.
     *
     * Romanizes non-latin characters, collapses runs of separator characters
     * into a single underscore, then strips away anything that's not a
     * letter, number, or underscore. The result is lower-cased.
     *
     * @author Tom N Harris <[email protected]>
     *
     * @param string $name
     * @return string
     */
    protected function cleanName($name)
    {
        $name = Utf8\Clean::romanize(trim((string)$name));
        // NOTE: after PHP un-escapes the single-quoted string the class reads
        // [ \./\:-], i.e. space, dot, slash, colon and dash. A literal
        // backslash is not part of it and is removed by the next step instead.
        // The pattern is kept unchanged so existing index file names stay valid.
        $name = preg_replace('#[ \./\\:-]+#', '_', $name);
        $name = preg_replace('/[^A-Za-z0-9_]/', '', $name);
        return strtolower($name);
    }

    /**
     * Get the numeric PID of a page
     *
     * A page that is not yet listed in page.idx is added to it: an entry that
     * was marked as deleted is revived in place, otherwise the page is
     * appended. The lookup runs under the indexer lock, which is released
     * even when reading or writing the index fails.
     *
     * Warning: The page may not exist in the filesystem.
     *
     * @param string $page The page to get the PID for
     * @return int  The numeric page id
     *
     * @throws IndexAccessException
     * @throws IndexLockException
     * @throws IndexWriteException
     */
    public function getPID($page)
    {
        if (!isset($page)) {
            throw new IndexAccessException('Indexer: invalid argument for getPID');
        }

        // return PID when it is in the cache; this avoids the expensive
        // index lookup for the most recently requested pages
        if (isset(static::$pidCache[$page])) {
            return static::$pidCache[$page];
        }

        $this->lock();
        try {
            $index = $this->getIndex('page', '');
            $pid = array_search($page, $index, true);
            if ($pid === false) {
                // search old page entry that had been marked as deleted
                $pid = array_search(self::INDEX_MARK_DELETED.$page, $index, true);
                if ($pid === false) {
                    // completely new page: append it
                    $pid = count($index);
                }
                $index[$pid] = $page;
                $this->saveIndex('page', '', $index);
            }
        } finally {
            // never leave the lock behind, even if the index could not be written
            $this->unlock();
        }

        // limit cache to 10 entries by discarding the oldest element
        // as in DokuWiki usually only the most recently
        // added item will be requested again.
        // array_shift() must not be used here: it renumbers integer keys, and
        // numeric page names such as "2019" are stored as integer keys.
        if (count(static::$pidCache) >= 10) {
            reset(static::$pidCache);
            unset(static::$pidCache[key(static::$pidCache)]);
        }
        static::$pidCache[$page] = $pid;

        return $pid;
    }

    /**
     * Reset pidCache
     *
     * Empties the static page => PID cache used by getPID().
     */
    protected function resetPIDCache()
    {
        static::$pidCache = [];
    }

    /**
     * Get the page id of a numeric PID
     *
     * @param int $pid The PID to get the page id for
     * @return string The page id, as stored on line $pid of page.idx
     */
    public function getPageFromPID($pid)
    {
        return $this->getIndexKey('page', '', $pid);
    }

    /**
     * Return a list of all pages
     *
     * Entries starting with the first character of the deleted marker are
     * left out. Array keys of the page index are preserved.
     *
     * Warning: pages may not exist in the filesystem.
     *
     * @return array            list of page names
     */
    public function getPages()
    {
        $marker = self::INDEX_MARK_DELETED[0];
        return array_filter(
            $this->getIndex('page', ''),
            function ($v) use ($marker) {
                // substr() instead of $v[0]: blank lines must not raise an
                // "uninitialized string offset" warning
                return substr($v, 0, 1) !== $marker;
            }
        );
    }

    /**
     * Lock the indexer
     *
     * The lock is a directory below $conf['lockdir']; creating it is the
     * atomic "acquire" operation. A lock directory older than five minutes is
     * considered stale and removed. Acquisition is retried every 5ms and given
     * up after about 5 seconds.
     *
     * @return true
     * @throws IndexLockException
     * @author Tom N Harris <[email protected]>
     *
     */
    protected function lock()
    {
        global $conf;
        $lock = $conf['lockdir'].'/_indexer.lock';
        $interval = 5000; // microseconds between two attempts
        $maxRuns = 1000;  // 1000 * 5ms = 5 seconds
        $run = 0;
        while (!@mkdir($lock, $conf['dmode'])) {
            usleep($interval);
            // make sure the age check below sees the current directory state
            clearstatcache(true, $lock);
            if (is_dir($lock) && time() - @filemtime($lock) > 60*5) {
                // looks like a stale lock - remove it
                if (!@rmdir($lock)) {
                    throw new IndexLockException('Indexer: removing the stale lock failed');
                }
            } elseif ($run++ == $maxRuns) {
                // we waited 5 seconds for that lock
                throw new IndexLockException('Indexer: time out to aquire lock');
            }
        }
        if (!empty($conf['dperm'])) {
            chmod($lock, $conf['dperm']);
        }
        return true;
    }

    /**
     * Release the indexer lock
     *
     * Removes the lock directory created by lock().
     *
     * @return true
     * @throws IndexLockException when the lock directory cannot be removed
     * @author Tom N Harris <[email protected]>
     *
     */
    protected function unlock()
    {
        global $conf;
        if (!@rmdir($conf['lockdir'].'/_indexer.lock')) {
            throw new IndexLockException('Indexer: unlock failed');
        }
        return true;
    }

    /**
     * Retrieve the entire index
     *
     * The $suffix argument is for an index that is split into multiple parts.
     * Different index files should use different base names.
     *
     * A missing or unreadable index file yields an empty array.
     *
     * @param string    $idx    name of the index
     * @param string    $suffix subpart identifier
     * @return array            list of lines without CR or LF
     *
     * @author Tom N Harris <[email protected]>
     */
    public function getIndex($idx, $suffix)
    {
        global $conf;
        $fn = $conf['indexdir'].'/'.$idx.$suffix.'.idx';
        if (!file_exists($fn)) {
            return [];
        }
        $lines = file($fn, FILE_IGNORE_NEW_LINES);
        // file() returns false on failure; callers expect an array
        return ($lines === false) ? [] : $lines;
    }

    /**
     * Replace the contents of the index with an array
     *
     * The lines are written to a temporary file first, which is then renamed
     * onto the real index file.
     *
     * @param string $idx name of the index
     * @param string $suffix subpart identifier
     * @param array $lines list of lines without LF
     * @return true
     *
     * @throws IndexWriteException
     * @author Tom N Harris <[email protected]>
     */
    protected function saveIndex($idx, $suffix, $lines)
    {
        global $conf;
        $fn = $conf['indexdir'].'/'.$idx.$suffix;
        $fh = @fopen($fn.'.tmp', 'w');
        if (!$fh) {
            throw new IndexWriteException("Failed to write {$idx}{$suffix} index");
        }
        fwrite($fh, implode("\n", $lines));
        if (!empty($lines)) {
            // terminate the last line, an empty index stays an empty file
            fwrite($fh, "\n");
        }
        fclose($fh);
        if (!empty($conf['fperm'])) {
            chmod($fn.'.tmp', $conf['fperm']);
        }
        io_rename($fn.'.tmp', $fn.'.idx');
        return true;
    }

    /**
     * Retrieve or insert a value in the index
     *
     * When the value is not found it is appended and the index is saved.
     *
     * @param string $idx name of the index
     * @param string $suffix subpart identifier
     * @param string $value line to find in the index
     * @return int  line number of the value in the index
     *
     * @throws IndexWriteException
     * @author Tom N Harris <[email protected]>
     */
    protected function addIndexKey($idx, $suffix, $value)
    {
        $index = $this->getIndex($idx, $suffix);
        $id = array_search($value, $index, true);
        if ($id === false) {
            $id = count($index);
            $index[$id] = $value;
            $this->saveIndex($idx, $suffix, $index);
        }
        return (int) $id;
    }

    /**
     * Write a line into the index
     *
     * Copies the existing index into a temporary file, replacing line $id on
     * the way. When $id lies beyond the end of the index, the gap is filled
     * with empty lines. The temporary file is then renamed onto the index.
     *
     * @param string $idx name of the index
     * @param string $suffix subpart identifier
     * @param int $id the line number
     * @param string $line line to write
     * @return true
     *
     * @throws IndexWriteException
     * @author Tom N Harris <[email protected]>
     */
    protected function saveIndexKey($idx, $suffix, $id, $line)
    {
        global $conf;
        $id = (int) $id;
        if (substr($line, -1) !== "\n") {
            $line .= "\n";
        }
        $fn = $conf['indexdir'].'/'.$idx.$suffix;
        $fh = @fopen($fn.'.tmp', 'w');
        if (!$fh) {
            throw new IndexWriteException("Failed to write {$idx}{$suffix} index");
        }

        // number of the last line copied so far, -1 when nothing was copied
        $ln = -1;
        $ih = @fopen($fn.'.idx', 'r');
        if ($ih) {
            while (($curline = fgets($ih)) !== false) {
                if (++$ln === $id) {
                    $curline = $line;
                } elseif (substr($curline, -1) !== "\n") {
                    // a last line without LF must not swallow an appended line
                    $curline .= "\n";
                }
                fwrite($fh, $curline);
            }
            fclose($ih);
        }
        if ($id > $ln) {
            // target line is beyond the end: pad with empty lines, then append
            while ($id > ++$ln) {
                fwrite($fh, "\n");
            }
            fwrite($fh, $line);
        }

        fclose($fh);
        if (!empty($conf['fperm'])) {
            chmod($fn.'.tmp', $conf['fperm']);
        }
        io_rename($fn.'.tmp', $fn.'.idx');
        return true;
    }

    /**
     * Retrieve a line from the index
     *
     * Returns an empty string when the index file is missing, cannot be
     * opened, or has no line with the given number.
     *
     * @param string    $idx    name of the index
     * @param string    $suffix subpart identifier
     * @param int       $id     the line number
     * @return string           a line with trailing whitespace removed
     *
     * @author Tom N Harris <[email protected]>
     */
    protected function getIndexKey($idx, $suffix, $id)
    {
        global $conf;
        $fn = $conf['indexdir'].'/'.$idx.$suffix.'.idx';
        if (!file_exists($fn)) {
            return '';
        }
        $fh = @fopen($fn, 'r');
        if (!$fh) {
            return '';
        }
        $wanted = (int) $id;
        $ln = -1;
        // $line ends up as false when the file is shorter than requested
        while (($line = fgets($fh)) !== false) {
            if (++$ln === $wanted) {
                break;
            }
        }
        fclose($fh);
        return rtrim((string)$line);
    }

    /**
     * Insert or replace a tuple in a line
     *
     * Any existing "id*count" tuple for $id is removed from the line. When
     * $count is non-zero a fresh tuple is put in front of the remaining ones.
     *
     * @author Tom N Harris <[email protected]>
     *
     * @param string     $line
     * @param int|string $id
     * @param int        $count
     * @return string
     */
    protected function updateTuple($line, $id, $count)
    {
        $line = (string) $line;
        if ($line !== '') {
            $line = preg_replace('/(^|:)'.preg_quote((string) $id, '/').'\*\d*/', '', $line);
        }
        $line = trim((string) $line, ':');
        if (!$count) {
            return $line;
        }
        // explicit comparison: a remaining line of "0" must not be dropped
        if ($line !== '') {
            return "{$id}*{$count}:".$line;
        }
        return "{$id}*{$count}";
    }

    /**
     * Split a line into an array of tuples
     *
     * Each "n*count" tuple is translated through $keys: the result maps
     * $keys[n] to count. Tuples with an empty or zero count, malformed tuples
     * and tuples whose key is unknown, null or false are skipped.
     *
     * @author Tom N Harris <[email protected]>
     * @author Andreas Gohr <[email protected]>
     *
     * @param array      $keys
     * @param string     $line
     * @return array
     */
    protected function parseTuples($keys, $line)
    {
        $result = [];
        if ($line == '') {
            return $result;
        }
        foreach (explode(':', $line) as $tuple) {
            if ($tuple === '') {
                continue;
            }
            // pad so that a tuple without '*' does not raise a warning
            list($key, $cnt) = array_pad(explode('*', $tuple), 2, null);
            if (!$cnt) {
                continue;
            }
            if (!isset($keys[$key]) || $keys[$key] === false) {
                continue;
            }
            $result[$keys[$key]] = $cnt;
        }
        return $result;
    }

    /**
     * Sum the counts in a list of tuples
     *
     * Malformed tuples (without a count) contribute nothing to the sum.
     *
     * @author Tom N Harris <[email protected]>
     *
     * @param string     $line
     * @return int
     */
    protected function countTuples($line)
    {
        $freq = 0;
        foreach (explode(':', (string) $line) as $tuple) {
            if ($tuple === '') {
                continue;
            }
            list(/* $pid */, $cnt) = array_pad(explode('*', $tuple), 2, 0);
            $freq += (int) $cnt;
        }
        return $freq;
    }

    /**
     * Clear the whole index
     *
     * @return bool  If the index has been cleared successfully
     */
    abstract public function clear();
}
408