Failed Conditions
Pull Request — master (#2943)
by Andreas
03:32
created

MetadataIndex::lookupKey()   F

Complexity

Conditions 21
Paths 480

Size

Total Lines 91

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 21
nc 480
nop 3
dl 0
loc 91
rs 0.7221
c 0
b 0
f 0

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
<?php
2
3
namespace dokuwiki\Search;
4
5
use dokuwiki\Search\Exception\IndexAccessException;
6
7
/**
8
 * Class DokuWiki Metadata Index (Singleton)
9
 *
10
 * @license    GPL 2 (http://www.gnu.org/licenses/gpl.html)
11
 * @author     Andreas Gohr <[email protected]>
12
 * @author Tom N Harris <[email protected]>
13
 */
14
class MetadataIndex extends AbstractIndex
15
{
16
    /** @var MetadataIndex|null $instance the shared instance, null until first requested */
    protected static $instance = null;

    /**
     * Return the shared MetadataIndex instance, creating it on first access
     *
     * @return MetadataIndex
     */
    public static function getInstance()
    {
        if (static::$instance === null) {
            static::$instance = new static();
        }

        return static::$instance;
    }
31
32
    /**
     * Return a list of pages containing the metadata key
     * Note: override parent class methods
     *
     * Without a key the complete page index is returned. For the 'title' key
     * only pages with a non-empty entry in the title index are returned. For
     * any other key the pages referenced by the key's "_i" index are returned.
     *
     * @param string|null $key list only pages containing the metadata key
     * @return array           list of page names
     *
     * @author Tom N Harris <[email protected]>
     */
    public function getPages($key = null)
    {
        $page_idx = $this->getIndex('page', '');
        if (is_null($key)) return $page_idx; // same as parent method

        // Special handling for titles
        if ($key == 'title') {
            $title_idx = $this->getIndex('title', '');
            // drop pages beyond the end of the title index, they have no title entry
            array_splice($page_idx, count($title_idx));
            foreach ($title_idx as $i => $title) {
                if ($title === '') unset($page_idx[$i]);
            }
            return array_values($page_idx);
        }

        $metaname = $this->cleanName($key);
        $pages = array();
        foreach ($this->getIndex($metaname.'_i', '') as $line) {
            // Array union keeps the page names as keys. array_merge() would
            // renumber integer-like keys, replacing purely numeric page names
            // by 0, 1, 2, ..., and would re-copy the accumulator on every line.
            $pages += $this->parseTuples($page_idx, $line);
        }

        // numeric page names are turned into integer keys by PHP, hand out strings again
        return array_map('strval', array_keys($pages));
    }
64
65
    /**
     * Add/update keys to/of the metadata index
     *
     * Adding new keys does not remove other keys for the page.
     * An empty value will erase the key.
     * The $key parameter can be an array to add multiple keys. $value will
     * not be used if $key is an array.
     *
     * The lock is released even when one of the index operations throws.
     *
     * @param string $page a page name
     * @param mixed $key a key string or array of key=>value pairs
     * @param mixed $value the value or list of values
     * @param bool $requireLock should be false only if the caller is responsible for index lock
     * @return bool  if the function completed successfully, false if the page has no PID
     *
     * @throws IndexAccessException
     * @throws Exception\IndexLockException
     * @throws Exception\IndexWriteException
     * @author Michael Hamann <[email protected]>
     * @author Tom N Harris <[email protected]>
     */
    public function addMetaKeys($page, $key, $value = null, $requireLock = true)
    {
        if (!is_array($key)) {
            $key = array($key => $value);
        } elseif (!is_null($value)) {
            // $key is array, but $value is not null
            throw new IndexAccessException('array passed to addMetaKeys but value is not null');
        }

        // load known documents
        $pid = $this->getPID($page);
        if ($pid === false) {
            return false;
        }

        // acquire outside of try: a failed lock() must not trigger unlock()
        if ($requireLock) $this->lock();

        try {
            // Special handling for titles so the index file is simpler
            if (array_key_exists('title', $key)) {
                $title = $key['title'];
                if (is_array($title)) {
                    $title = $title[0];
                }
                $this->saveIndexKey('title', '', $pid, $title);
                unset($key['title']);
            }

            foreach ($key as $name => $values) {
                $metaname = $this->cleanName($name);
                $this->addIndexKey('metadata', '', $metaname);
                $metaidx = $this->getIndex($metaname.'_i', '');
                $metawords = $this->getIndex($metaname.'_w', '');
                $addwords = false;

                if (!is_array($values)) $values = array($values);

                // value ids currently stored for the page, mapped to an action:
                // -1 means remove, 0 keep, 1 add
                $val_idx = $this->getIndexKey($metaname.'_p', '', $pid);
                $val_idx = ($val_idx !== '')
                    ? array_fill_keys(explode(':', $val_idx), -1)
                    : array();

                foreach ($values as $val) {
                    $val = (string)$val;
                    if ($val === '') continue;

                    $id = array_search($val, $metawords, true);
                    if ($id === false) {
                        // didn't find $val, so we'll add it to the end of metawords
                        // and create a placeholder in metaidx
                        $id = count($metawords);
                        $metawords[$id] = $val;
                        $metaidx[$id] = '';
                        $addwords = true;
                    }
                    // test if value is already in the index
                    if (isset($val_idx[$id]) && $val_idx[$id] <= 0) {
                        $val_idx[$id] = 0;
                    } else { // else add it
                        $val_idx[$id] = 1;
                    }
                }

                if ($addwords) {
                    $this->saveIndex($metaname.'_w', '', $metawords);
                }

                $vals_changed = false;
                foreach ($val_idx as $id => $action) {
                    if ($action == -1) {
                        $metaidx[$id] = $this->updateTuple($metaidx[$id], $pid, 0);
                        $vals_changed = true;
                        unset($val_idx[$id]);
                    } elseif ($action == 1) {
                        $metaidx[$id] = $this->updateTuple($metaidx[$id], $pid, 1);
                        $vals_changed = true;
                    }
                }

                if ($vals_changed) {
                    $this->saveIndex($metaname.'_i', '', $metaidx);
                    $this->saveIndexKey($metaname.'_p', '', $pid, implode(':', array_keys($val_idx)));
                }

                // free the used memory before the next key is processed
                unset($metaidx, $metawords);
            }
        } finally {
            // never leave the index locked, even if an index operation threw
            if ($requireLock) $this->unlock();
        }

        return true;
    }
179
180
    /**
     * Delete keys of the page from metadata index
     *
     * Keys that are neither 'title' nor listed in the 'metadata' index are
     * silently skipped. The lock is released even when an index operation throws.
     *
     * NOTE(review): keys are compared with the names stored in the 'metadata'
     * index, which addMetaKeys() writes after cleanName(); callers presumably
     * have to pass already cleaned names - confirm.
     *
     * @param string $page a page name
     * @param mixed $keys a key string or array of keys, all known keys when empty
     * @param bool $requireLock should be false only if the caller is responsible for index lock
     * @return bool  true when the keys have been processed, false if the page has no PID
     *
     * @throws Exception\IndexLockException
     * @author Satoshi Sahara <[email protected]>
     * @author Tom N Harris <[email protected]>
     */
    public function deleteMetaKeys($page, $keys = [], $requireLock = true)
    {
        // load known documents
        $pid = $this->getPID($page);
        if ($pid === false) {
            return false;
        }

        // acquire outside of try: a failed lock() must not trigger unlock()
        if ($requireLock) $this->lock();

        try {
            $knownKeys = $this->getIndex('metadata', '');
            $knownKeys[] = 'title';

            // remove all metadata keys of the page when $keys is empty
            $keys = (empty($keys)) ? $knownKeys : (array)$keys;

            foreach ($keys as $metaname) {
                // compare as strings so that e.g. 0 or '1e1' cannot match unrelated key names
                $metaname = (string)$metaname;

                if ($metaname === 'title') {
                    // Special handling for titles so the index file is simpler
                    $this->saveIndexKey('title', '', $pid, '');
                } elseif (in_array($metaname, $knownKeys, true)) {
                    $meta_idx = $this->getIndex($metaname.'_i', '');
                    $val_idx = explode(':', $this->getIndexKey($metaname.'_p', '', $pid));
                    foreach ($val_idx as $id) {
                        if ($id === '') continue;
                        // a count of 0 is passed to drop the page from the value's tuple line
                        $meta_idx[$id] = $this->updateTuple($meta_idx[$id], $pid, 0);
                    }
                    $this->saveIndex($metaname.'_i', '', $meta_idx);
                    $this->saveIndexKey($metaname.'_p', '', $pid, '');
                }
            }
        } finally {
            // never leave the index locked, even if an index operation threw
            if ($requireLock) $this->unlock();
        }

        return true;
    }
227
228
    /**
     * Find pages containing a metadata key
     *
     * The metadata values are compared as case-sensitive strings. Pass a
     * callback function that returns true or false to use a different
     * comparison function. The function will be called with the $value being
     * searched for as the first argument, and the word in the index as the
     * second argument. The function preg_match can be used directly if the
     * values are regexes.
     *
     * Without a callback, a leading and/or trailing '*' in a search term acts
     * as a wildcard. All index entries equal to a term are considered, so a
     * 'title' lookup returns every page carrying that title.
     *
     * @param string          $key    name of the metadata key to look for
     * @param string|string[] $value  search term(s); taken by reference but only read
     * @param callable|null   $func   comparison function
     * @return array                  list of page names, or a list per query value
     *                                (keyed by the value) if $value is an array
     *
     * @author Tom N Harris <[email protected]>
     * @author Michael Hamann <[email protected]>
     */
    public function lookupKey($key, &$value, $func = null)
    {
        if (!is_array($value)) {
            $value_array = array($value);
        } else {
            $value_array =& $value;
        }

        $isTitle = ($key == 'title');
        $metaname = $this->cleanName($key);

        // get all words in order to search the matching ids
        $words = $isTitle
            ? $this->getIndex('title', '')
            : $this->getIndex($metaname.'_w', '');

        // the matching ids for the provided value(s)
        $value_ids = $this->findValueIds($words, $value_array, $func);

        unset($words); // free the used memory

        // initialize the result so it won't be null
        $result = array();
        foreach ($value_array as $val) {
            $result[$val] = array();
        }

        $page_idx = $this->getIndex('page', '');

        if ($isTitle) {
            // Special handling for titles: the matched ids are positions in the page index
            foreach ($value_ids as $pid => $val_list) {
                $page = $page_idx[$pid];
                foreach ($val_list as $val) {
                    $result[$val][] = $page;
                }
            }
        } else {
            // load all lines and pages so the used lines can be taken
            // and matched with the pages
            $lines = $this->getIndex($metaname.'_i', '');

            foreach ($value_ids as $value_id => $val_list) {
                // parse the tuples of the form page_id*1:page2_id*1 and so on,
                // return value is an array with page_id => 1, page2_id => 1 etc.
                // so take the keys only
                $pages = array_keys($this->parseTuples($page_idx, $lines[$value_id]));
                foreach ($val_list as $val) {
                    $result[$val] = array_merge($result[$val], $pages);
                }
            }
        }

        if (!is_array($value)) $result = $result[$value];
        return $result;
    }

    /**
     * Map index positions to the search terms that match the entry stored there
     *
     * @param array         $words   index entries to search in
     * @param array         $values  search terms
     * @param callable|null $func    comparison function, called as $func($term, $entry)
     * @return array                 index position => list of matching search terms
     */
    private function findValueIds($words, $values, $func)
    {
        $value_ids = array();

        foreach ($values as $val) {
            if (!is_null($func)) {
                // custom comparison: test the term against every entry
                foreach ($words as $i => $word) {
                    if (call_user_func_array($func, array($val, $word))) {
                        $value_ids[$i][] = $val;
                    }
                }
                continue;
            }

            // check for wildcards; an anchor is dropped on the side where a '*' was given
            $xval = $val;
            $caret = '^';
            $dollar = '$';
            if (substr($xval, 0, 1) == '*') {
                $xval = substr($xval, 1);
                $caret = '';
            }
            if (substr($xval, -1, 1) == '*') {
                $xval = substr($xval, 0, -1);
                $dollar = '';
            }

            if (!$caret || !$dollar) {
                $re = $caret.preg_quote($xval, '/').$dollar;
                $matches = array_keys(preg_grep('/'.$re.'/', $words));
            } else {
                // exact match: collect every position holding the term, not just the
                // first one, because the title index may contain the same title
                // for several pages
                $matches = array_keys($words, $val, true);
            }

            foreach ($matches as $i) {
                $value_ids[$i][] = $val;
            }
        }

        return $value_ids;
    }
337
338
    /**
     * Renames a meta value in the index
     * This doesn't change the meta value in the pages, it assumes that
     * all pages will be updated.
     *
     * If $newvalue is not yet known, the entry of $oldvalue is renamed in
     * place. Otherwise the page references of $oldvalue are merged into those
     * of $newvalue. Nothing is changed when $oldvalue is not in the index.
     * The lock is released even when an index operation throws.
     *
     * @param string $key The metadata key of which a value shall be changed
     * @param string $oldvalue The old value that shall be renamed
     * @param string $newvalue The new value to which the old value shall be renamed,
     *                          if exists values will be merged
     * @return bool  If renaming the value has been successful, false on error
     * @throws Exception\IndexLockException
     */
    public function renameMetaValue($key, $oldvalue, $newvalue)
    {
        $this->lock();

        try {
            // change the relation references index
            // (read from the same index name that is written with saveIndex() below)
            $metavalues = $this->getIndex($key.'_w', '');
            $oldid = array_search($oldvalue, $metavalues, true);
            if ($oldid === false) {
                // the old value is unknown, nothing to rename
                return true;
            }

            $newid = array_search($newvalue, $metavalues, true);
            if ($newid === false) {
                // the new value is unknown so far: rename the entry in place
                $metavalues[$oldid] = $newvalue;
                return (bool)$this->saveIndex($key.'_w', '', $metavalues);
            }

            // free memory
            unset($metavalues);

            // okay, now we have two entries for the same value. we need to merge them.
            $indexline = $this->getIndexKey($key.'_i', '', $oldid);
            if ($indexline != '') {
                $newindexline = $this->getIndexKey($key.'_i', '', $newid);
                $pagekeys     = $this->getIndex($key.'_p', '');

                foreach (explode(':', $indexline) as $part) {
                    if ($part === '') continue; // skip empty segments, e.g. from a stray ':'

                    list($id, $count) = explode('*', $part);
                    $newindexline = $this->updateTuple($newindexline, $id, $count);

                    // an empty or missing line must not yield an empty value id
                    $keyline = (isset($pagekeys[$id]) && $pagekeys[$id] !== '')
                        ? explode(':', $pagekeys[$id])
                        : array();
                    // remove old meta value
                    $keyline = array_diff($keyline, array($oldid));
                    // add new meta value when not already present
                    if (!in_array($newid, $keyline)) {
                        array_push($keyline, $newid);
                    }
                    $pagekeys[$id] = implode(':', $keyline);
                }

                $this->saveIndex($key.'_p', '', $pagekeys);
                unset($pagekeys);
                $this->saveIndexKey($key.'_i', '', $oldid, '');
                $this->saveIndexKey($key.'_i', '', $newid, $newindexline);
            }

            return true;
        } finally {
            // runs for every return above and for exceptions alike
            $this->unlock();
        }
    }
399
400
    /**
     * Build a frequency table of index entries, most frequent first
     *
     * With $key 'title' the page titles are counted, with any other key the
     * values of that metadata key, and without a key the words of the
     * fulltext index.
     *
     * @param int       $min    bottom frequency threshold
     * @param int       $max    upper frequency limit. No limit if $max<$min
     * @param int       $minlen minimum length of words to count
     * @param string    $key    metadata key to list. Uses the fulltext index if not given
     * @return array            list of words as the keys and frequency as values
     *
     * @author Tom N Harris <[email protected]>
     */
    public function histogram($min=1, $max=0, $minlen=3, $key=null)
    {
        if ($min < 1) {
            $min = 1;
        }
        if ($max < $min) {
            $max = 0; // 0 disables the upper limit
        }

        // true when a frequency lies inside the requested bounds
        $withinLimits = function ($count) use ($min, $max) {
            return $count >= $min && (!$max || $count <= $max);
        };

        $histogram = array();

        if ($key == 'title') {
            // every title index entry counts once per occurrence
            $titleCounts = array_count_values($this->getIndex('title', ''));
            foreach ($titleCounts as $title => $count) {
                if ($withinLimits($count) && strlen($title) >= $minlen) {
                    $histogram[$title] = $count;
                }
            }
        } elseif (!is_null($key)) {
            $metaname = $this->cleanName($key);

            // first pass: frequencies per value id
            $frequencies = array();
            foreach ($this->getIndex($metaname.'_i', '') as $wid => $line) {
                $freq = $this->countTuples($line);
                if ($withinLimits($freq)) {
                    $frequencies[$wid] = $freq;
                }
            }

            // second pass: resolve the ids, the value list is only loaded when needed
            if (!empty($frequencies)) {
                $values = $this->getIndex($metaname.'_w', '');
                foreach ($frequencies as $wid => $freq) {
                    if (strlen($values[$wid]) >= $minlen) {
                        $histogram[$values[$wid]] = $freq;
                    }
                }
            }
        } else {
            $fulltext = FulltextIndex::getInstance();
            foreach ($fulltext->listIndexLengths() as $length) {
                if ($length < $minlen) {
                    continue;
                }

                $wordList = null; // loaded lazily on the first hit for this length
                foreach ($this->getIndex('i', $length) as $wid => $line) {
                    $freq = $this->countTuples($line);
                    if (!$withinLimits($freq)) {
                        continue;
                    }
                    if ($wordList === null) {
                        $wordList = $this->getIndex('w', $length);
                    }
                    $histogram[$wordList[$wid]] = $freq;
                }
            }
        }

        arsort($histogram);
        return $histogram;
    }
466
467
    /**
     * Remove all files of the Metadata Index
     *
     * Deletes the "_w", "_i" and "_p" index files of every key listed in the
     * 'metadata' index, then the title and the metadata index files
     * themselves. Failures to delete a file are ignored.
     *
     * @param bool $requireLock should be false only if the caller is responsible for index lock
     * @return bool  always true
     * @throws Exception\IndexLockException
     */
    public function clear($requireLock = true)
    {
        global $conf;

        if ($requireLock) {
            $this->lock();
        }

        $extensions = array('_w.idx', '_i.idx', '_p.idx');
        foreach ($this->getIndex('metadata', '') as $metaname) {
            foreach ($extensions as $extension) {
                @unlink($conf['indexdir'].'/'.$metaname.$extension);
            }
        }

        foreach (array('/title.idx', '/metadata.idx') as $file) {
            @unlink($conf['indexdir'].$file);
        }

        if ($requireLock) {
            $this->unlock();
        }

        return true;
    }
492
493
    /**
     * Returns the backlinks for a given page
     *
     * Uses the metadata index.
     *
     * @param string $id           The id for which links shall be returned
     * @param bool   $ignore_perms Ignore the fact that pages are hidden or read-protected
     * @return array The pages that contain links to the given page
     *
     * @author     Andreas Gohr <[email protected]>
     */
    public function backlinks($id, $ignore_perms = false)
    {
        return $this->filterReadablePages(
            $this->lookupKey('relation_references', $id),
            $ignore_perms
        );
    }

    /**
     * Returns the pages that use a given media file
     *
     * Uses the relation media metadata property and the metadata index.
     *
     * Note that before 2013-07-31 the second parameter was the maximum number
     * of results and permissions were ignored. That's why the parameter is now
     * checked to be explicitly set to true (with type bool) in order to be
     * compatible with older uses of the function.
     *
     * @param string $id           The media id to look for
     * @param bool   $ignore_perms Ignore hidden pages and acls (optional, default: false)
     * @return array A list of pages that use the given media file
     *
     * @author     Andreas Gohr <[email protected]>
     */
    public function mediause($id, $ignore_perms = false)
    {
        return $this->filterReadablePages(
            $this->lookupKey('relation_media', $id),
            $ignore_perms
        );
    }

    /**
     * Drop pages that do not exist or must not be shown, and sort the rest
     *
     * Hidden pages and pages below AUTH_READ are dropped unless $ignore_perms
     * is exactly true (type bool). Non-existing pages are always dropped.
     *
     * @param array $pages        page names as returned by lookupKey()
     * @param bool  $ignore_perms skip the hidden page and ACL checks
     * @return array sorted, re-indexed list of the remaining page names
     */
    private function filterReadablePages($pages, $ignore_perms)
    {
        if (!count($pages)) return $pages;

        $checkPerms = ($ignore_perms !== true);

        // check ACL permissions
        $visible = array();
        foreach ($pages as $page) {
            if ($checkPerms && (isHiddenPage($page) || auth_quickaclcheck($page) < AUTH_READ)) {
                continue;
            }
            if (!page_exists($page, '', false)) {
                continue;
            }
            $visible[] = $page;
        }

        sort($visible);
        return $visible;
    }
559
}
560