Failed Conditions
Pull Request — master (#2943)
by Andreas
03:19
created

MetadataIndex::addMetaKeys()   F

Complexity

Conditions 22
Paths 6936

Size

Total Lines 94

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 22
nc 6936
nop 4
dl 0
loc 94
rs 0
c 0
b 0
f 0

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
<?php
2
3
namespace dokuwiki\Search;
4
5
/**
6
 * Class DokuWiki Metadata Index (Singleton)
7
 *
8
 * @license    GPL 2 (http://www.gnu.org/licenses/gpl.html)
9
 * @author     Andreas Gohr <[email protected]>
10
 * @author Tom N Harris <[email protected]>
11
 */
12
class MetadataIndex extends AbstractIndex
13
{
14
    /**
     * Shared instance handed out by getInstance(); null until first requested
     *
     * @var MetadataIndex $instance
     */
    protected static $instance = null;

    /**
     * Return the MetadataIndex singleton, creating it on the first call
     *
     * @return MetadataIndex
     */
    public static function getInstance()
    {
        // reuse the instance created by an earlier call
        if (static::$instance !== null) {
            return static::$instance;
        }

        static::$instance = new static();
        return static::$instance;
    }
29
30
    /**
     * Return a list of pages containing the metadata key
     * Note: override parent class methods
     *
     * Without a key the complete page index is returned. For the 'title' key
     * the pages with a non-empty entry in the title index are returned. For
     * any other key the pages referenced by the lines of the key's "_i" index
     * are collected.
     *
     * @param string    $key    list only pages containing the metadata key
     * @return array            list of page names
     *
     * @author Tom N Harris <[email protected]>
     */
    public function getPages($key = null)
    {
        $page_idx = $this->getIndex('page', '');
        if (is_null($key)) return $page_idx; // same as parent method

        // Special handling for titles
        if ($key == 'title') {
            $title_idx = $this->getIndex('title', '');
            // pages beyond the end of the title index have no title entry
            array_splice($page_idx, count($title_idx));
            foreach ($title_idx as $i => $title) {
                if ($title === '') unset($page_idx[$i]);
            }
            return array_values($page_idx);
        }

        $metaname = $this->cleanName($key);
        $pages = array();
        $lines = $this->getIndex($metaname.'_i', '');
        foreach ($lines as $line) {
            // Only the keys (page names) are used below. The union operator is
            // used instead of array_merge() because array_merge() renumbers
            // integer keys, which would replace purely numeric page names
            // (PHP stores them as integer keys) by 0, 1, 2, ...
            $pages += $this->parseTuples($page_idx, $line);
        }
        return array_keys($pages);
    }
62
63
    /**
     * Add/update keys to/of the metadata index
     *
     * Adding new keys does not remove other keys for the page.
     * An empty value will erase the key.
     * The $key parameter can be an array to add multiple keys. $value will
     * not be used if $key is an array.
     *
     * @param string $page   a page name
     * @param mixed  $key    a key string or array of key=>value pairs
     * @param mixed  $value  the value or list of values
     * @param bool   $requireLock  should be false only if the caller is responsible for index lock
     * @return bool  true when the keys have been processed, false if the page id
     *               could not be determined or the lock could not be acquired
     *
     * @author Tom N Harris <[email protected]>
     * @author Michael Hamann <[email protected]>
     */
    public function addMetaKeys($page, $key, $value = null, $requireLock = true)
    {
        if (!is_array($key)) {
            $key = array($key => $value);
        } elseif (!is_null($value)) {
            // $key is array, but $value is not null
            trigger_error("array passed to addMetaKeys but value is not null", E_USER_WARNING);
        }

        // load known documents
        $pid = $this->getPID($page);
        if ($pid === false) {
            return false;
        }

        if ($requireLock && !$this->lock()) return false;

        try {
            // Special handling for titles so the index file is simpler
            if (array_key_exists('title', $key)) {
                $title = $key['title'];
                if (is_array($title)) {
                    $title = $title[0];
                }
                $this->saveIndexKey('title', '', $pid, $title);
                unset($key['title']);
            }

            foreach ($key as $name => $values) {
                $this->updateMetaKeyOfPage($pid, $name, $values);
            }
        } finally {
            // release the lock even if one of the index operations throws
            if ($requireLock) $this->unlock();
        }
        return true;
    }

    /**
     * Synchronise the indexed values of a single metadata key for one page
     *
     * Values of the page that are not part of $values are removed from the
     * key's "_i" index, values that are new for the page are added. The "_w",
     * "_i" and "_p" index files are only written when they actually changed.
     *
     * @param int|string $pid     id of the page as returned by getPID()
     * @param string     $name    metadata key name (cleaned with cleanName() before use)
     * @param mixed      $values  a single value or a list of values
     * @return void
     */
    private function updateMetaKeyOfPage($pid, $name, $values)
    {
        $metaname = $this->cleanName($name);
        $this->addIndexKey('metadata', '', $metaname);
        $metaidx = $this->getIndex($metaname.'_i', '');
        $metawords = $this->getIndex($metaname.'_w', '');

        if (!is_array($values)) $values = array($values);

        // value ids currently stored for the page
        $val_idx = $this->getIndexKey($metaname.'_p', '', $pid);
        if ($val_idx !== '') {
            $val_idx = explode(':', $val_idx);
            // -1 means remove, 0 keep, 1 add
            $val_idx = array_combine($val_idx, array_fill(0, count($val_idx), -1));
        } else {
            $val_idx = array();
        }

        if ($this->markRequestedMetaValues($values, $metawords, $metaidx, $val_idx)) {
            $this->saveIndex($metaname.'_w', '', $metawords);
        }

        $vals_changed = false;
        foreach ($val_idx as $id => $action) {
            if ($action == -1) {
                // value is no longer wanted for this page
                $metaidx[$id] = $this->updateTuple($metaidx[$id], $pid, 0);
                $vals_changed = true;
                unset($val_idx[$id]);
            } elseif ($action == 1) {
                // value is new for this page
                $metaidx[$id] = $this->updateTuple($metaidx[$id], $pid, 1);
                $vals_changed = true;
            }
        }

        if ($vals_changed) {
            $this->saveIndex($metaname.'_i', '', $metaidx);
            // remaining ids are the kept (0) and the added (1) values
            $this->saveIndexKey($metaname.'_p', '', $pid, implode(':', array_keys($val_idx)));
        }
    }

    /**
     * Flag the requested values in the action map of a page
     *
     * Every non-empty value is looked up in the word list. Unknown values are
     * appended to the word list together with an empty placeholder line in
     * the "_i" index. Values the page already has are flagged 0 (keep), all
     * others 1 (add).
     *
     * @param array $values     requested values, each one is cast to string
     * @param array $metawords  word list of the key, extended in place
     * @param array $metaidx    "_i" index lines of the key, extended in place
     * @param array $val_idx    map of value id => action (-1 remove, 0 keep, 1 add), updated in place
     * @return bool  true if new words have been appended to $metawords
     */
    private function markRequestedMetaValues($values, &$metawords, &$metaidx, &$val_idx)
    {
        $addwords = false;

        foreach ($values as $val) {
            $val = (string)$val;
            if ($val === '') continue;

            $id = array_search($val, $metawords, true);
            if ($id === false) {
                // didn't find $val, so we'll add it to the end of metawords
                // and create a placeholder in metaidx
                $id = count($metawords);
                $metawords[$id] = $val;
                $metaidx[$id] = '';
                $addwords = true;
            }

            // test if value is already in the index
            if (isset($val_idx[$id]) && $val_idx[$id] <= 0) {
                $val_idx[$id] = 0;
            } else { // else add it
                $val_idx[$id] = 1;
            }
        }

        return $addwords;
    }
174
175
    /**
     * Delete keys of the page from metadata index
     *
     * @param string $page   a page name
     * @param mixed  $keys   a key string or array of keys; when empty, all known
     *                       keys (including the title) are removed for the page
     * @param bool   $requireLock  should be false only if the caller is responsible for index lock
     * @return bool  true when the keys have been processed, false if the page id
     *               could not be determined or the lock could not be acquired
     *
     * @author Tom N Harris <[email protected]>
     * @author Satoshi Sahara <[email protected]>
     */
    public function deleteMetaKeys($page, $keys = [], $requireLock = true)
    {
        // load known documents
        $pid = $this->getPID($page);
        if ($pid === false) {
            return false;
        }

        if ($requireLock && !$this->lock()) return false;

        try {
            $knownKeys = $this->getIndex('metadata', '');
            $knownKeys[] = 'title';

            // remove all metadata keys of the page when $keys is empty
            $removeAll = empty($keys);
            $keys = $removeAll ? $knownKeys : (array)$keys;

            foreach ($keys as $name) {
                if ($name == 'title') {
                    // Special handling for titles so the index file is simpler
                    $this->saveIndexKey('title', '', $pid, '');
                    continue;
                }

                // The 'metadata' index holds names as stored by addMetaKeys(),
                // i.e. after cleanName(). Names supplied by the caller have to
                // be cleaned the same way or they would never match; names
                // read from the index itself are used as they are.
                $metaname = $removeAll ? $name : $this->cleanName($name);
                if (!in_array((string)$metaname, $knownKeys, true)) continue;

                $meta_idx = $this->getIndex($metaname.'_i', '');
                $val_idx = explode(':', $this->getIndexKey($metaname.'_p', '', $pid));
                foreach ($val_idx as $id) {
                    if ($id === '') continue;
                    $meta_idx[$id] = $this->updateTuple($meta_idx[$id], $pid, 0);
                }
                $this->saveIndex($metaname.'_i', '', $meta_idx);
                $this->saveIndexKey($metaname.'_p', '', $pid, '');
            }
        } finally {
            // release the lock even if one of the index operations throws
            if ($requireLock) $this->unlock();
        }
        return true;
    }
221
222
    /**
     * Find pages containing a metadata key
     *
     * The metadata values are compared as case-sensitive strings. Pass a
     * callback function that returns true or false to use a different
     * comparison function. The function will be called with the $value being
     * searched for as the first argument, and the word in the index as the
     * second argument. The function preg_match can be used directly if the
     * values are regexes.
     *
     * Without a callback, a leading and/or trailing '*' in a search term acts
     * as a wildcard; terms without wildcards have to match an index entry
     * exactly.
     *
     * @param string    $key    name of the metadata key to look for
     * @param string    $value  search term to look for, must be a string or array of strings
     * @param callback  $func   comparison function
     * @return array            lists with page names, keys are query values if $value is array
     *
     * @author Tom N Harris <[email protected]>
     * @author Michael Hamann <[email protected]>
     */
    public function lookupKey($key, &$value, $func = null)
    {
        $value_array = is_array($value) ? $value : array($value);

        // the matching ids for the provided value(s)
        $value_ids = array();

        $metaname = $this->cleanName($key);

        // get all words in order to search the matching ids
        if ($key == 'title') {
            $words = $this->getIndex('title', '');
        } else {
            $words = $this->getIndex($metaname.'_w', '');
        }

        if (!is_null($func)) {
            foreach ($value_array as $val) {
                foreach ($words as $i => $word) {
                    if (call_user_func_array($func, array($val, $word))) {
                        $value_ids[$i][] = $val;
                    }
                }
            }
        } else {
            foreach ($value_array as $val) {
                $xval = $val;
                $caret = '^';
                $dollar = '$';
                // check for wildcards
                if (substr($xval, 0, 1) == '*') {
                    $xval = substr($xval, 1);
                    $caret = '';
                }
                if (substr($xval, -1, 1) == '*') {
                    $xval = substr($xval, 0, -1);
                    $dollar = '';
                }
                if (!$caret || !$dollar) {
                    $re = $caret.preg_quote($xval, '/').$dollar;
                    foreach (array_keys(preg_grep('/'.$re.'/', $words)) as $i) {
                        $value_ids[$i][] = $val;
                    }
                } else {
                    // Collect every entry equal to $val, not only the first
                    // one: the title index has one entry per page, so several
                    // pages may share the same title.
                    foreach (array_keys($words, $val, true) as $i) {
                        $value_ids[$i][] = $val;
                    }
                }
            }
        }

        unset($words); // free the used memory

        // initialize the result so it won't be null
        $result = array();
        foreach ($value_array as $val) {
            $result[$val] = array();
        }

        $page_idx = $this->getIndex('page', '');

        // Special handling for titles
        if ($key == 'title') {
            // positions in the title index are used as positions in the page index
            foreach ($value_ids as $pid => $val_list) {
                // skip title entries without a corresponding page entry
                if (!isset($page_idx[$pid])) continue;
                $page = $page_idx[$pid];
                foreach ($val_list as $val) {
                    $result[$val][] = $page;
                }
            }
        } else {
            // load all lines and pages so the used lines can be taken
            // and matched with the pages
            $lines = $this->getIndex($metaname.'_i', '');

            foreach ($value_ids as $value_id => $val_list) {
                // a word without a line in the "_i" index is used by no page
                if (!isset($lines[$value_id])) continue;

                // parse the tuples of the form page_id*1:page2_id*1 and so on,
                // return value is an array with page_id => 1, page2_id => 1 etc.
                // so take the keys only
                $pages = array_keys($this->parseTuples($page_idx, $lines[$value_id]));
                foreach ($val_list as $val) {
                    $result[$val] = array_merge($result[$val], $pages);
                }
            }
        }
        if (!is_array($value)) $result = $result[$value];
        return $result;
    }
331
332
    /**
     * Renames a meta value in the index
     * This doesn't change the meta value in the pages, it assumes that
     * all pages will be updated.
     *
     * When $newvalue is not yet part of the word list of the key, the word is
     * simply replaced. Otherwise the pages of $oldvalue are merged into the
     * index line of $newvalue and the per-page value lists are adjusted.
     *
     * @param string $key       The metadata key of which a value shall be changed
     * @param string $oldvalue  The old value that shall be renamed
     * @param string $newvalue  The new value to which the old value shall be renamed,
     *                          if exists values will be merged
     * @return bool  If renaming the value has been successful, false on error
     */
    public function renameMetaValue($key, $oldvalue, $newvalue)
    {
        if (!$this->lock()) return false;

        try {
            // address the index files the same way the other methods do
            $metaname = $this->cleanName($key);

            // change the relation references index
            $metavalues = $this->getIndex($metaname.'_w', '');
            $oldid = array_search($oldvalue, $metavalues, true);
            if ($oldid === false) {
                // the old value is not indexed, nothing to rename
                return true;
            }

            $newid = array_search($newvalue, $metavalues, true);
            if ($newid === false) {
                // the new value is unknown: replacing the word is sufficient
                $metavalues[$oldid] = $newvalue;
                return (bool)$this->saveIndex($metaname.'_w', '', $metavalues);
            }
            if ($newid === $oldid) {
                // renaming a value to itself, nothing to merge
                return true;
            }

            // free memory
            unset($metavalues);

            // okay, now we have two entries for the same value. we need to merge them.
            $indexline = $this->getIndexKey($metaname.'_i', '', $oldid);
            if ($indexline != '') {
                $newindexline = $this->getIndexKey($metaname.'_i', '', $newid);
                $pagekeys     = $this->getIndex($metaname.'_p', '');
                foreach (explode(':', $indexline) as $part) {
                    list($id, $count) = explode('*', $part);
                    $newindexline = $this->updateTuple($newindexline, $id, $count);

                    // a missing or empty line means no values, not one empty value
                    $keyline = (isset($pagekeys[$id]) && $pagekeys[$id] !== '')
                        ? explode(':', $pagekeys[$id])
                        : array();
                    // remove old meta value
                    $keyline = array_diff($keyline, array($oldid));
                    // add new meta value when not already present; the ids in
                    // $keyline are strings, so compare as strings
                    if (!in_array((string)$newid, $keyline, true)) {
                        $keyline[] = $newid;
                    }
                    $pagekeys[$id] = implode(':', $keyline);
                }
                if (!$this->saveIndex($metaname.'_p', '', $pagekeys)) {
                    // leave the "_i" index untouched so both stay consistent
                    return false;
                }
                unset($pagekeys);
                $this->saveIndexKey($metaname.'_i', '', $oldid, '');
                $this->saveIndexKey($metaname.'_i', '', $newid, $newindexline);
            }

            return true;
        } finally {
            // runs on every return path above and when an exception is thrown
            $this->unlock();
        }
    }
392
393
    /**
     * Return a list of words or frequency sorted by number of times used
     *
     * @param int       $min    bottom frequency threshold
     * @param int       $max    upper frequency limit. No limit if $max<$min
     * @param int       $minlen minimum length of words to count
     * @param string    $key    metadata key to list. Uses the fulltext index if not given
     * @return array            list of words as the keys and frequency as values
     *
     * @author Tom N Harris <[email protected]>
     */
    public function histogram($min=1, $max=0, $minlen=3, $key=null)
    {
        if ($min < 1)    $min = 1;
        if ($max < $min) $max = 0;

        $result = array();

        if ($key == 'title') {
            // the frequency of a title is the number of pages using it
            $index = $this->getIndex('title', '');
            $index = array_count_values($index);
            foreach ($index as $val => $cnt) {
                if ($cnt >= $min && (!$max || $cnt <= $max) && strlen($val) >= $minlen) {
                    $result[$val] = $cnt;
                }
            }
        } elseif (!is_null($key)) {
            $metaname = $this->cleanName($key);
            $index = $this->getIndex($metaname.'_i', '');
            $val_idx = array();
            foreach ($index as $wid => $line) {
                $freq = $this->countTuples($line);
                if ($freq >= $min && (!$max || $freq <= $max)) {
                    $val_idx[$wid] = $freq;
                }
            }
            if (!empty($val_idx)) {
                // the word list is only loaded when at least one value qualifies
                $words = $this->getIndex($metaname.'_w', '');
                foreach ($val_idx as $wid => $freq) {
                    // skip index lines that have no word
                    if (!isset($words[$wid])) continue;
                    if (strlen($words[$wid]) >= $minlen) {
                        $result[$words[$wid]] = $freq;
                    }
                }
            }
        } else {
            // The unqualified class name resolves inside the current namespace,
            // i.e. to dokuwiki\Search\FulltextIndex. Writing Search\FulltextIndex
            // here would resolve to dokuwiki\Search\Search\FulltextIndex.
            $FulltextIndex = FulltextIndex::getInstance();
            $lengths = $FulltextIndex->listIndexLengths();
            foreach ($lengths as $length) {
                if ($length < $minlen) continue;
                $index = $this->getIndex('i', $length);
                $words = null;
                foreach ($index as $wid => $line) {
                    $freq = $this->countTuples($line);
                    if ($freq >= $min && (!$max || $freq <= $max)) {
                        // load the word list lazily, only for lengths with hits
                        if ($words === null) {
                            $words = $this->getIndex('w', $length);
                        }
                        // skip index lines that have no word
                        if (!isset($words[$wid])) continue;
                        $result[$words[$wid]] = $freq;
                    }
                }
            }
        }

        arsort($result);
        return $result;
    }
459
460
    /**
     * Clear the Metadata Index
     *
     * Removes the "_w", "_i" and "_p" index files of every key listed in the
     * 'metadata' index, followed by the title and the metadata index files.
     * Failures to delete a file are ignored.
     *
     * @param bool   $requireLock  should be false only if the caller is responsible for index lock
     * @return bool  false if the lock could not be acquired, true otherwise
     */
    public function clear($requireLock = true)
    {
        global $conf;

        if ($requireLock && !$this->lock()) return false;

        // collect the file names first, the key list itself is deleted last
        $files = array();
        foreach ($this->getIndex('metadata', '') as $metaname) {
            foreach (array('_w.idx', '_i.idx', '_p.idx') as $suffix) {
                $files[] = $metaname.$suffix;
            }
        }
        $files[] = 'title.idx';
        $files[] = 'metadata.idx';

        foreach ($files as $file) {
            @unlink($conf['indexdir'].'/'.$file);
        }

        if ($requireLock) $this->unlock();
        return true;
    }
484
485
    /**
     * Returns the backlinks for a given page
     *
     * Looks up the 'relation_references' key in the metadata index. Pages
     * that do not exist are always dropped; hidden pages and pages without
     * read permission are dropped unless $ignore_perms is exactly true.
     *
     * @param string $id           The id for which links shall be returned
     * @param bool   $ignore_perms Ignore the fact that pages are hidden or read-protected
     * @return array The pages that contain links to the given page, sorted
     *
     * @author     Andreas Gohr <[email protected]>
     */
    public function backlinks($id, $ignore_perms = false)
    {
        $pages = $this->lookupKey('relation_references', $id);

        if (count($pages) === 0) return $pages;

        $enforcePerms = ($ignore_perms !== true);

        // check ACL permissions
        $visible = array();
        foreach ($pages as $page) {
            $denied = $enforcePerms
                && (isHiddenPage($page) || auth_quickaclcheck($page) < AUTH_READ);
            if ($denied || !page_exists($page, '', false)) {
                continue;
            }
            $visible[] = $page;
        }

        sort($visible);
        return $visible;
    }
515
516
    /**
     * Returns the pages that use a given media file
     *
     * Uses the relation media metadata property and the metadata index.
     *
     * Note that before 2013-07-31 the second parameter was the maximum number
     * of results and permissions were ignored. That's why the parameter is now
     * checked to be explicitly set to true (with type bool) in order to be
     * compatible with older uses of the function.
     *
     * @param string $id           The media id to look for
     * @param bool   $ignore_perms Ignore hidden pages and acls (optional, default: false)
     * @return array A list of pages that use the given media file, sorted
     *
     * @author     Andreas Gohr <[email protected]>
     */
    public function mediause($id, $ignore_perms = false)
    {
        $pages = $this->lookupKey('relation_media', $id);

        if (count($pages) === 0) return $pages;

        // check ACL permissions
        $pages = array_filter($pages, function ($page) use ($ignore_perms) {
            if ($ignore_perms !== true
                && (isHiddenPage($page) || auth_quickaclcheck($page) < AUTH_READ)
            ) {
                return false;
            }
            // pages that no longer exist are never reported
            return page_exists($page, '', false);
        });

        // sort() also re-indexes the filtered list
        sort($pages);
        return $pages;
    }
551
}
552