Failed Conditions
Pull Request — master (#2943)
by
unknown
02:51
created

MetadataIndex   F

Complexity

Total Complexity 107

Size/Duplication

Total Lines 535
Duplicated Lines 0 %

Coupling/Cohesion

Components 2
Dependencies 2

Importance

Changes 0
Metric Value
dl 0
loc 535
rs 2
c 0
b 0
f 0
wmc 107
lcom 2
cbo 2

10 Methods

Rating   Name   Duplication   Size   Complexity  
A getInstance() 0 7 2
B getPages() 0 23 6
F addMetaKeys() 0 87 21
B deleteMetaKeys() 0 32 9
F lookupKey() 0 91 21
B renameMetaValue() 0 46 6
D histogram() 0 55 24
A clear() 0 18 4
B backlinks() 0 19 7
B mediause() 0 19 7

How to fix   Complexity   

Complex Class

Complex classes like MetadataIndex often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes. You can also have a look at the cohesion graph to spot any un-connected, or weakly-connected components.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

While breaking up the class, it is a good idea to analyze how other classes use MetadataIndex, and based on these observations, apply Extract Interface, too.

1
<?php
2
3
namespace dokuwiki\Search;
4
5
use dokuwiki\Search\Exception\IndexAccessException;
6
use dokuwiki\Search\Exception\IndexLockException;
7
use dokuwiki\Search\Exception\IndexWriteException;
8
9
/**
10
 * Class DokuWiki Metadata Index (Singleton)
11
 *
12
 * @license    GPL 2 (http://www.gnu.org/licenses/gpl.html)
13
 * @author     Andreas Gohr <[email protected]>
14
 * @author Tom N Harris <[email protected]>
15
 */
16
class MetadataIndex extends AbstractIndex
17
{
18
    /** @var MetadataIndex $instance */
19
    protected static $instance = null;
20
21
    /**
     * Provide the shared MetadataIndex object, creating it on first use
     *
     * The object is kept in the static $instance property and handed out
     * again on every later call.
     *
     * @return MetadataIndex
     */
    public static function getInstance()
    {
        if (static::$instance !== null) {
            return static::$instance;
        }

        static::$instance = new static();
        return static::$instance;
    }
33
34
    /**
     * Return a list of pages containing the metadata key
     * Note: override parent class methods
     *
     * Without a key the complete page index is returned. For the 'title' key
     * every page that has a non-empty entry in the title index is returned.
     * For any other key the pages referenced by that key's '_i' index are
     * returned, each page once.
     *
     * @param string    $key    list only pages containing the metadata key
     * @return array            list of page names
     *
     * @author Tom N Harris <[email protected]>
     */
    public function getPages($key = null)
    {
        $page_idx = $this->getIndex('page', '');
        if (is_null($key)) return $page_idx; // same as parent method

        // Special handling for titles
        if ($key == 'title') {
            $title_idx = $this->getIndex('title', '');
            // pages located past the end of the title index have no title entry
            array_splice($page_idx, count($title_idx));
            foreach ($title_idx as $i => $title) {
                if ($title === '') unset($page_idx[$i]);
            }
            return array_values($page_idx);
        }

        $metaname = $this->cleanName($key);
        $pages = [];
        $lines = $this->getIndex($metaname.'_i', '');
        foreach ($lines as $line) {
            // Array union keeps the page names as keys. array_merge() would
            // renumber integer-like keys, turning a page named "2020" into
            // 0, 1, ... and it copies the whole accumulator on every pass.
            $pages += $this->parseTuples($page_idx, $line);
        }

        // PHP stores numeric string keys as integers; hand back strings like
        // the other branches of this method do
        return array_map('strval', array_keys($pages));
    }
66
67
    /**
     * Add/update keys to/of the metadata index
     *
     * Adding new keys does not remove other keys for the page.
     * The $key parameter can be an array to add multiple keys. $value will not be used if $key is an array.
     * An empty value will remove the page from the metadata index.
     *
     * The 'title' key is written straight to the 'title' index; when a list
     * of titles is given only its first element is stored. Every other key is
     * kept in three indexes: '<name>_w' (known values), '<name>_i' (pages per
     * value) and '<name>_p' (value ids per page).
     *
     * @param string $page a page name
     * @param mixed $key a key string or array of key=>value pairs
     * @param mixed $value the value or list of values
     * @param bool $requireLock should be false only if the caller is responsible for index lock
     * @return bool  if the function completed successfully
     *
     * @throws IndexAccessException
     * @throws IndexLockException
     * @throws IndexWriteException
     * @author Michael Hamann <[email protected]>
     * @author Tom N Harris <[email protected]>
     */
    public function addMetaKeys($page, $key, $value = null, $requireLock = true)
    {
        if (!is_array($key)) {
            $key = [$key => (isset($value) ? $value : '')];
        }
        unset($key['']);

        // load known documents
        $pid = $this->getPID($page);

        if ($requireLock) $this->lock();

        try {
            // Special handling for titles so the index file is simpler
            if (array_key_exists('title', $key)) {
                // Read the title from $key, never from $value: $value is not
                // used when the caller passed an array of keys
                $title = $key['title'];
                if (is_array($title)) {
                    $title = $title ? reset($title) : '';
                }
                $this->saveIndexKey('title', '', $pid, (string)$title);
                unset($key['title']);
            }

            foreach ($key as $name => $values) {
                $metaname = $this->cleanName($name);
                if (empty($metaname)) continue;
                $this->addIndexKey('metadata', '', $metaname);
                $metaidx = $this->getIndex($metaname.'_i', '');
                $metawords = $this->getIndex($metaname.'_w', '');
                $addwords = false;

                // value ids currently stored for this page; all of them start
                // out marked for removal: -1 means remove, 0 keep, 1 add
                $val_idx = $this->getIndexKey($metaname.'_p', '', $pid);
                if ($val_idx !== '') {
                    $val_idx = array_fill_keys(explode(':', $val_idx), -1);
                } else {
                    $val_idx = [];
                }

                if (!is_array($values)) $values = [$values];

                foreach ($values as $val) {
                    $val = (string)$val;  // NULL is always converted to an empty string
                    if ($val === '') continue;

                    $id = array_search($val, $metawords, true);
                    if ($id === false) {
                        // not found $val, so we'll add it to the end of metawords
                        // and create a placeholder in metaidx
                        $id = count($metawords);
                        $metawords[$id] = $val;
                        $metaidx[$id] = '';
                        $addwords = true;
                    }
                    // test if value is already in the index
                    if (isset($val_idx[$id]) && $val_idx[$id] <= 0) {
                        $val_idx[$id] = 0;
                    } else { // else add it
                        $val_idx[$id] = 1;
                    }
                }

                if ($addwords) {
                    $this->saveIndex($metaname.'_w', '', $metawords);
                }

                $vals_changed = false;
                foreach ($val_idx as $id => $action) {
                    if ($action == -1) {
                        $metaidx[$id] = $this->updateTuple($metaidx[$id], $pid, 0);
                        $vals_changed = true;
                        unset($val_idx[$id]);
                    } elseif ($action == 1) {
                        $metaidx[$id] = $this->updateTuple($metaidx[$id], $pid, 1);
                        $vals_changed = true;
                    }
                }

                if ($vals_changed) {
                    $this->saveIndex($metaname.'_i', '', $metaidx);
                    $val_idx = implode(':', array_keys($val_idx));
                    $this->saveIndexKey($metaname.'_p', '', $pid, $val_idx);
                }

                // free the used memory before the next key is processed
                unset($metaidx);
                unset($metawords);
            }
        } finally {
            // release the lock even when an index operation threw, otherwise
            // the index stays locked for every later writer
            if ($requireLock) $this->unlock();
        }

        return true;
    }
173
174
    /**
     * Delete keys of the page from metadata index
     *
     * When $keys is empty every key listed in the 'metadata' index, plus the
     * title, is removed for the page. Keys that are not listed in the
     * 'metadata' index are ignored.
     *
     * @param string $page a page name
     * @param mixed $keys a key string or array of keys
     * @param bool $requireLock should be false only if the caller is responsible for index lock
     * @return bool  true once the keys have been removed
     *
     * @throws IndexLockException
     * @throws IndexWriteException
     * @author Satoshi Sahara <[email protected]>
     * @author Tom N Harris <[email protected]>
     */
    public function deleteMetaKeys($page, $keys = [], $requireLock = true)
    {
        // load known documents
        $pid = $this->getPID($page);

        if ($requireLock) $this->lock();

        try {
            $knownKeys = $this->getIndex('metadata', '');
            $knownKeys[] = 'title';

            // remove all metadata keys of the page when $keys is empty
            $keys = (empty($keys)) ? $knownKeys : (array)$keys;

            foreach ($keys as $metaname) {
                if ($metaname == 'title') {
                    // Special handling for titles so the index file is simpler
                    $this->saveIndexKey('title', '', $pid, '');
                    continue;
                }
                if (!in_array($metaname, $knownKeys)) continue;

                $val_line = $this->getIndexKey($metaname.'_p', '', $pid);
                // the page holds no value for this key: both index files are
                // already in the desired state, so do not rewrite them
                if ($val_line === '') continue;

                $meta_idx = $this->getIndex($metaname.'_i', '');
                foreach (explode(':', $val_line) as $id) {
                    // skip empty ids and ids the value index does not know
                    if ($id === '' || !isset($meta_idx[$id])) continue;
                    $meta_idx[$id] = $this->updateTuple($meta_idx[$id], $pid, 0);
                }
                $this->saveIndex($metaname.'_i', '', $meta_idx);
                $this->saveIndexKey($metaname.'_p', '', $pid, '');
            }
        } finally {
            // release the lock even when an index operation threw
            if ($requireLock) $this->unlock();
        }

        return true;
    }
219
220
    /**
     * Find pages containing a metadata key
     *
     * The metadata values are compared as case-sensitive strings. Pass a
     * callback function that returns true or false to use a different
     * comparison function. The function will be called with the $value being
     * searched for as the first argument, and the word in the index as the
     * second argument. The function preg_match can be used directly if the
     * values are regexes.
     *
     * Without a callback, a leading and/or trailing '*' in a search term acts
     * as a wildcard.
     *
     * @param string    $key    name of the metadata key to look for
     * @param string    $value  search term to look for, must be a string or array of strings
     * @param callback  $func   comparison function
     * @return array            lists with page names, keys are query values if $value is array
     *
     * @author Tom N Harris <[email protected]>
     * @author Michael Hamann <[email protected]>
     */
    public function lookupKey($key, &$value, $func = null)
    {
        $value_array = is_array($value) ? $value : [$value];
        $isTitle = ($key == 'title');
        $metaname = $this->cleanName($key);

        // get all words in order to search the matching ids
        if ($isTitle) {
            $words = $this->getIndex('title', '');
        } else {
            $words = $this->getIndex($metaname.'_w', '');
        }

        // the matching ids for the provided value(s)
        $value_ids = [];
        foreach ($value_array as $val) {
            foreach ($this->matchWordIds($words, $val, $func) as $i) {
                $value_ids[$i][] = $val;
            }
        }

        unset($words); // free the used memory

        // initialize the result so it won't be null
        $result = [];
        foreach ($value_array as $val) {
            $result[$val] = [];
        }

        $page_idx = $this->getIndex('page', '');

        if ($isTitle) {
            // Special handling for titles: the title index is keyed by page
            // id, so a matching id is the page itself
            foreach ($value_ids as $pid => $val_list) {
                if (!isset($page_idx[$pid])) continue; // title entry without a page
                foreach ($val_list as $val) {
                    $result[$val][] = $page_idx[$pid];
                }
            }
        } else {
            // load all lines and pages so the used lines can be taken
            // and matched with the pages
            $lines = $this->getIndex($metaname.'_i', '');

            foreach ($value_ids as $value_id => $val_list) {
                if (!isset($lines[$value_id])) continue; // value without an index line

                // parse the tuples of the form page_id*1:page2_id*1 and so on,
                // return value is an array with page_id => 1, page2_id => 1 etc.
                // so take the keys only
                $pages = array_keys($this->parseTuples($page_idx, $lines[$value_id]));
                foreach ($val_list as $val) {
                    $result[$val] = array_merge($result[$val], $pages);
                }
            }
        }

        if (!is_array($value)) $result = $result[$value];
        return $result;
    }

    /**
     * Collect the positions of all index words that match one search term
     *
     * @param array         $words  the index words, keyed by their position
     * @param string        $val    the search term
     * @param callback|null $func   comparison function, null for the built-in matching
     * @return array                list of matching positions in $words
     */
    private function matchWordIds($words, $val, $func)
    {
        if (!is_null($func)) {
            $ids = [];
            foreach ($words as $i => $word) {
                if (call_user_func($func, $val, $word)) {
                    $ids[] = $i;
                }
            }
            return $ids;
        }

        $xval = $val;
        $caret = '^';
        $dollar = '$';
        // check for wildcards
        if (substr($xval, 0, 1) == '*') {
            $xval = substr($xval, 1);
            $caret = '';
        }
        if (substr($xval, -1, 1) == '*') {
            $xval = substr($xval, 0, -1);
            $dollar = '';
        }
        if (!$caret || !$dollar) {
            $re = $caret.preg_quote($xval, '/').$dollar;
            return array_keys(preg_grep('/'.$re.'/', $words));
        }

        // Exact match. array_keys() returns every position, array_search()
        // only the first one: several pages may share the same title, and
        // each of them has its own entry in the title index.
        return array_keys($words, $val, true);
    }
329
330
    /**
     * Renames a meta value in the index
     * This doesn't change the meta value in the pages, it assumes that
     * all pages will be updated.
     *
     * When the new value is not yet known, the entry of the old value in the
     * '<key>_w' index is simply overwritten. When it is already known, the
     * page references of the old value are merged into those of the new one.
     *
     * @param string $key The metadata key of which a value shall be changed
     * @param string $oldvalue The old value that shall be renamed
     * @param string $newvalue The new value to which the old value shall be renamed,
     *                          if exists values will be merged
     * @return bool  true once the index has been updated (also when $oldvalue is unknown)
     *
     * @throws IndexLockException
     * @throws IndexWriteException
     */
    public function renameMetaValue($key, $oldvalue, $newvalue)
    {
        $this->lock();

        try {
            // change the relation references index
            // (read with the same name/suffix split that saveIndex() below uses)
            $metavalues = $this->getIndex($key.'_w', '');
            $oldid = array_search($oldvalue, $metavalues, true);
            if ($oldid === false) return true; // unknown value, nothing to rename

            $newid = array_search($newvalue, $metavalues, true);
            if ($newid === false) {
                // the new value is unknown: rename the word in place
                $metavalues[$oldid] = $newvalue;
                $this->saveIndex($key.'_w', '', $metavalues);
                return true;
            }
            if ($newid === $oldid) return true; // renaming a value to itself

            // free memory
            unset($metavalues);

            // okay, now we have two entries for the same value. we need to merge them.
            $indexline = $this->getIndexKey($key.'_i', '', $oldid);
            if ($indexline != '') {
                $newindexline = $this->getIndexKey($key.'_i', '', $newid);
                $pagekeys     = $this->getIndex($key.'_p', '');
                foreach (explode(':', $indexline) as $part) {
                    list($id, $count) = explode('*', $part);
                    $newindexline = $this->updateTuple($newindexline, $id, $count);

                    $keyline = isset($pagekeys[$id]) ? explode(':', $pagekeys[$id]) : [];
                    // remove old meta value (and empty ids left by an empty line)
                    $keyline = array_diff($keyline, [$oldid, '']);
                    // add new meta value when not already present
                    if (!in_array($newid, $keyline)) {
                        $keyline[] = $newid;
                    }
                    $pagekeys[$id] = implode(':', $keyline);
                }
                $this->saveIndex($key.'_p', '', $pagekeys);
                unset($pagekeys);
                $this->saveIndexKey($key.'_i', '', $oldid, '');
                $this->saveIndexKey($key.'_i', '', $newid, $newindexline);
            }

            return true;
        } finally {
            // release the lock even when an index operation threw
            $this->unlock();
        }
    }
390
391
    /**
     * Return a list of words or frequency sorted by number of times used
     *
     * Three sources are supported: the title index ($key is 'title'), the
     * value index of a metadata key (any other non-null $key) and, without a
     * key, the word indexes whose lengths are listed by the FulltextIndex.
     *
     * @param int       $min    bottom frequency threshold
     * @param int       $max    upper frequency limit. No limit if $max<$min
     * @param int       $minlen minimum length of words to count
     * @param string    $key    metadata key to list. Uses the fulltext index if not given
     * @return array            list of words as the keys and frequency as values
     *
     * @author Tom N Harris <[email protected]>
     */
    public function histogram($min=1, $max=0, $minlen=3, $key=null)
    {
        // normalise the frequency window; a $max of 0 stands for "no upper limit"
        $min = ($min < 1) ? 1 : $min;
        $max = ($max < $min) ? 0 : $max;

        $withinWindow = function ($count) use ($min, $max) {
            return $count >= $min && (!$max || $count <= $max);
        };

        $histogram = [];

        if ($key == 'title') {
            // every title counts as one word, its frequency is the number of pages using it
            $titleCounts = array_count_values($this->getIndex('title', ''));
            foreach ($titleCounts as $title => $count) {
                if ($withinWindow($count) && strlen($title) >= $minlen) {
                    $histogram[$title] = $count;
                }
            }
        } elseif (!is_null($key)) {
            $metaname = $this->cleanName($key);

            // first pass: frequencies per value id
            $frequencies = [];
            foreach ($this->getIndex($metaname.'_i', '') as $wid => $line) {
                $count = $this->countTuples($line);
                if ($withinWindow($count)) {
                    $frequencies[$wid] = $count;
                }
            }

            // second pass: translate the ids into values, only if anything qualified
            if (!empty($frequencies)) {
                $words = $this->getIndex($metaname.'_w', '');
                foreach ($frequencies as $wid => $count) {
                    if (strlen($words[$wid]) >= $minlen) {
                        $histogram[$words[$wid]] = $count;
                    }
                }
            }
        } else {
            $lengths = FulltextIndex::getInstance()->listIndexLengths();
            foreach ($lengths as $length) {
                if ($length < $minlen) continue;

                $words = null; // word list for this length, loaded on first hit
                foreach ($this->getIndex('i', $length) as $wid => $line) {
                    $count = $this->countTuples($line);
                    if (!$withinWindow($count)) continue;

                    if ($words === null) {
                        $words = $this->getIndex('w', $length);
                    }
                    $histogram[$words[$wid]] = $count;
                }
            }
        }

        arsort($histogram);
        return $histogram;
    }
457
458
    /**
     * Clear the Metadata Index
     *
     * Deletes the '_w', '_i' and '_p' index files of every key listed in the
     * 'metadata' index, then the title index and the 'metadata' index itself.
     * Files that cannot be deleted are silently skipped.
     *
     * @param bool $requireLock should be false only if the caller is responsible for index lock
     * @return bool  If the index has been cleared successfully
     * @throws IndexLockException
     */
    public function clear($requireLock = true)
    {
        global $conf;

        if ($requireLock) {
            $this->lock();
        }

        $indexDir = $conf['indexdir'];

        foreach ($this->getIndex('metadata', '') as $metaname) {
            foreach (['_w.idx', '_i.idx', '_p.idx'] as $fileSuffix) {
                @unlink($indexDir.'/'.$metaname.$fileSuffix);
            }
        }

        foreach (['/title.idx', '/metadata.idx'] as $file) {
            @unlink($indexDir.$file);
        }

        if ($requireLock) {
            $this->unlock();
        }

        return true;
    }
483
484
    /**
     * Returns the backlinks for a given page
     *
     * Uses the metadata index.
     *
     * @param string $id           The id for which links shall be returned
     * @param bool   $ignore_perms Ignore the fact that pages are hidden or read-protected
     * @return array The pages that contain links to the given page
     *
     * @author     Andreas Gohr <[email protected]>
     */
    public function backlinks($id, $ignore_perms = false)
    {
        $result = $this->lookupKey('relation_references', $id);

        if (!count($result)) return $result;

        return $this->filterAccessiblePages($result, $ignore_perms);
    }

    /**
     * Returns the pages that use a given media file
     *
     * Uses the relation media metadata property and the metadata index.
     *
     * Note that before 2013-07-31 the second parameter was the maximum number
     * of results and permissions were ignored. That's why the parameter is now
     * checked to be explicitly set to true (with type bool) in order to be
     * compatible with older uses of the function.
     *
     * @param string $id           The media id to look for
     * @param bool   $ignore_perms Ignore hidden pages and acls (optional, default: false)
     * @return array A list of pages that use the given media file
     *
     * @author     Andreas Gohr <[email protected]>
     */
    public function mediause($id, $ignore_perms = false)
    {
        $result = $this->lookupKey('relation_media', $id);

        if (!count($result)) return $result;

        return $this->filterAccessiblePages($result, $ignore_perms);
    }

    /**
     * Drop pages that do not exist or that the current user may not see
     *
     * The permission checks are skipped only when $ignore_perms is exactly
     * boolean true; non-existing pages are always dropped.
     *
     * @param array $pages        list of page names
     * @param mixed $ignore_perms pass true (bool) to skip the hidden-page and ACL checks
     * @return array the remaining page names, sorted and re-indexed
     */
    private function filterAccessiblePages($pages, $ignore_perms)
    {
        $accessible = [];
        foreach ($pages as $page) {
            // check ACL permissions
            if ($ignore_perms !== true
                && (isHiddenPage($page) || auth_quickaclcheck($page) < AUTH_READ)
            ) {
                continue;
            }
            if (!page_exists($page, '', false)) continue;

            $accessible[] = $page;
        }

        sort($accessible);
        return $accessible;
    }
550
}
551