Failed Conditions
Pull Request — master (#2943)
by
unknown
03:07
created

MetadataIndex::backlinks()   B

Complexity

Conditions 7
Paths 4

Size

Total Lines 19

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 7
nc 4
nop 2
dl 0
loc 19
rs 8.8333
c 0
b 0
f 0
1
<?php
2
3
namespace dokuwiki\Search;
4
5
use dokuwiki\Search\Exception\IndexAccessException;
6
use dokuwiki\Search\Exception\IndexLockException;
7
use dokuwiki\Search\Exception\IndexWriteException;
8
9
/**
10
 * Class DokuWiki Metadata Index
11
 *
12
 * @license    GPL 2 (http://www.gnu.org/licenses/gpl.html)
13
 * @author     Andreas Gohr <[email protected]>
14
 * @author Tom N Harris <[email protected]>
15
 */
16
class MetadataIndex extends AbstractIndex
17
{
18
    // numeric page id to be added to or deleted from the Metadata index
19
    protected $pageID;
20
21
    /**
     * Create a metadata index accessor, optionally bound to one page
     *
     * When no page is given the page id stays unset; addMetaKeys() and
     * deleteMetaKeys() throw an IndexAccessException in that state.
     *
     * @param string|int|null $page an integer is taken as the numeric page id,
     *                              any other non-null value is resolved through getPID()
     */
    public function __construct($page = null)
    {
        if ($page === null) {
            return;
        }

        if (is_int($page)) {
            $this->pageID = $page;
        } else {
            $this->pageID = $this->getPID($page);
        }
    }
32
33
    /**
     * Return a list of pages, optionally only those containing a metadata key
     * Note: overrides the parent class method
     *
     * Without a key the complete page index is returned. For the special key
     * "title" every page with a non-empty entry in the title index is listed.
     * For any other key the pages referenced in the key's "_i" index are
     * collected.
     *
     * @param string    $key    list only pages containing the metadata key
     * @return array            list of page names
     *
     * @author Tom N Harris <[email protected]>
     */
    public function getPages($key = null)
    {
        $page_idx = $this->getIndex('page', '');
        if (is_null($key)) return $page_idx; // same as parent method

        // Special handling for titles
        if ($key == 'title') {
            $title_idx = $this->getIndex('title', '');
            // pages beyond the end of the title index have no title entry at all
            array_splice($page_idx, count($title_idx));
            foreach ($title_idx as $i => $title) {
                if ($title === '') unset($page_idx[$i]);
            }
            return array_values($page_idx);
        }

        $metaname = $this->cleanName($key);
        $pages = array();
        foreach ($this->getIndex($metaname.'_i', '') as $line) {
            // Only the keys of the parsed tuples are used below. Combine them
            // with the union operator rather than array_merge(): array_merge()
            // renumbers integer-like keys, which would replace purely numeric
            // page names (e.g. "2020") by list positions, and re-copying the
            // accumulated array on every line is needlessly quadratic.
            $pages += $this->parseTuples($page_idx, $line);
        }

        // PHP stores numeric string keys as integers; hand back strings again
        return array_map('strval', array_keys($pages));
    }
65
66
    /**
     * Add/update keys to/of the metadata index
     *
     * Adding new keys does not remove other keys for the page.
     * The $key parameter can be an array to add multiple keys. $value will not be used if $key is an array.
     * An empty value will remove the page from the metadata index.
     *
     * The special key "title" is written straight to the title index; when a
     * list of titles is given only its first entry is stored.
     *
     * The index lock (when requested) is released even if writing fails.
     *
     * @param mixed $key a key string or array of key=>value pairs
     * @param mixed $value the value or list of values
     * @param bool $requireLock should be false only if the caller is responsible for index lock
     * @return bool  true when the function completed successfully
     *
     * @throws IndexAccessException when no page id is set on this instance
     * @throws IndexLockException
     * @throws IndexWriteException
     * @author Michael Hamann <[email protected]>
     * @author Tom N Harris <[email protected]>
     */
    public function addMetaKeys($key, $value = null, $requireLock = true)
    {
        // the page must be known before anything is written
        if (!isset($this->pageID)) {
            throw new IndexAccessException('Indexer: page unknown to addMetaKeys');
        }
        $pid = $this->pageID;

        if (!is_array($key)) {
            $key = isset($value) ? array($key => $value) : array($key => '');
        }
        unset($key['']);

        if ($requireLock) $this->lock();

        try {
            // Special handling for titles so the index file is simpler
            if (array_key_exists('title', $key)) {
                // take the title from the key array itself; $value is not
                // meaningful when $key was passed as an array
                $title = $key['title'];
                if (is_array($title)) {
                    $title = isset($title[0]) ? $title[0] : '';
                }
                $this->saveIndexKey('title', '', $pid, (string)$title);
                unset($key['title']);
            }

            foreach ($key as $name => $values) {
                $metaname = $this->cleanName($name);
                if (empty($metaname)) continue;
                $this->addIndexKey('metadata', '', $metaname);
                $metaidx = $this->getIndex($metaname.'_i', '');
                $metawords = $this->getIndex($metaname.'_w', '');
                $addwords = false;

                // value ids currently recorded for this page
                $val_idx = $this->getIndexKey($metaname.'_p', '', $pid);
                if ($val_idx !== '') {
                    $val_idx = explode(':', $val_idx);
                    // -1 means remove, 0 keep, 1 add
                    // everything starts as "remove" and is upgraded below when
                    // the value is still present in the new value list
                    $val_idx = array_combine($val_idx, array_fill(0, count($val_idx), -1));
                } else {
                    $val_idx = array();
                }

                if (!is_array($values)) $values = array($values);

                foreach ($values as $val) {
                    $val = (string)$val;  // NULL is always converted to an empty string
                    if ($val === '') continue;

                    $id = array_search($val, $metawords, true);
                    if ($id === false) {
                        // not found $val, so we'll add it to the end of metawords
                        // and create a placeholder in metaidx
                        $id = count($metawords);
                        $metawords[$id] = $val;
                        $metaidx[$id] = '';
                        $addwords = true;
                    }
                    // test if value is already in the index
                    if (isset($val_idx[$id]) && $val_idx[$id] <= 0) {
                        $val_idx[$id] = 0;
                    } else { // else add it
                        $val_idx[$id] = 1;
                    }
                }

                if ($addwords) {
                    $this->saveIndex($metaname.'_w', '', $metawords);
                }

                // apply the collected actions to the value => pages index
                $vals_changed = false;
                foreach ($val_idx as $id => $action) {
                    if ($action == -1) {
                        $metaidx[$id] = $this->updateTuple($metaidx[$id], $pid, 0);
                        $vals_changed = true;
                        unset($val_idx[$id]);
                    } elseif ($action == 1) {
                        $metaidx[$id] = $this->updateTuple($metaidx[$id], $pid, 1);
                        $vals_changed = true;
                    }
                }

                if ($vals_changed) {
                    $this->saveIndex($metaname.'_i', '', $metaidx);
                    // the remaining ids are the values the page has now
                    $val_idx = implode(':', array_keys($val_idx));
                    $this->saveIndexKey($metaname.'_p', '', $pid, $val_idx);
                }

                unset($metaidx);
                unset($metawords);
            }
        } finally {
            // never leave the index locked behind a failed write
            if ($requireLock) $this->unlock();
        }

        return true;
    }
175
176
    /**
     * Delete keys of the page from metadata index
     *
     * When $keys is empty, every key listed in the "metadata" index plus the
     * title are removed for the page. Keys that are neither "title" nor
     * listed in the "metadata" index are silently skipped.
     *
     * The index lock (when requested) is released even if writing fails.
     *
     * @param mixed $keys a key string or array of keys
     * @param bool $requireLock should be false only if the caller is responsible for index lock
     * @return bool  true when the function completed successfully
     *
     * @throws IndexAccessException when no page id is set on this instance
     * @throws IndexLockException
     * @throws IndexWriteException
     * @author Satoshi Sahara <[email protected]>
     * @author Tom N Harris <[email protected]>
     */
    public function deleteMetaKeys($keys = [], $requireLock = true)
    {
        // the page must be known before anything is written
        if (!isset($this->pageID)) {
            throw new IndexAccessException('Indexer: page unknown to deleteMetaKeys');
        }
        $pid = $this->pageID;

        if ($requireLock) $this->lock();

        try {
            $knownKeys = $this->getIndex('metadata', '');
            $knownKeys[] = 'title';

            // remove all metadata keys of the page when $keys is empty
            $keys = (empty($keys)) ? $knownKeys : (array)$keys;

            foreach ($keys as $metaname) {
                if ($metaname == 'title') {
                    // Special handling for titles so the index file is simpler
                    $this->saveIndexKey('title', '', $pid, '');
                } elseif (in_array($metaname, $knownKeys)) {
                    $meta_idx = $this->getIndex($metaname.'_i', '');
                    // ids of the values this page currently has for the key
                    $val_idx = explode(':', $this->getIndexKey($metaname.'_p', '', $pid));
                    foreach ($val_idx as $id) {
                        if ($id === '') continue;
                        // drop the page from the value's page list
                        $meta_idx[$id] = $this->updateTuple($meta_idx[$id], $pid, 0);
                    }
                    $this->saveIndex($metaname.'_i', '', $meta_idx);
                    $this->saveIndexKey($metaname.'_p', '', $pid, '');
                }
            }
        } finally {
            // never leave the index locked behind a failed write
            if ($requireLock) $this->unlock();
        }

        return true;
    }
225
226
    /**
     * Find pages containing a metadata key
     *
     * By default values are compared as case-sensitive strings; a leading
     * and/or trailing "*" in a search term acts as a wildcard. Pass a
     * callback that returns true or false to use a different comparison. It
     * is called with the value being searched for as the first argument and
     * the word from the index as the second one, so preg_match can be used
     * directly if the values are regexes.
     *
     * @param string    $key    name of the metadata key to look for
     * @param string    $value  search term to look for, must be a string or array of strings
     * @param callback  $func   comparison function
     * @return array            lists with page names, keys are query values if $value is array
     *
     * @author Tom N Harris <[email protected]>
     * @author Michael Hamann <[email protected]>
     */
    public function lookupKey($key, &$value, $func = null)
    {
        $single = !is_array($value);
        if ($single) {
            $needles = array($value);
        } else {
            $needles =& $value;
        }

        $metaname = $this->cleanName($key);
        $isTitle = ($key == 'title');

        // all known words of the key; for titles the title index itself
        $words = $isTitle
            ? $this->getIndex('title', '')
            : $this->getIndex($metaname.'_w', '');

        // word id => list of search terms that matched that word
        $matches = array();
        foreach ($needles as $needle) {
            if (!is_null($func)) {
                // custom comparison: test every word with the callback
                foreach ($words as $wid => $word) {
                    if (call_user_func_array($func, array($needle, $word))) {
                        $matches[$wid][] = $needle;
                    }
                }
                continue;
            }

            // check for wildcards at either end of the term
            $term = $needle;
            $prefix = '^';
            $suffix = '$';
            if (substr($term, 0, 1) == '*') {
                $term = substr($term, 1);
                $prefix = '';
            }
            if (substr($term, -1, 1) == '*') {
                $term = substr($term, 0, -1);
                $suffix = '';
            }

            if ($prefix && $suffix) {
                // no wildcard: exact lookup
                $wid = array_search($needle, $words, true);
                if ($wid !== false) {
                    $matches[$wid][] = $needle;
                }
            } else {
                $pattern = '/'.$prefix.preg_quote($term, '/').$suffix.'/';
                foreach (array_keys(preg_grep($pattern, $words)) as $wid) {
                    $matches[$wid][] = $needle;
                }
            }
        }

        unset($words); // free the used memory

        // every search term gets an entry, even when nothing matched
        $result = array();
        foreach ($needles as $needle) {
            $result[$needle] = array();
        }

        $page_idx = $this->getIndex('page', '');

        if ($isTitle) {
            // in the title index the line number is the page id
            foreach ($matches as $pid => $hits) {
                $page = $page_idx[$pid];
                foreach ($hits as $needle) {
                    $result[$needle][] = $page;
                }
            }
        } else {
            // each line of the _i index lists the pages using the value with
            // the same id, in the form page_id*1:page2_id*1
            $lines = $this->getIndex($metaname.'_i', '');

            foreach ($matches as $wid => $hits) {
                // only the keys of the parsed tuples are of interest
                $pages = array_keys($this->parseTuples($page_idx, $lines[$wid]));
                foreach ($hits as $needle) {
                    $result[$needle] = array_merge($result[$needle], $pages);
                }
            }
        }

        return $single ? $result[$value] : $result;
    }
335
336
    /**
     * Renames a meta value in the index
     * This doesn't change the meta value in the pages, it assumes that
     * all pages will be updated.
     *
     * Nothing is changed when the old value is not in the index. The index
     * lock is released even if writing fails.
     *
     * @param string $key The metadata key of which a value shall be changed
     * @param string $oldvalue The old value that shall be renamed
     * @param string $newvalue The new value to which the old value shall be renamed,
     *                          if exists values will be merged
     * @return bool  true when the function completed; failures are reported by exceptions
     *
     * @throws IndexLockException
     * @throws IndexWriteException
     */
    public function renameMetaValue($key, $oldvalue, $newvalue)
    {
        $this->lock();

        try {
            // change the relation references index
            // (addressed as "<key>_w" like every other access to this index)
            $metavalues = $this->getIndex($key.'_w', '');
            $oldid = array_search($oldvalue, $metavalues, true);
            if ($oldid !== false) {
                $newid = array_search($newvalue, $metavalues, true);
                if ($newid !== false) {
                    // free memory
                    unset($metavalues);

                    // okay, now we have two entries for the same value. we need to merge them.
                    $indexline = $this->getIndexKey($key.'_i', '', $oldid);
                    if ($indexline != '') {
                        $newindexline = $this->getIndexKey($key.'_i', '', $newid);
                        $pagekeys     = $this->getIndex($key.'_p', '');
                        $parts = explode(':', $indexline);
                        foreach ($parts as $part) {
                            // tolerate stray separators in the index line
                            if ($part === '') continue;
                            list($id, $count) = explode('*', $part);
                            $newindexline = $this->updateTuple($newindexline, $id, $count);

                            $keyline = explode(':', $pagekeys[$id]);
                            // remove old meta value
                            $keyline = array_diff($keyline, array($oldid));
                            // add new meta value when not already present
                            if (!in_array($newid, $keyline)) {
                                array_push($keyline, $newid);
                            }
                            $pagekeys[$id] = implode(':', $keyline);
                        }
                        $this->saveIndex($key.'_p', '', $pagekeys);
                        unset($pagekeys);
                        $this->saveIndexKey($key.'_i', '', $oldid, '');
                        $this->saveIndexKey($key.'_i', '', $newid, $newindexline);
                    }
                } else {
                    // the new value is unknown so far: relabel the old entry
                    $metavalues[$oldid] = $newvalue;
                    $this->saveIndex($key.'_w', '', $metavalues);
                }
            }
        } finally {
            // never leave the index locked behind a failed write
            $this->unlock();
        }

        return true;
    }
396
397
    /**
     * Return a list of words or values with their frequency, most used first
     *
     * @param int       $min    bottom frequency threshold, values below 1 are treated as 1
     * @param int       $max    upper frequency limit. No limit if $max<$min
     * @param int       $minlen minimum length of words to count
     * @param string    $key    metadata key to list. Uses the fulltext index if not given
     * @return array            list of words as the keys and frequency as values
     *
     * @author Tom N Harris <[email protected]>
     */
    public function histogram($min=1, $max=0, $minlen=3, $key=null)
    {
        $min = ($min < 1) ? 1 : $min;
        $max = ($max < $min) ? 0 : $max;

        // frequency filter shared by all three index types
        $wanted = static function ($count) use ($min, $max) {
            return $count >= $min && (!$max || $count <= $max);
        };

        $histogram = [];

        if (is_null($key)) {
            // fulltext index: one pair of w/i index files per word length
            foreach ((new FulltextIndex)->listIndexLengths() as $length) {
                if ($length < $minlen) continue;

                $words = null; // loaded only when a line passes the filter
                foreach ($this->getIndex('i', $length) as $wid => $line) {
                    $freq = $this->countTuples($line);
                    if (!$wanted($freq)) continue;

                    if ($words === null) {
                        $words = $this->getIndex('w', $length);
                    }
                    $histogram[$words[$wid]] = $freq;
                }
            }
        } elseif ($key == 'title') {
            // titles: count how often each title occurs in the title index
            $titles = array_count_values($this->getIndex('title', ''));
            foreach ($titles as $title => $count) {
                if ($wanted($count) && strlen($title) >= $minlen) {
                    $histogram[$title] = $count;
                }
            }
        } else {
            // other metadata keys: frequency comes from the tuples per value
            $metaname = $this->cleanName($key);
            $frequencies = [];
            foreach ($this->getIndex($metaname.'_i', '') as $wid => $line) {
                $freq = $this->countTuples($line);
                if ($wanted($freq)) {
                    $frequencies[$wid] = $freq;
                }
            }

            // the value list is only read when something passed the filter
            if (!empty($frequencies)) {
                $words = $this->getIndex($metaname.'_w', '');
                foreach ($frequencies as $wid => $freq) {
                    if (strlen($words[$wid]) >= $minlen) {
                        $histogram[$words[$wid]] = $freq;
                    }
                }
            }
        }

        arsort($histogram);
        return $histogram;
    }
462
463
    /**
     * Clear the Metadata Index
     *
     * Deletes the _w, _i and _p index files of every key listed in the
     * "metadata" index, then the title index and the "metadata" index itself.
     * Files that cannot be deleted are ignored.
     *
     * @param bool $requireLock should be false only if the caller is responsible for index lock
     * @return bool  always true
     * @throws IndexLockException
     */
    public function clear($requireLock = true)
    {
        global $conf;

        if ($requireLock) {
            $this->lock();
        }

        // collect the index names first; the key list has to be read before
        // the "metadata" index itself is removed
        $targets = [];
        foreach ($this->getIndex('metadata', '') as $metaname) {
            foreach (['_w', '_i', '_p'] as $suffix) {
                $targets[] = $metaname.$suffix;
            }
        }
        $targets[] = 'title';
        $targets[] = 'metadata';

        foreach ($targets as $basename) {
            @unlink($conf['indexdir'].'/'.$basename.'.idx');
        }

        if ($requireLock) {
            $this->unlock();
        }
        return true;
    }
488
489
    /**
490
     * Returns the backlinks for a given page
491
     *
492
     * Uses the metadata index.
493
     *
494
     * @param string $id           The id for which links shall be returned
495
     * @param bool   $ignore_perms Ignore the fact that pages are hidden or read-protected
496
     * @return array The pages that contain links to the given page
497
     *
498
     * @author     Andreas Gohr <[email protected]>
499
     */
500
    public function backlinks($id, $ignore_perms = false)
501
    {
502
        $result = $this->lookupKey('relation_references', $id);
503
504
        if (!count($result)) return $result;
505
506
        // check ACL permissions
507
        foreach (array_keys($result) as $idx) {
508
            if (($ignore_perms !== true
509
                && (isHiddenPage($result[$idx]) || auth_quickaclcheck($result[$idx]) < AUTH_READ)
510
                ) || !page_exists($result[$idx], '', false)
511
            ) {
512
                unset($result[$idx]);
513
            }
514
        }
515
516
        sort($result);
517
        return $result;
518
    }
519
520
    /**
521
     * Returns the pages that use a given media file
522
     *
523
     * Uses the relation media metadata property and the metadata index.
524
     *
525
     * Note that before 2013-07-31 the second parameter was the maximum number
526
     * of results and permissions were ignored. That's why the parameter is now
527
     * checked to be explicitely set to true (with type bool) in order to be
528
     * compatible with older uses of the function.
529
     *
530
     * @param string $id           The media id to look for
531
     * @param bool   $ignore_perms Ignore hidden pages and acls (optional, default: false)
532
     * @return array A list of pages that use the given media file
533
     *
534
     * @author     Andreas Gohr <[email protected]>
535
     */
536
    public function mediause($id, $ignore_perms = false)
537
    {
538
        $result = $this->lookupKey('relation_media', $id);
539
540
        if (!count($result)) return $result;
541
542
        // check ACL permissions
543
        foreach (array_keys($result) as $idx) {
544
            if (($ignore_perms !== true
545
                && (isHiddenPage($result[$idx]) || auth_quickaclcheck($result[$idx]) < AUTH_READ)
546
                ) || !page_exists($result[$idx], '', false)
547
            ) {
548
                unset($result[$idx]);
549
            }
550
        }
551
552
        sort($result);
553
        return $result;
554
    }
555
}
556