Failed Conditions
Pull Request — master (#2943)
by
unknown
03:21
created

MetadataIndex::mediause()   B

Complexity

Conditions 7
Paths 4

Size

Total Lines 19

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 7
nc 4
nop 2
dl 0
loc 19
rs 8.8333
c 0
b 0
f 0
1
<?php
2
3
namespace dokuwiki\Search;
4
5
use dokuwiki\Search\Exception\IndexAccessException;
6
use dokuwiki\Search\Exception\IndexLockException;
7
use dokuwiki\Search\Exception\IndexWriteException;
8
9
/**
10
 * Class DokuWiki Metadata Index (Singleton)
11
 *
12
 * @license    GPL 2 (http://www.gnu.org/licenses/gpl.html)
13
 * @author     Andreas Gohr <[email protected]>
14
 * @author Tom N Harris <[email protected]>
15
 */
16
class MetadataIndex extends AbstractIndex
17
{
18
    /** @var MetadataIndex $instance the shared instance, null until getInstance() is first called */
    protected static $instance = null;

    /**
     * Return the shared MetadataIndex object, creating it on first use
     *
     * The object is built with late static binding, i.e. from the class
     * this method is called on.
     *
     * @return MetadataIndex
     */
    public static function getInstance()
    {
        if (static::$instance !== null) {
            return static::$instance;
        }

        static::$instance = new static();
        return static::$instance;
    }
33
34
    /**
     * Return a list of pages containing the metadata key
     * Note: override parent class methods
     *
     * Without a key the complete page index is returned. For the special key
     * 'title' only pages having a non-empty entry in the title index are
     * listed. For any other key the pages are collected from every line of
     * the "<key>_i" index.
     *
     * @param string    $key    list only pages containing the metadata key
     * @return array            list of page names
     *
     * @author Tom N Harris <[email protected]>
     */
    public function getPages($key = null)
    {
        $page_idx = $this->getIndex('page', '');
        if (is_null($key)) return $page_idx; // same as parent method

        // Special handling for titles
        if ($key == 'title') {
            $title_idx = $this->getIndex('title', '');
            // pages located beyond the end of the title index have no title entry
            array_splice($page_idx, count($title_idx));
            foreach ($title_idx as $i => $title) {
                if ($title === '') unset($page_idx[$i]);
            }
            return array_values($page_idx);
        }

        $metaname = $this->cleanName($key);
        $pages = array();
        $lines = $this->getIndex($metaname.'_i', '');
        foreach ($lines as $line) {
            // Union by key instead of array_merge(): a page name consisting of
            // digits only is stored as an integer array key, and array_merge()
            // renumbers integer keys, which would replace the page name by a
            // meaningless sequence number. Only the keys are used below, so
            // keeping the first value of a duplicate key is fine.
            $pages += $this->parseTuples($page_idx, $line);
        }

        // PHP casts integer-like keys to int, turn them back into page names
        return array_map('strval', array_keys($pages));
    }
66
67
    /**
     * Add/update keys to/of the metadata index
     *
     * Adding new keys does not remove other keys for the page.
     * An empty value will erase the key.
     * The $key parameter can be an array to add multiple keys. $value will
     * not be used if $key is an array.
     *
     * The index lock taken by this method is released again even when one
     * of the index writes fails with an exception.
     *
     * @param string $page a page name
     * @param mixed $key a key string or array of key=>value pairs
     * @param mixed $value the value or list of values
     * @param bool $requireLock should be false only if the caller is responsible for index lock
     * @return bool  if the function completed successfully
     *
     * @throws IndexAccessException
     * @throws IndexLockException
     * @throws IndexWriteException
     * @author Michael Hamann <[email protected]>
     * @author Tom N Harris <[email protected]>
     */
    public function addMetaKeys($page, $key, $value = null, $requireLock = true)
    {
        if (!is_array($key)) {
            $key = array($key => $value);
        } elseif (!is_null($value)) {
            // $key is array, but $value is not null
            throw new IndexAccessException('array passed to addMetaKeys but value is not null');
        }

        // load known documents
        $pid = $this->getPID($page);

        if ($requireLock) $this->lock();

        try {
            // Special handling for titles so the index file is simpler
            if (array_key_exists('title', $key)) {
                $value = $key['title'];
                if (is_array($value)) {
                    $value = $value[0];
                }
                $this->saveIndexKey('title', '', $pid, $value);
                unset($key['title']);
            }

            foreach ($key as $name => $values) {
                $metaname = $this->cleanName($name);
                $this->addIndexKey('metadata', '', $metaname);
                $metaidx = $this->getIndex($metaname.'_i', '');
                $metawords = $this->getIndex($metaname.'_w', '');
                $addwords = false;

                if (!is_array($values)) $values = array($values);

                // value ids currently stored for this page
                $val_idx = $this->getIndexKey($metaname.'_p', '', $pid);
                if ($val_idx !== '') {
                    // -1 means remove, 0 keep, 1 add
                    // everything starts out as "remove" and is promoted below
                    $val_idx = array_fill_keys(explode(':', $val_idx), -1);
                } else {
                    $val_idx = array();
                }

                foreach ($values as $val) {
                    $val = (string)$val;
                    if ($val !== '') {
                        $id = array_search($val, $metawords, true);
                        if ($id === false) {
                            // didn't find $val, so we'll add it to the end of metawords
                            // and create a placeholder in metaidx
                            $id = count($metawords);
                            $metawords[$id] = $val;
                            $metaidx[$id] = '';
                            $addwords = true;
                        }
                        // test if value is already in the index
                        if (isset($val_idx[$id]) && $val_idx[$id] <= 0) {
                            $val_idx[$id] = 0;
                        } else { // else add it
                            $val_idx[$id] = 1;
                        }
                    }
                }

                if ($addwords) {
                    $this->saveIndex($metaname.'_w', '', $metawords);
                }

                $vals_changed = false;
                foreach ($val_idx as $id => $action) {
                    if ($action == -1) {
                        $metaidx[$id] = $this->updateTuple($metaidx[$id], $pid, 0);
                        $vals_changed = true;
                        unset($val_idx[$id]);
                    } elseif ($action == 1) {
                        $metaidx[$id] = $this->updateTuple($metaidx[$id], $pid, 1);
                        $vals_changed = true;
                    }
                }

                if ($vals_changed) {
                    $this->saveIndex($metaname.'_i', '', $metaidx);
                    // what is left in $val_idx are the ids kept or added for this page
                    $val_idx = implode(':', array_keys($val_idx));
                    $this->saveIndexKey($metaname.'_p', '', $pid, $val_idx);
                }

                unset($metaidx);
                unset($metawords);
            }
        } finally {
            // never leave the index locked behind, not even when a write threw
            if ($requireLock) $this->unlock();
        }

        return true;
    }
178
179
    /**
     * Delete keys of the page from metadata index
     *
     * When $keys is empty, every key listed in the 'metadata' index plus the
     * title is removed for the page. Requested keys that are neither 'title'
     * nor listed in the 'metadata' index are skipped.
     *
     * The index lock taken by this method is released again even when one
     * of the index writes fails with an exception.
     *
     * NOTE(review): requested keys are compared with the 'metadata' index as
     * given, whereas addMetaKeys() stores names passed through cleanName().
     * Confirm that callers hand in already cleaned key names.
     *
     * @param string $page a page name
     * @param mixed $keys a key string or array of keys
     * @param bool $requireLock should be false only if the caller is responsible for index lock
     * @return bool  always true; failures are reported by exceptions
     *
     * @throws IndexLockException
     * @throws IndexWriteException
     * @author Satoshi Sahara <[email protected]>
     * @author Tom N Harris <[email protected]>
     */
    public function deleteMetaKeys($page, $keys = [], $requireLock = true)
    {
        // load known documents
        $pid = $this->getPID($page);

        if ($requireLock) $this->lock();

        try {
            $knownKeys = $this->getIndex('metadata', '');
            $knownKeys[] = 'title';

            // remove all metadata keys of the page when $keys is empty
            $keys = (empty($keys)) ? $knownKeys : (array)$keys;

            foreach ($keys as $metaname) {
                if ($metaname == 'title') {
                    // Special handling for titles so the index file is simpler
                    $this->saveIndexKey('title', '', $pid, '');
                } elseif (in_array($metaname, $knownKeys)) {
                    $meta_idx = $this->getIndex($metaname.'_i', '');
                    // ids of the values currently recorded for this page
                    $val_idx = explode(':', $this->getIndexKey($metaname.'_p', '', $pid));
                    foreach ($val_idx as $id) {
                        if ($id === '') continue;
                        $meta_idx[$id] = $this->updateTuple($meta_idx[$id], $pid, 0);
                    }
                    $this->saveIndex($metaname.'_i', '', $meta_idx);
                    $this->saveIndexKey($metaname.'_p', '', $pid, '');
                }
            }
        } finally {
            // never leave the index locked behind, not even when a write threw
            if ($requireLock) $this->unlock();
        }

        return true;
    }
224
225
    /**
     * Find pages containing a metadata key
     *
     * By default values are matched as case-sensitive strings; a search term
     * may start and/or end with '*' to match any word ending with, starting
     * with or containing the remaining text. To compare differently, pass a
     * callback returning true or false. It is called with the search term as
     * the first and the word from the index as the second argument, so
     * preg_match can be used directly if the values are regexes.
     *
     * @param string    $key    name of the metadata key to look for
     * @param string    $value  search term to look for, must be a string or array of strings
     * @param callback  $func   comparison function
     * @return array            lists with page names, keys are query values if $value is array
     *
     * @author Tom N Harris <[email protected]>
     * @author Michael Hamann <[email protected]>
     */
    public function lookupKey($key, &$value, $func = null)
    {
        // always work on a list of search terms; an array argument is aliased
        if (is_array($value)) {
            $value_array =& $value;
        } else {
            $value_array = array($value);
        }

        $metaname = $this->cleanName($key);
        $isTitle = ($key == 'title');

        // words to search in: titles are stored directly, other keys use a "_w" index
        if ($isTitle) {
            $words = $this->getIndex('title', '');
        } else {
            $words = $this->getIndex($metaname.'_w', '');
        }

        // position of each matching word => list of search terms that matched it
        $value_ids = array();
        foreach ($value_array as $val) {
            // custom comparison: test every word with the callback
            if (!is_null($func)) {
                foreach ($words as $i => $word) {
                    if (call_user_func_array($func, array($val, $word))) {
                        $value_ids[$i][] = $val;
                    }
                }
                continue;
            }

            // check for wildcards at either end of the term
            $needle = $val;
            $anchorStart = '^';
            $anchorEnd = '$';
            if (substr($needle, 0, 1) == '*') {
                $needle = substr($needle, 1);
                $anchorStart = '';
            }
            if (substr($needle, -1, 1) == '*') {
                $needle = substr($needle, 0, -1);
                $anchorEnd = '';
            }

            if ($anchorStart && $anchorEnd) {
                // no wildcard at all: exact, type-strict lookup of the original term
                $i = array_search($val, $words, true);
                if ($i !== false) {
                    $value_ids[$i][] = $val;
                }
                continue;
            }

            // wildcard search: only the side without '*' stays anchored
            $pattern = '/'.$anchorStart.preg_quote($needle, '/').$anchorEnd.'/';
            foreach (array_keys(preg_grep($pattern, $words)) as $i) {
                $value_ids[$i][] = $val;
            }
        }

        unset($words); // free the used memory

        // every search term gets an entry, even when nothing matched
        $result = array();
        foreach ($value_array as $val) {
            $result[$val] = array();
        }

        $page_idx = $this->getIndex('page', '');

        if ($isTitle) {
            // a position in the title index is directly the position of the page
            foreach ($value_ids as $pid => $terms) {
                $page = $page_idx[$pid];
                foreach ($terms as $val) {
                    $result[$val][] = $page;
                }
            }
        } else {
            // each line of the "_i" index holds the page tuples of one word
            $lines = $this->getIndex($metaname.'_i', '');

            foreach ($value_ids as $wid => $terms) {
                // tuples have the form page_id*1:page2_id*1 and so on, the parsed
                // array is keyed by page, so only its keys are of interest
                $pages = array_keys($this->parseTuples($page_idx, $lines[$wid]));
                foreach ($terms as $val) {
                    $result[$val] = array_merge($result[$val], $pages);
                }
            }
        }

        // a scalar search term yields its page list directly
        return is_array($value) ? $result : $result[$value];
    }
334
335
    /**
     * Renames a meta value in the index
     * This doesn't change the meta value in the pages, it assumes that
     * all pages will be updated.
     *
     * When the new value is not indexed yet, the old word is simply replaced.
     * Otherwise the page tuples of the old value are merged into the line of
     * the new value and the per-page value lists are updated accordingly.
     * Nothing is changed when the old value is not in the index.
     *
     * The index lock taken by this method is released again even when one
     * of the index writes fails with an exception.
     *
     * NOTE(review): $key is used for the index names as given, whereas the
     * other methods of this class pass it through cleanName() first.
     * Confirm that callers hand in an already cleaned key name.
     *
     * @param string $key The metadata key of which a value shall be changed
     * @param string $oldvalue The old value that shall be renamed
     * @param string $newvalue The new value to which the old value shall be renamed,
     *                          if exists values will be merged
     * @return bool  always true; failures are reported by exceptions
     *
     * @throws IndexLockException
     * @throws IndexWriteException
     */
    public function renameMetaValue($key, $oldvalue, $newvalue)
    {
        $this->lock();

        try {
            // change the relation references index
            // (named like the saveIndex() call below, so the same index is read and written)
            $metavalues = $this->getIndex($key.'_w', '');
            $oldid = array_search($oldvalue, $metavalues, true);
            if ($oldid !== false) {
                $newid = array_search($newvalue, $metavalues, true);
                if ($newid !== false) {
                    // free memory
                    unset($metavalues);

                    // okay, now we have two entries for the same value. we need to merge them.
                    $indexline = $this->getIndexKey($key.'_i', '', $oldid);
                    if ($indexline != '') {
                        $newindexline = $this->getIndexKey($key.'_i', '', $newid);
                        $pagekeys     = $this->getIndex($key.'_p', '');
                        $parts = explode(':', $indexline);
                        foreach ($parts as $part) {
                            list($id, $count) = explode('*', $part);
                            $newindexline = $this->updateTuple($newindexline, $id, $count);

                            $keyline = explode(':', $pagekeys[$id]);
                            // remove old meta value
                            $keyline = array_diff($keyline, array($oldid));
                            // add new meta value when not already present
                            // (loose comparison on purpose: ids read from the file are strings)
                            if (!in_array($newid, $keyline)) {
                                array_push($keyline, $newid);
                            }
                            $pagekeys[$id] = implode(':', $keyline);
                        }
                        $this->saveIndex($key.'_p', '', $pagekeys);
                        unset($pagekeys);
                        $this->saveIndexKey($key.'_i', '', $oldid, '');
                        $this->saveIndexKey($key.'_i', '', $newid, $newindexline);
                    }
                } else {
                    // the new value is unknown so far, just replace the word itself
                    $metavalues[$oldid] = $newvalue;
                    $this->saveIndex($key.'_w', '', $metavalues);
                }
            }
        } finally {
            // never leave the index locked behind, not even when a write threw
            $this->unlock();
        }

        return true;
    }
395
396
    /**
     * Return a list of words with their frequency, sorted by number of times used
     *
     * Depending on $key the words are taken from the title index, from the
     * index of the given metadata key, or from the fulltext index.
     *
     * @param int       $min    bottom frequency threshold
     * @param int       $max    upper frequency limit. No limit if $max<$min
     * @param int       $minlen minimum length of words to count
     * @param string    $key    metadata key to list. Uses the fulltext index if not given
     * @return array            list of words as the keys and frequency as values
     *
     * @author Tom N Harris <[email protected]>
     */
    public function histogram($min=1, $max=0, $minlen=3, $key=null)
    {
        if ($min < 1) {
            $min = 1;
        }
        if ($max < $min) {
            $max = 0;
        }

        // tells whether a frequency passes the thresholds; a falsy $max means no upper limit
        $inRange = function ($count) use ($min, $max) {
            return $count >= $min && (!$max || $count <= $max);
        };

        $result = array();

        if (is_null($key)) {
            // fulltext index: one pair of i/w index files per word length
            $fulltext = FulltextIndex::getInstance();
            foreach ($fulltext->listIndexLengths() as $length) {
                if ($length < $minlen) continue;

                $index = $this->getIndex('i', $length);
                $words = null;
                foreach ($index as $wid => $line) {
                    $freq = $this->countTuples($line);
                    if (!$inRange($freq)) continue;

                    // the word list is only loaded once a word actually qualifies
                    if ($words === null) {
                        $words = $this->getIndex('w', $length);
                    }
                    $result[$words[$wid]] = $freq;
                }
            }
        } elseif ($key == 'title') {
            // titles: the frequency is the number of identical lines in the title index
            $counts = array_count_values($this->getIndex('title', ''));
            foreach ($counts as $title => $count) {
                if ($inRange($count) && strlen($title) >= $minlen) {
                    $result[$title] = $count;
                }
            }
        } else {
            // metadata key: frequencies come from the "_i" index, the words from "_w"
            $metaname = $this->cleanName($key);
            $frequencies = array();
            foreach ($this->getIndex($metaname.'_i', '') as $wid => $line) {
                $freq = $this->countTuples($line);
                if ($inRange($freq)) {
                    $frequencies[$wid] = $freq;
                }
            }

            if ($frequencies) {
                $words = $this->getIndex($metaname.'_w', '');
                foreach ($frequencies as $wid => $freq) {
                    if (strlen($words[$wid]) >= $minlen) {
                        $result[$words[$wid]] = $freq;
                    }
                }
            }
        }

        arsort($result);
        return $result;
    }
462
463
    /**
     * Clear the Metadata Index
     *
     * Deletes the "_w", "_i" and "_p" index files of every key listed in the
     * 'metadata' index, followed by the title and the metadata index files
     * themselves. Files that cannot be deleted are silently ignored.
     *
     * @param bool $requireLock should be false only if the caller is responsible for index lock
     * @return bool  always true
     * @throws Exception\IndexLockException
     */
    public function clear($requireLock = true)
    {
        global $conf;

        if ($requireLock) {
            $this->lock();
        }

        // collect the files first, in the order they are going to be removed
        $files = array();
        foreach ($this->getIndex('metadata', '') as $metaname) {
            foreach (array('_w.idx', '_i.idx', '_p.idx') as $suffix) {
                $files[] = $conf['indexdir'].'/'.$metaname.$suffix;
            }
        }
        $files[] = $conf['indexdir'].'/title.idx';
        $files[] = $conf['indexdir'].'/metadata.idx';

        foreach ($files as $file) {
            @unlink($file);
        }

        if ($requireLock) {
            $this->unlock();
        }
        return true;
    }
488
489
    /**
     * Returns the backlinks for a given page
     *
     * Uses the metadata index (key 'relation_references').
     *
     * @param string $id           The id for which links shall be returned
     * @param bool   $ignore_perms Ignore the fact that pages are hidden or read-protected
     * @return array The pages that contain links to the given page
     *
     * @author     Andreas Gohr <[email protected]>
     */
    public function backlinks($id, $ignore_perms = false)
    {
        $result = $this->lookupKey('relation_references', $id);
        return $this->filterAccessiblePages($result, $ignore_perms);
    }

    /**
     * Returns the pages that use a given media file
     *
     * Uses the relation media metadata property and the metadata index.
     *
     * Note that before 2013-07-31 the second parameter was the maximum number
     * of results and permissions were ignored. That's why the parameter is now
     * checked to be explicitly set to true (with type bool) in order to be
     * compatible with older uses of the function.
     *
     * @param string $id           The media id to look for
     * @param bool   $ignore_perms Ignore hidden pages and acls (optional, default: false)
     * @return array A list of pages that use the given media file
     *
     * @author     Andreas Gohr <[email protected]>
     */
    public function mediause($id, $ignore_perms = false)
    {
        $result = $this->lookupKey('relation_media', $id);
        return $this->filterAccessiblePages($result, $ignore_perms);
    }

    /**
     * Reduce a list of pages to existing pages the current user may see
     *
     * Shared by backlinks() and mediause(). Pages that do not exist are always
     * dropped. Hidden pages and pages below AUTH_READ are dropped unless
     * $ignore_perms is exactly boolean true.
     *
     * @param array $pages         page names as returned by lookupKey()
     * @param bool  $ignore_perms  skip the hidden page and ACL checks when true
     * @return array the remaining page names, sorted and reindexed
     */
    private function filterAccessiblePages($pages, $ignore_perms)
    {
        if (!count($pages)) return $pages;

        // only a real boolean true disables the checks (see note on mediause())
        $checkPerms = ($ignore_perms !== true);

        foreach (array_keys($pages) as $idx) {
            $page = $pages[$idx];
            if (($checkPerms
                && (isHiddenPage($page) || auth_quickaclcheck($page) < AUTH_READ)
                ) || !page_exists($page, '', false)
            ) {
                unset($pages[$idx]);
            }
        }

        sort($pages);
        return $pages;
    }
555
}
556