FulltextSearch - Code Metrics - Inspection of "Refactor fulltext search functions and class Doku_..." - splitbrain/dokuwiki - Measure and Improve Code Quality continuously with Scrutinizer

Failed Conditions

Pull Request — master (#2943)

by Andreas

created 2020-09-10 15:40 UTC

FulltextSearch F

↳ Parent: Project

Complexity

Total Complexity

Size/Duplication

Total Lines	458
Duplicated Lines	0 %

Coupling/Cohesion

Components	2
Dependencies	6

Importance

Changes

Metric	Value
dl	0
loc	458
rs	2
c	0
b	0
f	0
wmc	80
lcom	2
cbo	6

11 Methods

Rating	Name	Size	Complexity
A	getInstance()	7	2
A	__construct()	1	1
A	pageSearch()	15	2
F	pageSearchCallBack()	106	27
B	filterResultsByTime()	19	10
A	pagemtimesorter()	6	1
C	snippet()	106	14
B	snippetRePreprocess()	33	8
B	resultCombine()	22	6
A	resultUnite()	15	4
A	resultComplement()	15	5

How to fix Complexity

<?php

namespace dokuwiki\Search;

use dokuwiki\Extension\Event;
use dokuwiki\Utf8;

// create snippets for the first few results only
const FT_SNIPPET_NUMBER = 15;

/**
 * Class DokuWiki Fulltext Search
 *
 * @license    GPL 2 (http://www.gnu.org/licenses/gpl.html)
 * @author     Andreas Gohr <[email protected]>
 */
class FulltextSearch
{
    /** @var FulltextSearch $instance */
    protected static $instance = null;

    /**
     * Get new or existing singleton instance of the FulltextSearch
     *
     * @return FulltextSearch
     */
    public static function getInstance()
    {
        if (is_null(static::$instance)) {
            static::$instance = new static();
        }
        return static::$instance;
    }

    /**
     *  Fulltext Search constructor. prevent direct object creation
     */
    protected function __construct() {}

    /**
     * The fulltext search
     *
     * Returns a list of matching documents for the given query
     *
     * refactored into pageSearch(), pageSearchCallBack() and trigger_event()
     *
     * @param string     $query
     * @param array      $highlight
     * @param string     $sort
     * @param int|string $after  only show results with mtime after this date,
     *                           accepts timestap or strtotime arguments
     * @param int|string $before only show results with mtime before this date,
     *                           accepts timestap or strtotime arguments
     *
     * @return array
     */
    public function pageSearch($query, &$highlight, $sort = null, $after = null, $before = null)
    {
        if ($sort === null) {
            $sort = 'hits';
        }
        $data = [
            'query' => $query,
            'sort' => $sort,
            'after' => $after,
            'before' => $before
        ];
        $data['highlight'] =& $highlight;
        $action = [$this, 'pageSearchCallBack'];
        return Event::createAndTrigger('SEARCH_QUERY_FULLPAGE', $data, $action);
    }

    /**
     * Returns a list of matching documents for the given query
     *
     * @author Andreas Gohr <[email protected]>
     * @author Kazutaka Miyasaka <[email protected]>
     *
     * @param array $data  event data
     * @return array       matching documents
     */
    public function pageSearchCallBack(&$data)
    {
        // parse the given query
        $q = (new QueryParser)->convert($data['query']);
        $data['highlight'] = $q['highlight'];

        if (empty($q['parsed_ary'])) return array();

        // lookup all words found in the query
        $FulltextIndex = FulltextIndex::getInstance();
        $lookup = $FulltextIndex->lookupWords($q['words']);

        // get all pages in this dokuwiki site (!: includes nonexistent pages)
        $pages_all = array();
        foreach ($FulltextIndex->getPages() as $id) {
            $pages_all[$id] = 0; // base: 0 hit
        }

        // process the query
        $stack = array();
        foreach ($q['parsed_ary'] as $token) {
$collection = json_decode($data, true);
if ( ! is_array($collection)) {
    throw new \RuntimeException('$collection must be an array.');
}

foreach ($collection as $item) { /** ... */ }
            switch (substr($token, 0, 3)) {
                case 'W+:':
                case 'W-:':
                case 'W_:': // word
                    $word    = substr($token, 3);
                    $stack[] = (array) $lookup[$word];
                    break;
                case 'P+:':
                case 'P-:': // phrase
                    $phrase = substr($token, 3);
                    // since phrases are always parsed as ((W1)(W2)...(P)),
                    // the end($stack) always points the pages that contain
                    // all words in this phrase
                    $pages  = end($stack);
                    $pages_matched = array();
                    foreach (array_keys($pages) as $id) {
                        $evdata = array(
                            'id' => $id,
                            'phrase' => $phrase,
                            'text' => rawWiki($id)
                        );
                        $event = new Event('FULLTEXT_PHRASE_MATCH', $evdata);
                        if ($event->advise_before() && $event->result !== true) {
                            $text = Utf8\PhpString::strtolower($evdata['text']);
                            if (strpos($text, $phrase) !== false) {
                                $event->result = true;
                            }
                        }
                        $event->advise_after();
                        if ($event->result === true) {
                            $pages_matched[$id] = 0; // phrase: always 0 hit
                        }
                    }
                    $stack[] = $pages_matched;
                    break;
                case 'N+:':
                case 'N-:': // namespace
                    $ns = cleanID(substr($token, 3)) . ':';
                    $pages_matched = array();
                    foreach (array_keys($pages_all) as $id) {
                        if (strpos($id, $ns) === 0) {
                            $pages_matched[$id] = 0; // namespace: always 0 hit
                        }
                    }
                    $stack[] = $pages_matched;
                    break;
                case 'AND': // and operation
                    list($pages1, $pages2) = array_splice($stack, -2);
                    $stack[] = $this->resultCombine(array($pages1, $pages2));
                    break;
                case 'OR':  // or operation
                    list($pages1, $pages2) = array_splice($stack, -2);
                    $stack[] = $this->resultUnite(array($pages1, $pages2));
                    break;
                case 'NOT': // not operation (unary)
                    $pages   = array_pop($stack);
                    $stack[] = $this->resultComplement(array($pages_all, $pages));
                    break;
            }
        }
        $docs = array_pop($stack);

        if (empty($docs)) return array();

        // check: settings, acls, existence
        foreach (array_keys($docs) as $id) {
            if (isHiddenPage($id)
                || auth_quickaclcheck($id) < AUTH_READ
                || !page_exists($id, '', false)
            ) {
                unset($docs[$id]);
            }
        }

        $docs = $this->filterResultsByTime($docs, $data['after'], $data['before']);

        if ($data['sort'] === 'mtime') {
            uksort($docs, [$this, 'pagemtimesorter']);
        } else {
            // sort docs by count
            arsort($docs);
        }

        return $docs;
    }

    /**
     * @param array      $results search results in the form pageid => value
     * @param int|string $after   only returns results with mtime after this date,
     *                            accepts timestap or strtotime arguments
     * @param int|string $before  only returns results with mtime after this date,
     *                            accepts timestap or strtotime arguments
     *
     * @return array
     */
    protected function filterResultsByTime(array $results, $after, $before)
    {
        if ($after || $before) {
            $after = is_int($after) ? $after : strtotime($after);
            $before = is_int($before) ? $before : strtotime($before);

            foreach ($results as $id => $value) {
                $mTime = filemtime(wikiFN($id));
                if ($after && $after > $mTime) {
                    unset($results[$id]);
                    continue;
                }
                if ($before && $before < $mTime) {
                    unset($results[$id]);
                }
            }
        }
        return $results;
    }

    /**
     * Sort pages by their mtime, from newest to oldest
     *
     * @param string $a
     * @param string $b
     *
     * @return int Returns < 0 if $a is newer than $b, > 0 if $b is newer than $a
     *             and 0 if they are of the same age
     */
    protected function pagemtimesorter($a, $b)
    {
        $mtimeA = filemtime(wikiFN($a));
        $mtimeB = filemtime(wikiFN($b));
        return $mtimeB - $mtimeA;
    }

    /**
     * Creates a snippet extract
     *
     * @author Andreas Gohr <[email protected]>
     * @triggers FULLTEXT_SNIPPET_CREATE
     *
     * @param string $id page id
     * @param array $highlight
     * @return mixed
     */
    public function snippet($id, $highlight)
    {
        $text = rawWiki($id);
        $text = str_replace("\xC2\xAD",'',$text); // remove soft-hyphens
        $evdata = array(
            'id'        => $id,
            'text'      => &$text,
            'highlight' => &$highlight,
            'snippet'   => '',
        );

        $evt = new Event('FULLTEXT_SNIPPET_CREATE', $evdata);
        if ($evt->advise_before()) {
            $match = array();
            $snippets = array();
            $utf8_offset = $offset = $end = 0;
            $len = Utf8\PhpString::strlen($text);

            // build a regexp from the phrases to highlight
            $re1 = '(' .
                join(
                    '|',
                    array_map(
                        [$this, 'snippetRePreprocess'],
                        array_map(
                            'preg_quote_cb',
                            array_filter((array) $highlight)
                        )
                    )
                ) .
                ')';
            $re2 = "$re1.{0,75}(?!\\1)$re1";
            $re3 = "$re1.{0,45}(?!\\1)$re1.{0,45}(?!\\1)(?!\\2)$re1";

            for ($cnt=4; $cnt--;) {
                if (0) {
                } elseif (preg_match('/'.$re3.'/iu', $text, $match, PREG_OFFSET_CAPTURE, $offset)) {
                } elseif (preg_match('/'.$re2.'/iu', $text, $match, PREG_OFFSET_CAPTURE, $offset)) {
                } elseif (preg_match('/'.$re1.'/iu', $text, $match, PREG_OFFSET_CAPTURE, $offset)) {
                } else {
                    break;
                }

                list($str, $idx) = $match[0];

                // convert $idx (a byte offset) into a utf8 character offset
                $utf8_idx = Utf8\PhpString::strlen(substr($text, 0, $idx));
                $utf8_len = Utf8\PhpString::strlen($str);

                // establish context, 100 bytes surrounding the match string
                // first look to see if we can go 100 either side,
                // then drop to 50 adding any excess if the other side can't go to 50,
                $pre = min($utf8_idx - $utf8_offset, 100);
                $post = min($len - $utf8_idx - $utf8_len, 100);

                if ($pre > 50 && $post > 50) {
                    $pre = $post = 50;
                } elseif ($pre > 50) {
                    $pre = min($pre, 100 - $post);
                } elseif ($post > 50) {
                    $post = min($post, 100 - $pre);
                } elseif ($offset == 0) {
                    // both are less than 50, means the context is the whole string
                    // make it so and break out of this loop - there is no need for the
                    // complex snippet calculations
                    $snippets = array($text);
                    break;
                }

                // establish context start and end points, try to append to previous
                // context if possible
                $start = $utf8_idx - $pre;
                $append = ($start < $end) ? $end : false;  // still the end of the previous context snippet
                $end = $utf8_idx + $utf8_len + $post;      // now set it to the end of this context

                if ($append) {
0   == false // true
0   == null  // true
123 == false // false
123 == null  // false

// It is often better to use strict comparison
0 === false // false
0 === null  // false
                    $snippets[count($snippets)-1] .= Utf8\PhpString::substr($text, $append, $end-$append);
                } else {
                    $snippets[] = Utf8\PhpString::substr($text, $start, $end-$start);
                }

                // set $offset for next match attempt
                // continue matching after the current match
                // if the current match is not the longest possible match starting at the current offset
                // this prevents further matching of this snippet but for possible matches of length
                // smaller than match length + context (at least 50 characters) this match is part of the context
                $utf8_offset = $utf8_idx + $utf8_len;
                $offset = $idx + strlen(Utf8\PhpString::substr($text, $utf8_idx, $utf8_len));
                $offset = Utf8\Clean::correctIdx($text, $offset);
            }

            $m = "\1";
            $snippets = preg_replace('/'.$re1.'/iu', $m.'$1'.$m, $snippets);
            $snippet = preg_replace(
                '/' . $m . '([^' . $m . ']*?)' . $m . '/iu',
                '<strong class="search_hit">$1</strong>',
                hsc(join('... ', $snippets))
            );

            $evdata['snippet'] = $snippet;
        }
        $evt->advise_after();
        unset($evt);

        return $evdata['snippet'];
    }

    /**
     * Wraps a search term in regex boundary checks.
     *
     * @param string $term
     * @return string
     */
    public function snippetRePreprocess($term)
    {
        // do not process asian terms where word boundaries are not explicit
        if (Utf8\Asian::isAsianWords($term)) return $term;

        if (UTF8_PROPERTYSUPPORT) {
            // unicode word boundaries
            // see http://stackoverflow.com/a/2449017/172068
            $BL = '(?<!\pL)';
            $BR = '(?!\pL)';
        } else {
            // not as correct as above, but at least won't break
            $BL = '\b';
            $BR = '\b';
        }

        if (substr($term, 0, 2) == '\\*') {
            $term = substr($term, 2);
        } else {
            $term = $BL.$term;
        }

        if (substr($term, -2, 2) == '\\*') {
            $term = substr($term, 0, -2);
        } else {
            $term = $term.$BR;
        }

        if ($term == $BL || $term == $BR || $term == $BL.$BR) {
            $term = '';
        }
        return $term;
    }

    /**
     * Combine found documents and sum up their scores
     *
     * This function is used to combine searched words with a logical
     * AND. Only documents available in all arrays are returned.
     *
     * based upon PEAR's PHP_Compat function for array_intersect_key()
     *
     * @param array $args An array of page arrays
     * @return array
     */
    protected function resultCombine($args)
    {
        $array_count = count($args);
        if ($array_count == 1) {
            return $args[0];
        }

        $result = array();
        if ($array_count > 1) {
            foreach ($args[0] as $key => $value) {
                $result[$key] = $value;
                for ($i = 1; $i !== $array_count; $i++) {
                    if (!isset($args[$i][$key])) {
                        unset($result[$key]);
                        break;
                    }
                    $result[$key] += $args[$i][$key];
                }
            }
        }
        return $result;
    }

    /**
     * Unites found documents and sum up their scores
     * based upon resultCombine() method
     *
     * @param array $args An array of page arrays
     * @return array
     *
     * @author Kazutaka Miyasaka <[email protected]>
     */
    protected function resultUnite($args)
    {
        $array_count = count($args);
        if ($array_count === 1) {
            return $args[0];
        }

        $result = $args[0];
        for ($i = 1; $i !== $array_count; $i++) {
            foreach (array_keys($args[$i]) as $id) {
                $result[$id] += $args[$i][$id];
            }
        }
        return $result;
    }

    /**
     * Computes the difference of documents using page id for comparison
     * nearly identical to PHP5's array_diff_key()
     *
     * @param array $args An array of page arrays
     * @return array
     *
     * @author Kazutaka Miyasaka <[email protected]>
     */
    protected function resultComplement($args)
    {
        $array_count = count($args);
        if ($array_count === 1) {
            return $args[0];
        }

        $result = $args[0];
        foreach (array_keys($result) as $id) {
            for ($i = 1; $i !== $array_count; $i++) {
                if (isset($args[$i][$id])) unset($result[$id]);
            }
        }
        return $result;
    }
}


1			<?php
2
3			namespace dokuwiki\Search;
4
5			use dokuwiki\Extension\Event;
6			use dokuwiki\Utf8;
7
8			// create snippets for the first few results only
9			const FT_SNIPPET_NUMBER = 15;
10
11			/**
12			* Class DokuWiki Fulltext Search
13			*
14			* @license GPL 2 (http://www.gnu.org/licenses/gpl.html)
15			* @author Andreas Gohr <[email protected]>
16			*/
17			class FulltextSearch
18			{
19			/** @var FulltextSearch $instance */
20			protected static $instance = null;
21
22			/**
23			* Get new or existing singleton instance of the FulltextSearch
24			*
25			* @return FulltextSearch
26			*/
27			public static function getInstance()
28			{
29			if (is_null(static::$instance)) {
30			static::$instance = new static();
31			}
32			return static::$instance;
33			}
34
35			/**
36			* Fulltext Search constructor. prevent direct object creation
37			*/
38			protected function __construct() {}
39
40			/**
41			* The fulltext search
42			*
43			* Returns a list of matching documents for the given query
44			*
45			* refactored into pageSearch(), pageSearchCallBack() and trigger_event()
46			*
47			* @param string $query
48			* @param array $highlight
49			* @param string $sort
50			* @param int\|string $after only show results with mtime after this date,
51			* accepts timestap or strtotime arguments
52			* @param int\|string $before only show results with mtime before this date,
53			* accepts timestap or strtotime arguments
54			*
55			* @return array
56			*/
57			public function pageSearch($query, &$highlight, $sort = null, $after = null, $before = null)
58			{
59			if ($sort === null) {
60			$sort = 'hits';
61			}
62			$data = [
63			'query' => $query,
64			'sort' => $sort,
65			'after' => $after,
66			'before' => $before
67			];
68			$data['highlight'] =& $highlight;
69			$action = [$this, 'pageSearchCallBack'];
70			return Event::createAndTrigger('SEARCH_QUERY_FULLPAGE', $data, $action);
71			}
72
73			/**
74			* Returns a list of matching documents for the given query
75			*
76			* @author Andreas Gohr <[email protected]>
77			* @author Kazutaka Miyasaka <[email protected]>
78			*
79			* @param array $data event data
80			* @return array matching documents
81			*/
82			public function pageSearchCallBack(&$data)
83			{
84			// parse the given query
85			$q = (new QueryParser)->convert($data['query']);
86			$data['highlight'] = $q['highlight'];
87
88			if (empty($q['parsed_ary'])) return array();
89
90			// lookup all words found in the query
91			$FulltextIndex = FulltextIndex::getInstance();
92			$lookup = $FulltextIndex->lookupWords($q['words']);
93
94			// get all pages in this dokuwiki site (!: includes nonexistent pages)
95			$pages_all = array();
96			foreach ($FulltextIndex->getPages() as $id) {
97			$pages_all[$id] = 0; // base: 0 hit
98			}
99
100			// process the query
101			$stack = array();
102			foreach ($q['parsed_ary'] as $token) {
			0 ignored issues – show Bug introduced 2015-11-25 07:06 UTC by Report Bug Copy Issue Report The expression `$q['parsed_ary']` of type `string\|array` is not guaranteed to be traversable. How about adding an additional type check? There are different options of fixing this problem. If you want to be on the safe side, you can add an additional type-check: $collection = json_decode($data, true); if ( ! is_array($collection)) { throw new \RuntimeException('$collection must be an array.'); } foreach ($collection as $item) { /** ... / } If you are sure that the expression is traversable, you might want to add a doc comment cast* to improve IDE auto-completion and static analysis: /** @var array $collection / $collection = json_decode($data, true); foreach ($collection as $item) { /* .. / } Mark the issue as a false-positive*: Just hover the remove button, in the top-right corner of this issue for more options. Loading history...
103			switch (substr($token, 0, 3)) {
104			case 'W+:':
105			case 'W-:':
106			case 'W_:': // word
107			$word = substr($token, 3);
108			$stack[] = (array) $lookup[$word];
109			break;
110			case 'P+:':
111			case 'P-:': // phrase
112			$phrase = substr($token, 3);
113			// since phrases are always parsed as ((W1)(W2)...(P)),
114			// the end($stack) always points the pages that contain
115			// all words in this phrase
116			$pages = end($stack);
117			$pages_matched = array();
118			foreach (array_keys($pages) as $id) {
119			$evdata = array(
120			'id' => $id,
121			'phrase' => $phrase,
122			'text' => rawWiki($id)
123			);
124			$event = new Event('FULLTEXT_PHRASE_MATCH', $evdata);
125			if ($event->advise_before() && $event->result !== true) {
126			$text = Utf8\PhpString::strtolower($evdata['text']);
127			if (strpos($text, $phrase) !== false) {
128			$event->result = true;
129			}
130			}
131			$event->advise_after();
132			if ($event->result === true) {
133			$pages_matched[$id] = 0; // phrase: always 0 hit
134			}
135			}
136			$stack[] = $pages_matched;
137			break;
138			case 'N+:':
139			case 'N-:': // namespace
140			$ns = cleanID(substr($token, 3)) . ':';
141			$pages_matched = array();
142			foreach (array_keys($pages_all) as $id) {
143			if (strpos($id, $ns) === 0) {
144			$pages_matched[$id] = 0; // namespace: always 0 hit
145			}
146			}
147			$stack[] = $pages_matched;
148			break;
149			case 'AND': // and operation
150			list($pages1, $pages2) = array_splice($stack, -2);
151			$stack[] = $this->resultCombine(array($pages1, $pages2));
152			break;
153			case 'OR': // or operation
154			list($pages1, $pages2) = array_splice($stack, -2);
155			$stack[] = $this->resultUnite(array($pages1, $pages2));
156			break;
157			case 'NOT': // not operation (unary)
158			$pages = array_pop($stack);
159			$stack[] = $this->resultComplement(array($pages_all, $pages));
160			break;
161			}
162			}
163			$docs = array_pop($stack);
164
165			if (empty($docs)) return array();
166
167			// check: settings, acls, existence
168			foreach (array_keys($docs) as $id) {
169			if (isHiddenPage($id)
170			\|\| auth_quickaclcheck($id) < AUTH_READ
171			\|\| !page_exists($id, '', false)
172			) {
173			unset($docs[$id]);
174			}
175			}
176
177			$docs = $this->filterResultsByTime($docs, $data['after'], $data['before']);
178
179			if ($data['sort'] === 'mtime') {
180			uksort($docs, [$this, 'pagemtimesorter']);
181			} else {
182			// sort docs by count
183			arsort($docs);
184			}
185
186			return $docs;
187			}
188
189			/**
190			* @param array $results search results in the form pageid => value
191			* @param int\|string $after only returns results with mtime after this date,
192			* accepts timestap or strtotime arguments
193			* @param int\|string $before only returns results with mtime after this date,
194			* accepts timestap or strtotime arguments
195			*
196			* @return array
197			*/
198			protected function filterResultsByTime(array $results, $after, $before)
199			{
200			if ($after \|\| $before) {
201			$after = is_int($after) ? $after : strtotime($after);
202			$before = is_int($before) ? $before : strtotime($before);
203
204			foreach ($results as $id => $value) {
205			$mTime = filemtime(wikiFN($id));
206			if ($after && $after > $mTime) {
207			unset($results[$id]);
208			continue;
209			}
210			if ($before && $before < $mTime) {
211			unset($results[$id]);
212			}
213			}
214			}
215			return $results;
216			}
217
218			/**
219			* Sort pages by their mtime, from newest to oldest
220			*
221			* @param string $a
222			* @param string $b
223			*
224			* @return int Returns < 0 if $a is newer than $b, > 0 if $b is newer than $a
225			* and 0 if they are of the same age
226			*/
227			protected function pagemtimesorter($a, $b)
228			{
229			$mtimeA = filemtime(wikiFN($a));
230			$mtimeB = filemtime(wikiFN($b));
231			return $mtimeB - $mtimeA;
232			}
233
234			/**
235			* Creates a snippet extract
236			*
237			* @author Andreas Gohr <[email protected]>
238			* @triggers FULLTEXT_SNIPPET_CREATE
239			*
240			* @param string $id page id
241			* @param array $highlight
242			* @return mixed
243			*/
244			public function snippet($id, $highlight)
245			{
246			$text = rawWiki($id);
247			$text = str_replace("\xC2\xAD",'',$text); // remove soft-hyphens
248			$evdata = array(
249			'id' => $id,
250			'text' => &$text,
251			'highlight' => &$highlight,
252			'snippet' => '',
253			);
254
255			$evt = new Event('FULLTEXT_SNIPPET_CREATE', $evdata);
256			if ($evt->advise_before()) {
257			$match = array();
258			$snippets = array();
259			$utf8_offset = $offset = $end = 0;
260			$len = Utf8\PhpString::strlen($text);
261
262			// build a regexp from the phrases to highlight
263			$re1 = '(' .
264			join(
265			'\|',
266			array_map(
267			[$this, 'snippetRePreprocess'],
268			array_map(
269			'preg_quote_cb',
270			array_filter((array) $highlight)
271			)
272			)
273			) .
274			')';
275			$re2 = "$re1.{0,75}(?!\\1)$re1";
276			$re3 = "$re1.{0,45}(?!\\1)$re1.{0,45}(?!\\1)(?!\\2)$re1";
277
278			for ($cnt=4; $cnt--;) {
279			if (0) {
280			} elseif (preg_match('/'.$re3.'/iu', $text, $match, PREG_OFFSET_CAPTURE, $offset)) {
281			} elseif (preg_match('/'.$re2.'/iu', $text, $match, PREG_OFFSET_CAPTURE, $offset)) {
282			} elseif (preg_match('/'.$re1.'/iu', $text, $match, PREG_OFFSET_CAPTURE, $offset)) {
283			} else {
284			break;
285			}
286
287			list($str, $idx) = $match[0];
288
289			// convert $idx (a byte offset) into a utf8 character offset
290			$utf8_idx = Utf8\PhpString::strlen(substr($text, 0, $idx));
291			$utf8_len = Utf8\PhpString::strlen($str);
292
293			// establish context, 100 bytes surrounding the match string
294			// first look to see if we can go 100 either side,
295			// then drop to 50 adding any excess if the other side can't go to 50,
296			$pre = min($utf8_idx - $utf8_offset, 100);
297			$post = min($len - $utf8_idx - $utf8_len, 100);
298
299			if ($pre > 50 && $post > 50) {
300			$pre = $post = 50;
301			} elseif ($pre > 50) {
302			$pre = min($pre, 100 - $post);
303			} elseif ($post > 50) {
304			$post = min($post, 100 - $pre);
305			} elseif ($offset == 0) {
306			// both are less than 50, means the context is the whole string
307			// make it so and break out of this loop - there is no need for the
308			// complex snippet calculations
309			$snippets = array($text);
310			break;
311			}
312
313			// establish context start and end points, try to append to previous
314			// context if possible
315			$start = $utf8_idx - $pre;
316			$append = ($start < $end) ? $end : false; // still the end of the previous context snippet
317			$end = $utf8_idx + $utf8_len + $post; // now set it to the end of this context
318
319			if ($append) {
			0 ignored issues – show Bug Best Practice introduced 2015-11-25 07:06 UTC by Report Bug Copy Issue Report The expression `$append` of type `integer\|false` is loosely compared to `true`; this is ambiguous if the integer can be zero. You might want to explicitly use `!== null` instead. In PHP, under loose comparison (like `==`, or `!=`, or `switch` conditions), values of different types might be equal. For `integer` values, zero is a special case, in particular the following results might be unexpected: 0 == false // true 0 == null // true 123 == false // false 123 == null // false // It is often better to use strict comparison 0 === false // false 0 === null // false Loading history...
320			$snippets[count($snippets)-1] .= Utf8\PhpString::substr($text, $append, $end-$append);
321			} else {
322			$snippets[] = Utf8\PhpString::substr($text, $start, $end-$start);
323			}
324
325			// set $offset for next match attempt
326			// continue matching after the current match
327			// if the current match is not the longest possible match starting at the current offset
328			// this prevents further matching of this snippet but for possible matches of length
329			// smaller than match length + context (at least 50 characters) this match is part of the context
330			$utf8_offset = $utf8_idx + $utf8_len;
331			$offset = $idx + strlen(Utf8\PhpString::substr($text, $utf8_idx, $utf8_len));
332			$offset = Utf8\Clean::correctIdx($text, $offset);
333			}
334
335			$m = "\1";
336			$snippets = preg_replace('/'.$re1.'/iu', $m.'$1'.$m, $snippets);
337			$snippet = preg_replace(
338			'/' . $m . '([^' . $m . ']*?)' . $m . '/iu',
339			'<strong class="search_hit">$1</strong>',
340			hsc(join('... ', $snippets))
341			);
342
343			$evdata['snippet'] = $snippet;
344			}
345			$evt->advise_after();
346			unset($evt);
347
348			return $evdata['snippet'];
349			}
350
351			/**
352			* Wraps a search term in regex boundary checks.
353			*
354			* @param string $term
355			* @return string
356			*/
357			public function snippetRePreprocess($term)
358			{
359			// do not process asian terms where word boundaries are not explicit
360			if (Utf8\Asian::isAsianWords($term)) return $term;
361
362			if (UTF8_PROPERTYSUPPORT) {
363			// unicode word boundaries
364			// see http://stackoverflow.com/a/2449017/172068
365			$BL = '(?<!\pL)';
366			$BR = '(?!\pL)';
367			} else {
368			// not as correct as above, but at least won't break
369			$BL = '\b';
370			$BR = '\b';
371			}
372
373			if (substr($term, 0, 2) == '\\*') {
374			$term = substr($term, 2);
375			} else {
376			$term = $BL.$term;
377			}
378
379			if (substr($term, -2, 2) == '\\*') {
380			$term = substr($term, 0, -2);
381			} else {
382			$term = $term.$BR;
383			}
384
385			if ($term == $BL \|\| $term == $BR \|\| $term == $BL.$BR) {
386			$term = '';
387			}
388			return $term;
389			}
390
391			/**
392			* Combine found documents and sum up their scores
393			*
394			* This function is used to combine searched words with a logical
395			* AND. Only documents available in all arrays are returned.
396			*
397			* based upon PEAR's PHP_Compat function for array_intersect_key()
398			*
399			* @param array $args An array of page arrays
400			* @return array
401			*/
402			protected function resultCombine($args)
403			{
404			$array_count = count($args);
405			if ($array_count == 1) {
406			return $args[0];
407			}
408
409			$result = array();
410			if ($array_count > 1) {
411			foreach ($args[0] as $key => $value) {
412			$result[$key] = $value;
413			for ($i = 1; $i !== $array_count; $i++) {
414			if (!isset($args[$i][$key])) {
415			unset($result[$key]);
416			break;
417			}
418			$result[$key] += $args[$i][$key];
419			}
420			}
421			}
422			return $result;
423			}
424
425			/**
426			* Unites found documents and sum up their scores
427			* based upon resultCombine() method
428			*
429			* @param array $args An array of page arrays
430			* @return array
431			*
432			* @author Kazutaka Miyasaka <[email protected]>
433			*/
434			protected function resultUnite($args)
435			{
436			$array_count = count($args);
437			if ($array_count === 1) {
438			return $args[0];
439			}
440
441			$result = $args[0];
442			for ($i = 1; $i !== $array_count; $i++) {
443			foreach (array_keys($args[$i]) as $id) {
444			$result[$id] += $args[$i][$id];
445			}
446			}
447			return $result;
448			}
449
450			/**
451			* Computes the difference of documents using page id for comparison
452			* nearly identical to PHP5's array_diff_key()
453			*
454			* @param array $args An array of page arrays
455			* @return array
456			*
457			* @author Kazutaka Miyasaka <[email protected]>
458			*/
459			protected function resultComplement($args)
460			{
461			$array_count = count($args);
462			if ($array_count === 1) {
463			return $args[0];
464			}
465
466			$result = $args[0];
467			foreach (array_keys($result) as $id) {
468			for ($i = 1; $i !== $array_count; $i++) {
469			if (isset($args[$i][$id])) unset($result[$id]);
470			}
471			}
472			return $result;
473			}
474			}
475

splitbrain / dokuwiki

Pull Request — master (#2943)

FulltextSearch F

Complexity

Size/Duplication

Coupling/Cohesion

Importance

11 Methods

How to fix Complexity

Complex Class

Duplication Side-by-Side

Filter issues like