|
1
|
|
|
<?php |
|
2
|
|
|
/** |
|
3
|
|
|
* Basic search engine highlighting |
|
4
|
|
|
* |
|
5
|
|
|
* This program is free software; you can redistribute it and/or modify |
|
6
|
|
|
* it under the terms of the GNU General Public License as published by |
|
7
|
|
|
* the Free Software Foundation; either version 2 of the License, or |
|
8
|
|
|
* (at your option) any later version. |
|
9
|
|
|
* |
|
10
|
|
|
* This program is distributed in the hope that it will be useful, |
|
11
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
12
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|
13
|
|
|
* GNU General Public License for more details. |
|
14
|
|
|
* |
|
15
|
|
|
* You should have received a copy of the GNU General Public License along |
|
16
|
|
|
* with this program; if not, write to the Free Software Foundation, Inc., |
|
17
|
|
|
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
|
18
|
|
|
* http://www.gnu.org/copyleft/gpl.html |
|
19
|
|
|
* |
|
20
|
|
|
* @file |
|
21
|
|
|
* @ingroup Search |
|
22
|
|
|
*/ |
|
23
|
|
|
|
|
24
|
|
|
/**
 * Highlight bits of wikitext
 *
 * Produces search-result snippets from raw page text. Three strategies are
 * offered: highlightText() (context-aware, splits the text into plain text
 * and templates/links/tables), highlightSimple() (per-line regex match) and
 * highlightNone() (first few lines, no highlighting).
 *
 * @ingroup Search
 */
class SearchHighlighter {
	/** @var bool Whether splitAndAdd() strips wiki markup via removeWiki() */
	protected $mCleanWikitext = true;

	/**
	 * @param bool $cleanupWikitext Strip basic wiki markup from extracts
	 */
	public function __construct( $cleanupWikitext = true ) {
		$this->mCleanWikitext = $cleanupWikitext;
	}

	/**
	 * Wikitext highlighting when $wgAdvancedSearchHighlighting = true
	 *
	 * The text is split into "text" extracts and "other" extracts
	 * (templates, file links, tables and, if the Cite class exists, <ref>
	 * blocks). Snippets are then picked in this order: the beginning of the
	 * text if it contains every term, otherwise lines matching the whole
	 * phrase, otherwise lines matching any term. Matching terms are wrapped
	 * in <span class='searchmatch'>.
	 *
	 * @param string $text
	 * @param array $terms Terms to highlight (not html escaped but
	 *   regex escaped via SearchDatabase::regexTerm())
	 * @param int $contextlines
	 * @param int $contextchars
	 * @return string
	 */
	public function highlightText( $text, $terms, $contextlines, $contextchars ) {
		global $wgContLang, $wgSearchHighlightBoundaries;

		if ( $text == '' ) {
			return '';
		}

		// split text into text + templates/links/tables
		$spat = "/(\\{\\{)|(\\[\\[[^\\]:]+:)|(\n\\{\\|)";
		// first capture group is for detecting nested templates/links/tables/references
		$endPatterns = [
			1 => '/(\{\{)|(\}\})/', // template
			2 => '/(\[\[)|(\]\])/', // image
			3 => "/(\n\\{\\|)|(\n\\|\\})/" ]; // table

		// @todo FIXME: This should prolly be a hook or something
		// instead of hardcoding a class name from the Cite extension
		if ( class_exists( 'Cite' ) ) {
			$spat .= '|(<ref>)'; // references via cite extension
			$endPatterns[4] = '/(<ref>)|(<\/ref>)/';
		}
		$spat .= '/';
		$textExt = []; // text extracts
		$otherExt = []; // other extracts
		$start = 0;
		$textLen = strlen( $text );
		$count = 0; // sequence number to maintain ordering
		while ( $start < $textLen ) {
			// find start of template/image/table
			if ( preg_match( $spat, $text, $matches, PREG_OFFSET_CAPTURE, $start ) ) {
				$epat = '';
				foreach ( $matches as $key => $val ) {
					// with PREG_OFFSET_CAPTURE an unmatched group has offset -1
					if ( $key > 0 && $val[1] != -1 ) {
						if ( $key == 2 ) {
							// see if this is an image link
							$ns = substr( $val[0], 2, -1 );
							if ( $wgContLang->getNsIndex( $ns ) != NS_FILE ) {
								// NOTE(review): $epat stays empty here, so the rest of
								// the text is added as one text extract below and
								// scanning stops - confirm this is intended.
								break;
							}
						}
						$epat = $endPatterns[$key];
						$this->splitAndAdd( $textExt, $count, substr( $text, $start, $val[1] - $start ) );
						$start = $val[1];
						break;
					}
				}
				if ( $epat ) {
					// find end (and detect any nested elements)
					$level = 0;
					$offset = $start + 1;
					$found = false;
					while ( preg_match( $epat, $text, $endMatches, PREG_OFFSET_CAPTURE, $offset ) ) {
						// group 2 (the closing token) is only present when it matched
						if ( array_key_exists( 2, $endMatches ) ) {
							// found end
							if ( $level == 0 ) {
								$len = strlen( $endMatches[2][0] );
								$off = $endMatches[2][1];
								$this->splitAndAdd( $otherExt, $count,
									substr( $text, $start, $off + $len - $start ) );
								$start = $off + $len;
								$found = true;
								break;
							} else {
								// end of nested element
								$level -= 1;
							}
						} else {
							// nested
							$level += 1;
						}
						$offset = $endMatches[0][1] + strlen( $endMatches[0][0] );
					}
					if ( !$found ) {
						// couldn't find appropriate closing tag, skip
						$this->splitAndAdd( $textExt, $count, substr( $text, $start, strlen( $matches[0][0] ) ) );
						$start += strlen( $matches[0][0] );
					}
					continue;
				}
			}
			// else: add as text extract
			$this->splitAndAdd( $textExt, $count, substr( $text, $start ) );
			break;
		}

		$all = $textExt + $otherExt; // these have disjunct key sets

		// prepare regexps
		foreach ( $terms as $index => $term ) {
			// manually do upper/lowercase stuff for utf-8 since PHP won't do it
			if ( preg_match( '/[\x80-\xff]/', $term ) ) {
				$terms[$index] = preg_replace_callback(
					'/./us',
					[ $this, 'caseCallback' ],
					$terms[$index]
				);
			} else {
				$terms[$index] = $term;
			}
		}
		$anyterm = implode( '|', $terms );
		$phrase = implode( "$wgSearchHighlightBoundaries+", $terms );
		// @todo FIXME: A hack to scale contextchars, a correct solution
		// would be to have contextchars actually be char and not byte
		// length, and do proper utf-8 substrings and lengths everywhere,
		// but PHP is making that very hard and unclean to implement :(
		// An empty term list gives an empty $anyterm; avoid dividing by zero.
		$scale = $anyterm === '' ? 1 : strlen( $anyterm ) / mb_strlen( $anyterm );
		$contextchars = intval( $contextchars * $scale );

		$patPre = "(^|$wgSearchHighlightBoundaries)";
		$patPost = "($wgSearchHighlightBoundaries|$)";

		$pat1 = "/(" . $phrase . ")/ui";
		$pat2 = "/$patPre(" . $anyterm . ")$patPost/ui";

		$left = $contextlines;

		$snippets = [];
		$offsets = [];

		// show beginning only if it contains all words
		$first = 0;
		$firstText = '';
		foreach ( $textExt as $index => $line ) {
			if ( strlen( $line ) > 0 && $line[0] != ';' && $line[0] != ':' ) {
				$firstText = $this->extract( $line, 0, $contextchars * $contextlines );
				$first = $index;
				break;
			}
		}
		if ( $firstText ) {
			$succ = true;
			// check if first text contains all terms
			foreach ( $terms as $term ) {
				if ( !preg_match( "/$patPre" . $term . "$patPost/ui", $firstText ) ) {
					$succ = false;
					break;
				}
			}
			if ( $succ ) {
				$snippets[$first] = $firstText;
				$offsets[$first] = 0;
			}
		}
		if ( !$snippets ) {
			// match whole query on text
			$this->process( $pat1, $textExt, $left, $contextchars, $snippets, $offsets );
			// match whole query on templates/tables/images
			$this->process( $pat1, $otherExt, $left, $contextchars, $snippets, $offsets );
			// match any words on text
			$this->process( $pat2, $textExt, $left, $contextchars, $snippets, $offsets );
			// match any words on templates/tables/images
			$this->process( $pat2, $otherExt, $left, $contextchars, $snippets, $offsets );

			ksort( $snippets );
		}

		// add extra chars to each snippet to make snippets constant size
		$extended = [];
		if ( count( $snippets ) == 0 ) {
			// couldn't find the target words, just show beginning of article
			// (assigned unconditionally so $targetchars is defined on every path)
			$targetchars = $contextchars * $contextlines;
			if ( array_key_exists( $first, $all ) ) {
				$snippets[$first] = '';
				$offsets[$first] = 0;
			}
		} else {
			// if begin of the article contains the whole phrase, show only that !!
			if ( array_key_exists( $first, $snippets ) && preg_match( $pat1, $snippets[$first] )
				&& $offsets[$first] < $contextchars * 2 ) {
				$snippets = [ $first => $snippets[$first] ];
			}

			// calc by how much to extend existing snippets
			$targetchars = intval( ( $contextchars * $contextlines ) / count( $snippets ) );
		}

		foreach ( $snippets as $index => $line ) {
			$extended[$index] = $line;
			$len = strlen( $line );
			if ( $len < $targetchars - 20 ) {
				// complete this line
				if ( $len < strlen( $all[$index] ) ) {
					$extended[$index] = $this->extract(
						$all[$index],
						$offsets[$index],
						$offsets[$index] + $targetchars,
						$offsets[$index]
					);
					$len = strlen( $extended[$index] );
				}

				// add more lines
				$add = $index + 1;
				while ( $len < $targetchars - 20
					&& array_key_exists( $add, $all )
					&& !array_key_exists( $add, $snippets ) ) {
					$offsets[$add] = 0;
					$tt = "\n" . $this->extract( $all[$add], 0, $targetchars - $len, $offsets[$add] );
					$extended[$add] = $tt;
					$len += strlen( $tt );
					$add++;
				}
			}
		}

		// NOTE: snippets are not HTML-escaped here; the original carried a
		// disabled "array_map( 'htmlspecialchars', $extended )" at this point.
		$snippets = $extended;
		$last = -1;
		$extract = '';
		foreach ( $snippets as $index => $line ) {
			if ( $last == -1 ) {
				$extract .= $line; // first line
			} elseif ( $last + 1 == $index
				&& $offsets[$last] + strlen( $snippets[$last] ) >= strlen( $all[$last] )
			) {
				$extract .= " " . $line; // continuous lines
			} else {
				$extract .= '<b> ... </b>' . $line;
			}

			$last = $index;
		}
		if ( $extract ) {
			$extract .= '<b> ... </b>';
		}

		$processed = [];
		foreach ( $terms as $term ) {
			if ( !isset( $processed[$term] ) ) {
				$pat3 = "/$patPre(" . $term . ")$patPost/ui"; // highlight word
				$extract = preg_replace( $pat3,
					"\\1<span class='searchmatch'>\\2</span>\\3", $extract );
				$processed[$term] = true;
			}
		}

		return $extract;
	}

	/**
	 * Split text into lines and add it to extracts array
	 *
	 * Each non-empty trimmed line is stored under the next sequence number,
	 * so keys are shared (and unique) across every array fed the same $count.
	 *
	 * @param array &$extracts Index -> $line
	 * @param int &$count Next sequence number; incremented per stored line
	 * @param string $text
	 */
	public function splitAndAdd( &$extracts, &$count, $text ) {
		$split = explode( "\n", $this->mCleanWikitext ? $this->removeWiki( $text ) : $text );
		foreach ( $split as $line ) {
			$tt = trim( $line );
			// compare against '' explicitly: a bare truthiness test drops a line "0"
			if ( $tt !== '' ) {
				$extracts[$count++] = $tt;
			}
		}
	}

	/**
	 * Do manual case conversion for non-ascii chars
	 *
	 * Multi-byte characters are turned into a two-member character class
	 * holding the lower- and upper-case forms; single-byte characters are
	 * returned unchanged.
	 *
	 * @param array $matches
	 * @return string
	 */
	public function caseCallback( $matches ) {
		global $wgContLang;
		if ( strlen( $matches[0] ) > 1 ) {
			return '[' . $wgContLang->lc( $matches[0] ) . $wgContLang->uc( $matches[0] ) . ']';
		} else {
			return $matches[0];
		}
	}

	/**
	 * Extract part of the text from start to end, but by
	 * not chopping up words
	 *
	 * @param string $text
	 * @param int $start Byte offset; adjusted via position() unless 0
	 * @param int $end Byte offset; clamped to the text length
	 * @param int &$posStart (out) actual start position; only written when
	 *   the caller passes a non-null variable
	 * @param int &$posEnd (out) actual end position; only written when
	 *   the caller passes a non-null variable
	 * @return string Empty string when the adjusted end is not after the start
	 */
	public function extract( $text, $start, $end, &$posStart = null, &$posEnd = null ) {
		if ( $start != 0 ) {
			$start = $this->position( $text, $start, 1 );
		}
		if ( $end >= strlen( $text ) ) {
			$end = strlen( $text );
		} else {
			$end = $this->position( $text, $end );
		}

		if ( !is_null( $posStart ) ) {
			$posStart = $start;
		}
		if ( !is_null( $posEnd ) ) {
			$posEnd = $end;
		}

		if ( $end > $start ) {
			return substr( $text, $start, $end - $start );
		} else {
			return '';
		}
	}

	/**
	 * Find a nonletter near a point (index) in the text
	 *
	 * Looks at a window of 10 bytes either side of $point and returns the
	 * position of the first non-letter found in that window (plus $offset).
	 * If there is none, $point is advanced past any UTF-8 continuation bytes.
	 *
	 * @param string $text
	 * @param int $point
	 * @param int $offset Offset to found index
	 * @return int Nearest nonletter index, or beginning of utf8 char if none
	 */
	public function position( $text, $point, $offset = 0 ) {
		$tolerance = 10;
		$textLen = strlen( $text );
		$s = max( 0, $point - $tolerance );
		$l = min( $textLen, $point + $tolerance ) - $s;
		$m = [];

		if ( preg_match(
			'/[ ,.!?~!@#$%^&*\(\)+=\-\\\|\[\]"\'<>]/',
			substr( $text, $s, $l ),
			$m,
			PREG_OFFSET_CAPTURE
		) ) {
			return $m[0][1] + $s + $offset;
		} else {
			// Nothing to inspect at or past the end of the string; return the
			// point unchanged instead of reading an uninitialized offset.
			if ( $point >= $textLen ) {
				return $point;
			}
			// check if point is on a valid first UTF8 char
			$char = ord( $text[$point] );
			while ( $char >= 0x80 && $char < 0xc0 ) {
				// skip trailing bytes
				$point++;
				if ( $point >= $textLen ) {
					return $textLen;
				}
				$char = ord( $text[$point] );
			}

			return $point;
		}
	}

	/**
	 * Search extracts for a pattern, and return snippets
	 *
	 * Declared public for backward compatibility; intended for internal use.
	 *
	 * @param string $pattern Regexp for matching lines
	 * @param array $extracts Extracts to search
	 * @param int &$linesleft Number of extracts to make; decremented per snippet
	 * @param int &$contextchars Length of snippet
	 * @param array &$out Map for highlighted snippets
	 * @param array &$offsets Map of starting points of snippets
	 */
	public function process( $pattern, $extracts, &$linesleft, &$contextchars, &$out, &$offsets ) {
		if ( $linesleft == 0 ) {
			return; // nothing to do
		}
		foreach ( $extracts as $index => $line ) {
			if ( array_key_exists( $index, $out ) ) {
				continue; // this line already highlighted
			}

			$m = [];
			if ( !preg_match( $pattern, $line, $m, PREG_OFFSET_CAPTURE ) ) {
				continue;
			}

			$offset = $m[0][1];
			$len = strlen( $m[0][0] );
			if ( $offset + $len < $contextchars ) {
				// match ends within the first $contextchars bytes: start at the line start
				$begin = 0;
			} elseif ( $len > $contextchars ) {
				// match is longer than the window: start at the match
				$begin = $offset;
			} else {
				// centre the window on the match
				$begin = $offset + intval( ( $len - $contextchars ) / 2 );
			}

			$end = $begin + $contextchars;

			$posBegin = $begin;
			// basic snippet from this line
			$out[$index] = $this->extract( $line, $begin, $end, $posBegin );
			$offsets[$index] = $posBegin;
			$linesleft--;
			if ( $linesleft == 0 ) {
				return;
			}
		}
	}

	/**
	 * Basic wikitext removal
	 *
	 * Strips templates, unwraps links (see linkReplace()), and removes HTML
	 * tags and bold/italic quote markup. Declared public for backward
	 * compatibility; intended for internal use.
	 *
	 * @param string $text
	 * @return mixed
	 */
	public function removeWiki( $text ) {
		// {{template}} without parameters: drop entirely
		$text = preg_replace( "/\\{\\{([^|]+?)\\}\\}/", "", $text );
		// {{template|params}}: keep everything after the first pipe
		$text = preg_replace( "/\\{\\{([^|]+\\|)(.*?)\\}\\}/", "\\2", $text );
		// [[target]]: keep the target text
		$text = preg_replace( "/\\[\\[([^|]+?)\\]\\]/", "\\1", $text );
		// [[target|caption]]: decided per link by linkReplace()
		$text = preg_replace_callback(
			"/\\[\\[([^|]+\\|)(.*?)\\]\\]/",
			[ $this, 'linkReplace' ],
			$text
		);
		$text = preg_replace( "/<\/?[^>]+>/", "", $text );
		$text = preg_replace( "/'''''/", "", $text );
		$text = preg_replace( "/('''|<\/?[iIuUbB]>)/", "", $text );
		$text = preg_replace( "/''/", "", $text );

		return $text;
	}

	/**
	 * callback to replace [[target|caption]] kind of links, if
	 * the target is category or image, leave it
	 *
	 * @param array $matches
	 * @return string
	 */
	public function linkReplace( $matches ) {
		$colon = strpos( $matches[1], ':' );
		if ( $colon === false ) {
			return $matches[2]; // replace with caption
		}
		global $wgContLang;
		$ns = substr( $matches[1], 0, $colon );
		$index = $wgContLang->getNsIndex( $ns );
		if ( $index !== false && ( $index == NS_FILE || $index == NS_CATEGORY ) ) {
			return $matches[0]; // return the whole thing
		} else {
			return $matches[2];
		}
	}

	/**
	 * Simple & fast snippet extraction, but gives completely irrelevant
	 * snippets
	 *
	 * Used when $wgAdvancedSearchHighlighting is false.
	 *
	 * @param string $text
	 * @param array $terms Escaped for regex by SearchDatabase::regexTerm()
	 * @param int $contextlines
	 * @param int $contextchars
	 * @return string
	 */
	public function highlightSimple( $text, $terms, $contextlines, $contextchars ) {
		global $wgContLang;

		$lines = explode( "\n", $text );

		$terms = implode( '|', $terms );
		$max = intval( $contextchars ) + 1;
		$pat1 = "/(.*)($terms)(.{0,$max})/i";

		$extract = "";
		foreach ( $lines as $line ) {
			if ( 0 == $contextlines ) {
				break;
			}
			$m = [];
			if ( !preg_match( $pat1, $line, $m ) ) {
				continue;
			}
			--$contextlines;
			// truncate function changes ... to relevant i18n message.
			$pre = $wgContLang->truncate( $m[1], -$contextchars, '...', false );

			if ( count( $m ) < 3 ) {
				$post = '';
			} else {
				$post = $wgContLang->truncate( $m[3], $contextchars, '...', false );
			}

			$found = $m[2];

			$line = htmlspecialchars( $pre . $found . $post );
			$pat2 = '/(' . $terms . ")/i";
			$line = preg_replace( $pat2, "<span class='searchmatch'>\\1</span>", $line );

			$extract .= $line . "\n";
		}

		return $extract;
	}

	/**
	 * Returns the first few lines of the text
	 *
	 * @param string $text
	 * @param int $contextlines Max number of returned lines
	 * @param int $contextchars Average number of characters per line
	 * @return string HTML-escaped text with newlines replaced by <br>
	 */
	public function highlightNone( $text, $contextlines, $contextchars ) {
		$match = [];
		$text = ltrim( $text ) . "\n"; // make sure the preg_match may find the last line
		// remove empty lines; a run of any length collapses to one newline
		$text = preg_replace( "/\n{2,}/", "\n", $text );
		preg_match( "/^(.*\n){0,$contextlines}/", $text, $match );
		$lead = isset( $match[0] ) ? trim( $match[0] ) : '';

		// Trim and limit to max number of chars. mb_strcut() never splits a
		// multi-byte character; a split one would make htmlspecialchars()
		// return an empty string for the whole snippet.
		$text = htmlspecialchars( mb_strcut( $lead, 0, $contextlines * $contextchars, 'UTF-8' ) );
		return str_replace( "\n", '<br>', $text );
	}
}
|
558
|
|
|
|
If you define a variable conditionally, it can happen that it is not defined for all execution paths.
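Let’s take a look at an example:

    function myFunction($a) {
        switch ($a) {
            case 'foo':
                $x = 1;
                break;
            case 'bar':
                $x = 2;
                break;
        }

        // $x is potentially undefined here.
        echo $x;
    }
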
In the above example, the variable $x is defined if you pass “foo” or “bar” as argument for $a. However, since the switch statement has no default case statement, if you pass any other value, the variable $x would be undefined.
Available Fixes
Check for existence of the variable explicitly:

    if (isset($x)) {
        echo $x;
    }

Define a default value for the variable:

    $x = '';   // before the switch statement

Add a value for the missing path:

    default:
        $x = '';
        break;