1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
/* Copyright (c) |
4
|
|
|
* - 2006-2013, Ivan Sagalaev ([email protected]), highlight.js |
5
|
|
|
* (original author) |
6
|
|
|
* - 2013-2019, Geert Bergman ([email protected]), highlight.php |
7
|
|
|
* - 2014 Daniel Lynge, highlight.php (contributor) |
8
|
|
|
* |
9
|
|
|
* Redistribution and use in source and binary forms, with or without |
10
|
|
|
* modification, are permitted provided that the following conditions are met: |
11
|
|
|
* |
12
|
|
|
* 1. Redistributions of source code must retain the above copyright notice, |
13
|
|
|
* this list of conditions and the following disclaimer. |
14
|
|
|
* 2. Redistributions in binary form must reproduce the above copyright notice, |
15
|
|
|
* this list of conditions and the following disclaimer in the documentation |
16
|
|
|
* and/or other materials provided with the distribution. |
17
|
|
|
* 3. Neither the name of "highlight.js", "highlight.php", nor the names of its |
18
|
|
|
* contributors may be used to endorse or promote products derived from this |
19
|
|
|
* software without specific prior written permission. |
20
|
|
|
* |
21
|
|
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
22
|
|
|
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
23
|
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
24
|
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE |
25
|
|
|
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
26
|
|
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
27
|
|
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
28
|
|
|
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
29
|
|
|
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
30
|
|
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
31
|
|
|
* POSSIBILITY OF SUCH DAMAGE. |
32
|
|
|
*/ |
33
|
|
|
|
34
|
|
|
namespace Highlight; |
35
|
|
|
|
36
|
|
|
/** |
37
|
|
|
* @api |
38
|
|
|
* |
39
|
|
|
* @since 7.5.0.0 |
40
|
|
|
*/ |
41
|
|
|
class Highlighter |
42
|
|
|
{ |
43
|
|
|
/** |
44
|
|
|
* @since 9.12.0.4 |
45
|
|
|
*/ |
46
|
|
|
const SPAN_END_TAG = "</span>"; |
47
|
|
|
|
48
|
|
|
/** @var bool */ |
49
|
|
|
private $safeMode = true; |
50
|
|
|
|
51
|
|
|
// @TODO In v10.x, this value should be static to match highlight.js behavior |
52
|
|
|
/** @var array<string, mixed> */ |
53
|
|
|
private $options; |
54
|
|
|
|
55
|
|
|
/** @var string */ |
56
|
|
|
private $modeBuffer = ""; |
57
|
|
|
|
58
|
|
|
/** @var string */ |
59
|
|
|
private $result = ""; |
60
|
|
|
|
61
|
|
|
/** @var Mode|null */ |
62
|
|
|
private $top = null; |
63
|
|
|
|
64
|
|
|
/** @var Language|null */ |
65
|
|
|
private $language = null; |
66
|
|
|
|
67
|
|
|
/** @var int */ |
68
|
|
|
private $relevance = 0; |
69
|
|
|
|
70
|
|
|
/** @var bool */ |
71
|
|
|
private $ignoreIllegals = false; |
72
|
|
|
|
73
|
|
|
/** @var array<string, Mode> */ |
74
|
|
|
private $continuations = array(); |
75
|
|
|
|
76
|
|
|
/** @var RegExMatch */ |
77
|
|
|
private $lastMatch; |
78
|
|
|
|
79
|
|
|
/** @var string The current code we are highlighting */ |
80
|
|
|
private $codeToHighlight; |
81
|
|
|
|
82
|
|
|
/** @var string[] A list of all the bundled languages */ |
83
|
|
|
private static $bundledLanguages = array(); |
84
|
|
|
|
85
|
|
|
/** @var array<string, Language> A mapping of a language ID to a Language definition */ |
86
|
|
|
private static $classMap = array(); |
87
|
|
|
|
88
|
|
|
/** @var string[] A list of registered language IDs */ |
89
|
|
|
private static $languages = array(); |
90
|
|
|
|
91
|
|
|
/** @var array<string, string> A mapping from alias (key) to main language ID (value) */ |
92
|
|
|
private static $aliases = array(); |
93
|
|
|
|
94
|
|
|
/**
 * Create a highlighter initialised with the default options.
 *
 * @param bool $loadAllLanguages If true, will automatically register all languages distributed with this library.
 *                               If false, user must explicitly register languages by calling `registerLanguage()`.
 *
 * @since 9.18.1.4 added `$loadAllLanguages` parameter
 * @see Highlighter::registerLanguage()
 */
public function __construct($loadAllLanguages = true)
{
    // Placeholder "previous match": `processLexeme()` inspects `$this->lastMatch->type`
    // before any real match has been stored.
    $placeholder = new RegExMatch(array());
    $placeholder->type = "";
    $placeholder->rule = null;
    $this->lastMatch = $placeholder;

    // @TODO In v10.x, remove the default value for the `languages` value to follow highlight.js behavior
    $defaultAutodetect = array(
        "xml", "json", "javascript", "css", "php", "http",
    );

    $this->options = array(
        'classPrefix' => 'hljs-',
        'tabReplace' => null,
        'useBR' => false,
        'languages' => $defaultAutodetect,
    );

    if (!$loadAllLanguages) {
        return;
    }

    self::registerAllLanguages();
}
121
|
|
|
|
122
|
|
|
/**
 * Return a list of all available languages bundled with this library.
 *
 * The list is built once by scanning the `languages` directory next to this
 * file for readable `*.json` definitions; the result is cached in a static
 * property and returned directly on later calls.
 *
 * @since 9.18.1.4
 *
 * @throws \RuntimeException if the bundled language directory cannot be opened
 *
 * @return string[] An array of language names
 */
public static function listBundledLanguages()
{
    if (!empty(self::$bundledLanguages)) {
        return self::$bundledLanguages;
    }

    // These IDs are listed first so they come first in the returned list
    // (the same IDs that `registerAllLanguages()` registers first).
    $found = array_fill_keys(
        array("xml", "django", "javascript", "matlab", "cpp"),
        true
    );

    $directory = __DIR__ . '/languages/';
    $handle = @dir($directory);

    if (!$handle) {
        throw new \RuntimeException('Could not read bundled language definition directory.');
    }

    // @TODO In 10.x, rewrite this as a generator yielding results
    for ($entry = $handle->read(); $entry !== false; $entry = $handle->read()) {
        if (substr($entry, -5) !== ".json") {
            continue;
        }

        if (!is_readable($directory . $entry)) {
            continue;
        }

        // The language ID is the file name without its ".json" suffix.
        $found[substr($entry, 0, -5)] = true;
    }

    $handle->close();

    self::$bundledLanguages = array_keys($found);

    return self::$bundledLanguages;
}
167
|
|
|
|
168
|
|
|
/**
 * Return a list of all the registered languages. Using this list in
 * setAutodetectLanguages will turn on auto-detection for all supported
 * languages.
 *
 * @since 9.18.1.4
 *
 * @param bool $includeAliases Specify whether language aliases should be
 *                             included as well (only the exact value `true`
 *                             enables this)
 *
 * @return string[] An array of language names
 */
public static function listRegisteredLanguages($includeAliases = false)
{
    $registered = self::$languages;

    if ($includeAliases !== true) {
        return $registered;
    }

    return array_merge($registered, array_keys(self::$aliases));
}
188
|
|
|
|
189
|
|
|
/**
 * Register all 185+ languages that are bundled in this library.
 *
 * To register languages individually, use `registerLanguage`.
 *
 * @since 9.18.1.4 Method is now public
 * @since 8.3.0.0
 * @see Highlighter::registerLanguage
 *
 * @return void
 */
public static function registerAllLanguages()
{
    $directory = __DIR__ . DIRECTORY_SEPARATOR . "languages" . DIRECTORY_SEPARATOR;

    // Languages that take precedence in the classMap array: register them first.
    $priority = array("xml", "django", "javascript", "matlab", "cpp");
    foreach ($priority as $id) {
        $file = $directory . $id . ".json";
        if (is_readable($file)) {
            self::registerLanguage($id, $file);
        }
    }

    // @TODO In 10.x, call `listBundledLanguages()` instead when it's a generator
    $handle = @dir($directory);
    if (!$handle) {
        // Unlike `listBundledLanguages()`, an unreadable directory is silently ignored here.
        return;
    }

    while (false !== ($entry = $handle->read())) {
        if (substr($entry, -5) !== ".json") {
            continue;
        }

        $file = $directory . $entry;
        if (!is_readable($file)) {
            continue;
        }

        self::registerLanguage(substr($entry, 0, -5), $file);
    }

    $handle->close();
}
226
|
|
|
|
227
|
|
|
/**
 * Register a language definition with the Highlighter's internal language
 * storage. Languages are stored in a static variable, so they'll be available
 * across all instances. You only need to register a language once.
 *
 * If the language is already registered and `$overwrite` is false, the
 * existing definition is returned and `$filePath` is not used.
 *
 * @param string $languageId The unique name of a language
 * @param string $filePath   The file path to the language definition
 * @param bool   $overwrite  Overwrite language if it already exists
 *
 * @return Language The object containing the definition for a language's markup
 */
public static function registerLanguage($languageId, $filePath, $overwrite = false)
{
    $alreadyRegistered = isset(self::$classMap[$languageId]);

    if ($alreadyRegistered && !$overwrite) {
        return self::$classMap[$languageId];
    }

    $lang = new Language($languageId, $filePath);
    self::$classMap[$languageId] = $lang;

    // `$classMap` and `$languages` are always filled and cleared together, so an
    // O(1) lookup is enough to keep the ID list free of duplicates. This avoids
    // running array_unique() over the whole list on every single registration.
    if (!$alreadyRegistered) {
        self::$languages[] = $languageId;
    }

    if ($lang->aliases) {
        foreach ($lang->aliases as $alias) {
            self::$aliases[$alias] = $languageId;
        }
    }

    return $lang;
}
256
|
|
|
|
257
|
|
|
/**
 * Clear all registered languages.
 *
 * Resets the language definitions, the list of registered IDs and the alias
 * map. The cached list of bundled languages is not touched.
 *
 * @since 9.18.1.4
 *
 * @return void
 */
public static function clearAllLanguages()
{
    self::$aliases = array();
    self::$languages = array();
    self::$classMap = array();
}
270
|
|
|
|
271
|
|
|
/**
 * Check whether a regular expression matches at the very start of a lexeme.
 *
 * The expression's `lastIndex` is restored afterwards, so the probe leaves the
 * expression's scan position as it was.
 *
 * @param RegEx|null $re
 * @param string     $lexeme
 *
 * @return bool
 */
private function testRe($re, $lexeme)
{
    if (!$re) {
        return false;
    }

    $savedIndex = $re->lastIndex;
    $match = $re->exec($lexeme);
    $re->lastIndex = $savedIndex;

    if (!$match) {
        return false;
    }

    return $match->index === 0;
}
289
|
|
|
|
290
|
|
|
/**
 * Build a regular expression that matches the given text literally.
 *
 * @param string $value
 *
 * @return RegEx
 */
private function escapeRe($value)
{
    // The pattern is wrapped in "/" delimiters, so the delimiter must be passed to
    // preg_quote(); otherwise a value containing "/" would terminate the pattern early.
    return new RegEx(sprintf('/%s/um', preg_quote($value, '/')));
}
299
|
|
|
|
300
|
|
|
/**
 * Find the mode that is terminated by the given lexeme, if any.
 *
 * Starting at `$mode`, the mode's end expression is tested against the lexeme.
 * On a match, the result is the mode itself, or the outermost ancestor reached
 * by following `endsParent` links. Without a match, the search continues with
 * the parent as long as the mode is flagged `endsWithParent`.
 *
 * @param Mode   $mode
 * @param string $lexeme
 *
 * @return Mode|null
 */
private function endOfMode($mode, $lexeme)
{
    while (true) {
        if ($this->testRe($mode->endRe, $lexeme)) {
            while ($mode->endsParent && $mode->parent) {
                $mode = $mode->parent;
            }

            return $mode;
        }

        if (!$mode->endsWithParent) {
            return null;
        }

        // This mode ends whenever its parent ends: retry with the parent's end rule.
        $mode = $mode->parent;
    }
}
322
|
|
|
|
323
|
|
|
/**
 * Look up a matched lexeme in a mode's keyword table.
 *
 * For case-insensitive languages the lexeme is lower-cased (as UTF-8) before
 * the lookup.
 *
 * @param Mode       $mode
 * @param RegExMatch $match
 *
 * @return mixed|null The keyword table entry, or null when the lexeme is not a keyword
 */
private function keywordMatch($mode, $match)
{
    $word = $match[0];

    if ($this->language->case_insensitive) {
        $word = mb_strtolower($word, "UTF-8");
    }

    if (!isset($mode->keywords[$word])) {
        return null;
    }

    return $mode->keywords[$word];
}
335
|
|
|
|
336
|
|
|
/**
 * Wrap content in a `<span>` carrying the given class name.
 *
 * Returns an empty string for empty content unless the span is left open, and
 * returns the content unwrapped when no class name is given.
 *
 * @param string $className  Class name, prefixed with the `classPrefix` option unless `$noPrefix` is set
 * @param string $insideSpan Content placed inside the span (used as-is)
 * @param bool   $leaveOpen  When true, the closing tag is omitted
 * @param bool   $noPrefix   When true, the class prefix is not applied
 *
 * @return string
 */
private function buildSpan($className, $insideSpan, $leaveOpen = false, $noPrefix = false)
{
    if ($insideSpan === '' && !$leaveOpen) {
        return '';
    }

    if (!$className) {
        return $insideSpan;
    }

    $prefix = $noPrefix ? "" : $this->options['classPrefix'];
    $html = "<span class=\"" . $prefix . $className . "\">" . $insideSpan;

    if (!$leaveOpen) {
        $html .= self::SPAN_END_TAG;
    }

    return $html;
}
362
|
|
|
|
363
|
|
|
/**
 * Escape HTML special characters in a string, leaving quotes untouched.
 *
 * @param string $value
 *
 * @return string
 */
private function escape($value)
{
    // ENT_NOQUOTES: neither single nor double quotes are converted.
    $quoteStyle = ENT_NOQUOTES;

    return htmlspecialchars($value, $quoteStyle);
}
372
|
|
|
|
373
|
|
|
/**
 * Render the current mode buffer, wrapping recognised keywords in spans.
 *
 * If the current mode defines no keywords the buffer is simply escaped.
 * Otherwise the buffer is scanned with the mode's lexeme expression; each
 * keyword hit adds its relevance score and is wrapped in a span, and all
 * other text is escaped.
 *
 * @return string
 */
private function processKeywords()
{
    $mode = $this->top;
    $buffer = $this->modeBuffer;

    if (!$mode->keywords) {
        return $this->escape($buffer);
    }

    $lexemes = $mode->lexemesRe;
    $lexemes->lastIndex = 0;

    $output = "";
    $cursor = 0;

    for ($match = $lexemes->exec($buffer); $match; $match = $lexemes->exec($buffer)) {
        // Text between the previous lexeme and this one.
        $output .= $this->escape(substr($buffer, $cursor, $match->index - $cursor));

        $keyword = $this->keywordMatch($mode, $match);

        if ($keyword) {
            // Entry layout as used here: [0] => class name, [1] => relevance.
            $this->relevance += $keyword[1];
            $output .= $this->buildSpan($keyword[0], $this->escape($match[0]));
        } else {
            $output .= $this->escape($match[0]);
        }

        $cursor = $lexemes->lastIndex;
    }

    return $output . $this->escape(substr($buffer, $cursor));
}
404
|
|
|
|
405
|
|
|
/**
 * Highlight the current mode buffer using the sub-language(s) of the current mode.
 *
 * When the mode's `subLanguage` is a string, the buffer is highlighted with
 * exactly that language (resuming from a stored continuation if there is one)
 * and the resulting top mode is stored as the new continuation. Otherwise the
 * buffer is auto-detected, limited to the given list when it is non-empty.
 *
 * If an explicitly named sub-language is not registered, or if highlighting
 * throws, the escaped buffer is returned without markup.
 *
 * @return string
 */
private function processSubLanguage()
{
    try {
        $subLanguage = $this->top->subLanguage;
        $explicit = is_string($subLanguage);

        if ($explicit && !in_array($subLanguage, self::$languages, true)) {
            return $this->escape($this->modeBuffer);
        }

        // Registered languages are static and therefore shared with the nested instance.
        // Passing `false` prevents the nested highlighter from re-scanning the language
        // directory and registering every bundled language, which would override a caller
        // that deliberately registered only a subset of languages.
        $hl = new Highlighter(false);

        // @TODO in v10.x, this should no longer be necessary once `$options` is made static
        $hl->setAutodetectLanguages($this->options['languages']);
        $hl->setClassPrefix($this->options['classPrefix']);
        $hl->setTabReplace($this->options['tabReplace']);

        if (!$this->safeMode) {
            $hl->disableSafeMode();
        }

        if ($explicit) {
            $continuation = isset($this->continuations[$subLanguage])
                ? $this->continuations[$subLanguage]
                : null;

            $res = $hl->highlight($subLanguage, $this->modeBuffer, true, $continuation);
        } else {
            $res = $hl->highlightAuto(
                $this->modeBuffer,
                count($subLanguage) ? $subLanguage : null
            );
        }

        // Counting embedded language score towards the host language may be disabled
        // with zeroing the containing mode relevance. Use case in point is Markdown that
        // allows XML everywhere and makes every XML snippet to have a much larger Markdown
        // score.
        if ($this->top->relevance > 0) {
            $this->relevance += $res->relevance;
        }

        if ($explicit) {
            $this->continuations[$subLanguage] = $res->top;
        }

        // The span class is the detected language name, without the class prefix.
        return $this->buildSpan($res->language, $res->value, false, true);
    } catch (\Exception $e) {
        return $this->escape($this->modeBuffer);
    }
}
457
|
|
|
|
458
|
|
|
/**
 * Flush the mode buffer into the result.
 *
 * The buffer is rendered as an embedded language when the current mode
 * declares a sub-language, and as keyword-highlighted text otherwise. The
 * buffer is emptied afterwards.
 *
 * @return void
 */
private function processBuffer()
{
    $mode = $this->top;

    $rendered = (is_object($mode) && $mode->subLanguage)
        ? $this->processSubLanguage()
        : $this->processKeywords();

    $this->result .= $rendered;
    $this->modeBuffer = '';
}
471
|
|
|
|
472
|
|
|
/**
 * Enter a new mode: open its span (if it has a class name) and push a copy of
 * the mode onto the mode stack.
 *
 * @param Mode $mode
 *
 * @return void
 */
private function startNewMode($mode)
{
    if ($mode->className) {
        $this->result .= $this->buildSpan($mode->className, "", true);
    }

    // Work on a clone so that the `parent` link is set on this stack entry only,
    // not on the mode object that was passed in.
    $entered = clone $mode;
    $entered->parent = $this->top;

    $this->top = $entered;
}
485
|
|
|
|
486
|
|
|
/**
 * Handle a match that begins a new mode.
 *
 * @param RegExMatch $match
 *
 * @return int Number of bytes of the input consumed by this match (0 when the
 *             new mode is flagged `returnBegin`)
 */
private function doBeginMatch($match)
{
    $lexeme = $match[0];
    $rule = $match->rule;

    if ($rule && $rule->endSameAsBegin) {
        // The mode must end with the exact text it began with.
        $rule->endRe = $this->escapeRe($lexeme);
    }

    if ($rule->skip) {
        $this->modeBuffer .= $lexeme;
    } else {
        if ($rule->excludeBegin) {
            // The begin lexeme belongs to the enclosing mode's output.
            $this->modeBuffer .= $lexeme;
        }

        $this->processBuffer();

        if (!($rule->returnBegin || $rule->excludeBegin)) {
            // The begin lexeme becomes the first content of the new mode.
            $this->modeBuffer = $lexeme;
        }
    }

    $this->startNewMode($rule);

    if ($rule->returnBegin) {
        return 0;
    }

    return strlen($lexeme);
}
515
|
|
|
|
516
|
|
|
/**
 * Handle a match that may end the current mode.
 *
 * @param RegExMatch $match
 *
 * @return int|null Number of bytes of the input consumed (0 when the ended
 *                  mode is flagged `returnEnd`), or null when the text at the
 *                  match position does not actually end any mode
 */
private function doEndMatch($match)
{
    $lexeme = $match[0];
    $remainder = substr($this->codeToHighlight, $match->index);
    $endMode = $this->endOfMode($this->top, $remainder);

    if (!$endMode) {
        return null;
    }

    $origin = $this->top;

    if ($origin->skip) {
        $this->modeBuffer .= $lexeme;
    } else {
        if (!$origin->returnEnd && !$origin->excludeEnd) {
            $this->modeBuffer .= $lexeme;
        }

        $this->processBuffer();

        if ($origin->excludeEnd) {
            // The end lexeme is emitted as part of the enclosing mode instead.
            $this->modeBuffer = $lexeme;
        }
    }

    // Pop every mode up to and including the one that was ended, closing spans
    // and collecting relevance on the way.
    $stopAt = $endMode->parent;

    do {
        $closing = $this->top;

        if ($closing->className) {
            $this->result .= self::SPAN_END_TAG;
        }

        if (!($closing->skip || $closing->subLanguage)) {
            $this->relevance += $closing->relevance;
        }

        $this->top = $closing->parent;
    } while ($this->top !== $stopAt);

    if ($endMode->starts) {
        if ($endMode->endSameAsBegin) {
            $endMode->starts->endRe = $endMode->endRe;
        }

        $this->startNewMode($endMode->starts);
    }

    if ($origin->returnEnd) {
        return 0;
    }

    return strlen($lexeme);
}
564
|
|
|
|
565
|
|
|
/**
 * Process one terminator match together with the unmatched text preceding it.
 *
 * @param string          $textBeforeMatch Text between the previous position and the match
 * @param RegExMatch|null $match           The match, or null to flush the remaining text
 *
 * @throws \UnexpectedValueException on an illegal lexeme, unless illegals are ignored
 *
 * @return int Number of bytes to advance past the match position
 */
private function processLexeme($textBeforeMatch, $match = null)
{
    $lexeme = $match ? $match[0] : null;

    // add non-matched text to the current mode buffer
    $this->modeBuffer .= $textBeforeMatch;

    if ($lexeme === null) {
        $this->processBuffer();

        return 0;
    }

    // we've found a 0 width match and we're stuck, so we need to advance
    // this happens when we have badly behaved rules that have optional matchers to the degree that
    // sometimes they can end up matching nothing at all
    // Ref: https://github.com/highlightjs/highlight.js/issues/2140
    $previous = $this->lastMatch;
    $stuck = $lexeme === ""
        && $previous->type === "begin"
        && $match->type === "end"
        && $previous->index === $match->index;

    if ($stuck) {
        // spit the "skipped" character that our regex choked on back into the output sequence
        $this->modeBuffer .= substr($this->codeToHighlight, $match->index, 1);

        return 1;
    }

    $this->lastMatch = $match;
    $type = $match->type;

    if ($type === "begin") {
        return $this->doBeginMatch($match);
    }

    if ($type === "illegal" && !$this->ignoreIllegals) {
        // illegal match, we do not continue processing
        $_modeRaw = isset($this->top->className) ? $this->top->className : "<unnamed>";

        throw new \UnexpectedValueException("Illegal lexeme \"$lexeme\" for mode \"$_modeRaw\"");
    }

    if ($type === "end") {
        $processed = $this->doEndMatch($match);

        if ($processed !== null) {
            return $processed;
        }
    }

    // Why might we find ourselves here? Only one occasion now. An end match that was
    // triggered but could not be completed. When might this happen? When an `endSameAsBegin`
    // rule sets the end rule to a specific match. Since the overall mode termination rule that's
    // being used to scan the text isn't recompiled, any match that LOOKS like the end (but is
    // not, because it is not an exact match to the beginning) will end up here. A definite end
    // match, but when `doEndMatch` tries to "reapply" the end rule and fails to match, we wind
    // up here, and just silently ignore the end.
    //
    // This causes no real harm other than stopping a few times too many.

    $this->modeBuffer .= $lexeme;

    return strlen($lexeme);
}
625
|
|
|
|
626
|
|
|
/**
 * Replace tab characters with the configured `tabReplace` string.
 *
 * The code is returned unchanged while the option is null.
 *
 * @param string $code
 *
 * @return string
 */
private function replaceTabs($code)
{
    $replacement = $this->options['tabReplace'];

    if ($replacement === null) {
        return $code;
    }

    return str_replace("\t", $replacement, $code);
}
641
|
|
|
|
642
|
|
|
/**
 * Set the languages that will be used for auto-detection. When using
 * auto-detection, the code to highlight is probed for every language in this
 * set. Limiting this set to only the languages you want to use will greatly
 * improve highlighting speed.
 *
 * @param string[] $set An array of language names to use for auto-detection.
 *                      Duplicates are removed. The constructor presets this
 *                      to a small set of web development languages.
 *
 * @return void
 */
public function setAutodetectLanguages(array $set)
{
    $uniqueNames = array_unique($set);

    $this->options['languages'] = $uniqueNames;
}
658
|
|
|
|
659
|
|
|
/**
 * Get the tab replacement string.
 *
 * @return string|null The tab replacement string, or null (the initial value)
 *                     when tabs are not replaced
 */
public function getTabReplace()
{
    $tabReplace = $this->options['tabReplace'];

    return $tabReplace;
}
668
|
|
|
|
669
|
|
|
/**
 * Set the string that replaces each tab character in highlighted output.
 * The initial value is null, which means tabs are left as they are.
 *
 * @param string|null $tabReplace The tab replacement string
 *
 * @return void
 */
public function setTabReplace($tabReplace)
{
    $options = $this->options;
    $options['tabReplace'] = $tabReplace;

    $this->options = $options;
}
681
|
|
|
|
682
|
|
|
/**
 * Get the prefix that is put in front of every generated CSS class name.
 *
 * @return string The class prefix string (`hljs-` unless changed)
 */
public function getClassPrefix()
{
    $classPrefix = $this->options['classPrefix'];

    return $classPrefix;
}
691
|
|
|
|
692
|
|
|
/**
 * Set the prefix that is put in front of every generated CSS class name.
 *
 * @param string $classPrefix The class prefix string
 *
 * @return void
 */
public function setClassPrefix($classPrefix)
{
    $options = $this->options;
    $options['classPrefix'] = $classPrefix;

    $this->options = $options;
}
703
|
|
|
|
704
|
|
|
/**
 * Turn safe mode on (the initial state).
 *
 * In safe mode, `highlight()` returns the escaped input with the exception
 * attached to the result instead of rethrowing unexpected exceptions.
 *
 * @since 9.17.1.0
 *
 * @return void
 */
public function enableSafeMode()
{
    $this->safeMode = true;
}
713
|
|
|
|
714
|
|
|
/**
 * Turn safe mode off.
 *
 * With safe mode off, `highlight()` rethrows exceptions other than illegal
 * lexeme errors instead of falling back to escaped output.
 *
 * @since 9.17.1.0
 *
 * @return void
 */
public function disableSafeMode()
{
    $this->safeMode = false;
}
723
|
|
|
|
724
|
|
|
/**
 * Resolve a language ID or alias to its registered language definition.
 *
 * @param string $name A language ID or one of its aliases
 *
 * @return Language|null The definition, or null when nothing is registered under that name
 */
private function getLanguage($name)
{
    // A direct ID match wins over an alias of the same name.
    if (isset(self::$classMap[$name])) {
        return self::$classMap[$name];
    }

    if (!isset(self::$aliases[$name])) {
        return null;
    }

    $languageId = self::$aliases[$name];

    return isset(self::$classMap[$languageId]) ? self::$classMap[$languageId] : null;
}
739
|
|
|
|
740
|
|
|
/**
 * Determine whether or not a language definition supports auto detection.
 *
 * @param string $name Language name
 *
 * @return bool False when the language is unknown or has `disableAutodetect` set
 */
private function autoDetection($name)
{
    $lang = $this->getLanguage($name);

    if (!$lang) {
        return false;
    }

    return !$lang->disableAutodetect;
}
753
|
|
|
|
754
|
|
|
/**
 * Core highlighting function. Accepts a language name, or an alias, and a
 * string with the code to highlight. Returns an object with the following
 * properties:
 * - relevance (int)
 * - value (an HTML string with highlighting markup)
 * - language (string)
 * - top (the mode that was active when the input ended; usable as `$continuation`)
 * - illegal (bool, true when highlighting stopped on an illegal lexeme)
 * - errorRaised (the exception caught in safe mode, otherwise null)
 *
 * @todo In v10.x, change the return type from \stdClass to HighlightResult
 *
 * @param string    $languageName
 * @param string    $code
 * @param bool      $ignoreIllegals
 * @param Mode|null $continuation
 *
 * @throws \DomainException if the requested language was not in this
 *                          Highlighter's language set
 * @throws \Exception       if an invalid regex was given in a language file
 *
 * @return HighlightResult|\stdClass
 */
public function highlight($languageName, $code, $ignoreIllegals = true, $continuation = null)
{
    $this->codeToHighlight = $code;
    $this->language = $this->getLanguage($languageName);

    if ($this->language === null) {
        throw new \DomainException("Unknown language: \"$languageName\"");
    }

    $this->language->compile($this->safeMode);
    $this->top = $continuation ? $continuation : $this->language;
    $this->continuations = array();
    $this->result = "";

    // When resuming from a continuation, re-open the spans of every mode that is still active.
    for ($current = $this->top; $current !== $this->language; $current = $current->parent) {
        if ($current->className) {
            $this->result = $this->buildSpan($current->className, '', true) . $this->result;
        }
    }

    $this->modeBuffer = "";
    $this->relevance = 0;
    $this->ignoreIllegals = $ignoreIllegals;

    // Start every run with a fresh placeholder so that the zero-width-match guard in
    // `processLexeme()` never compares against the last match of a previous run.
    $this->lastMatch = new RegExMatch(array());
    $this->lastMatch->type = "";
    $this->lastMatch->rule = null;

    /** @var HighlightResult $res */
    $res = new \stdClass();
    $res->relevance = 0;
    $res->value = "";
    $res->language = "";
    $res->top = null;
    $res->errorRaised = null;
    // Set up front so that the property exists on every return path, including
    // the safe-mode fallback below.
    $res->illegal = false;

    try {
        $index = 0;

        while ($this->top) {
            $this->top->terminators->lastIndex = $index;
            $match = $this->top->terminators->exec($this->codeToHighlight);

            if (!$match) {
                break;
            }

            $count = $this->processLexeme(substr($this->codeToHighlight, $index, $match->index - $index), $match);
            $index = $match->index + $count;
        }

        // Flush whatever follows the last match.
        $this->processLexeme(substr($this->codeToHighlight, $index));

        // Close the spans of all modes that are still open at the end of the input.
        for ($current = $this->top; isset($current->parent); $current = $current->parent) {
            if ($current->className) {
                $this->result .= self::SPAN_END_TAG;
            }
        }

        $res->relevance = $this->relevance;
        $res->value = $this->replaceTabs($this->result);
        $res->illegal = false;
        $res->language = $this->language->name;
        $res->top = $this->top;

        return $res;
    } catch (\Exception $e) {
        if (strpos($e->getMessage(), "Illegal") !== false) {
            // Illegal lexeme: return the escaped input with zero relevance.
            $res->illegal = true;
            $res->relevance = 0;
            $res->value = $this->escape($this->codeToHighlight);

            return $res;
        } elseif ($this->safeMode) {
            // Safe mode: do not rethrow, hand back escaped input plus the exception.
            $res->relevance = 0;
            $res->value = $this->escape($this->codeToHighlight);
            $res->language = $languageName;
            $res->top = $this->top;
            $res->errorRaised = $e;

            return $res;
        }

        throw $e;
    }
}
858
|
|
|
|
859
|
|
|
/**
 * Highlight the given code with each candidate language and return the
 * result with the highest relevance.
 *
 * The runner-up, if any, is attached to the result as `secondBest`. When no
 * candidate scores above zero, the result holds the escaped code with an
 * empty language name.
 *
 * @param string        $code
 * @param string[]|null $languageSubset When set to null, the `languages` option is used, or every registered
 *                                      language if that option is empty. Set this to an array of languages of
 *                                      your choice to limit the amount of languages to try.
 *
 * @throws \Exception       if an invalid regex was given in a language file
 * @throws \DomainException if the attempted language to check does not exist
 *
 * @return HighlightResult|\stdClass
 */
public function highlightAuto($code, $languageSubset = null)
{
    /** @var HighlightResult $best */
    $best = new \stdClass();
    $best->relevance = 0;
    $best->value = $this->escape($code);
    $best->language = "";

    $runnerUp = clone $best;

    if ($languageSubset === null) {
        $configured = $this->options['languages'];

        $languageSubset = (is_array($configured) && count($configured) > 0)
            ? $configured
            : self::$languages;
    }

    foreach ($languageSubset as $name) {
        // `autoDetection()` is already false for names that are not registered.
        if (!$this->autoDetection($name)) {
            continue;
        }

        $candidate = $this->highlight($name, $code, false);

        if ($candidate->relevance > $runnerUp->relevance) {
            $runnerUp = $candidate;
        }

        if ($candidate->relevance > $best->relevance) {
            // New leader: the previous leader becomes the runner-up.
            $runnerUp = $best;
            $best = $candidate;
        }
    }

    if ($runnerUp->language) {
        $best->secondBest = $runnerUp;
    }

    return $best;
}
915
|
|
|
|
916
|
|
|
/**
 * Return a list of all supported languages. Using this list in
 * setAutodetectLanguages will turn on autodetection for all supported
 * languages.
 *
 * If no language is registered, a warning is raised and the bundled
 * languages are returned instead.
 *
 * @deprecated use `Highlighter::listRegisteredLanguages()` or `Highlighter::listBundledLanguages()` instead
 *
 * @param bool $include_aliases specify whether language aliases
 *                              should be included as well
 *
 * @since 9.18.1.4 Deprecated in favor of `Highlighter::listRegisteredLanguages()`
 *                 and `Highlighter::listBundledLanguages()`.
 * @since 9.12.0.3 The `$include_aliases` parameter was added
 * @since 8.3.0.0
 *
 * @return string[] An array of language names
 */
public function listLanguages($include_aliases = false)
{
    @trigger_error('This method is deprecated in favor `Highlighter::listRegisteredLanguages()` or `Highlighter::listBundledLanguages()`. This function will be removed in highlight.php 10.', E_USER_DEPRECATED);

    if (empty(self::$languages)) {
        trigger_error('No languages are registered, returning all bundled languages instead. You probably did not want this.', E_USER_WARNING);

        return self::listBundledLanguages();
    }

    return self::listRegisteredLanguages($include_aliases);
}
949
|
|
|
|
950
|
|
|
/**
 * Returns list of all available aliases for given language name.
 *
 * @param string $name name or alias of language to look-up
 *
 * @throws \DomainException if the requested language was not in this
 *                          Highlighter's language set
 *
 * @since 9.12.0.3
 *
 * @return string[] An array holding the language's name followed by all of
 *                  its aliases.
 */
public function getAliasesForLanguage($name)
{
    $language = $this->getLanguage($name);

    if ($language === null) {
        // Report the name that was asked for; `$language` is always null at this point.
        throw new \DomainException("Unknown language: \"$name\"");
    }

    if ($language->aliases === null) {
        return array($language->name);
    }

    return array_merge(array($language->name), $language->aliases);
}
978
|
|
|
} |
979
|
|
|
|