1 | <?php |
||
24 | class RegexHelper |
||
25 | { |
||
// Integer identifiers, numbered consecutively from 0 to 26.
// NOTE(review): presumably these are the keys accepted by getPartialRegex($const)
// and used to index $regex -- confirm against buildRegexPatterns().
const ESCAPABLE = 0;
const ESCAPED_CHAR = 1;
const IN_DOUBLE_QUOTES = 2;
const IN_SINGLE_QUOTES = 3;
const IN_PARENS = 4;
const REG_CHAR = 5;
const IN_PARENS_NOSP = 6;
const TAGNAME = 7;
const BLOCKTAGNAME = 8;
const ATTRIBUTENAME = 9;
const UNQUOTEDVALUE = 10;
const SINGLEQUOTEDVALUE = 11;
const DOUBLEQUOTEDVALUE = 12;
const ATTRIBUTEVALUE = 13;
const ATTRIBUTEVALUESPEC = 14;
const ATTRIBUTE = 15;
const OPENTAG = 16;
const CLOSETAG = 17;
const OPENBLOCKTAG = 18;
const CLOSEBLOCKTAG = 19;
const HTMLCOMMENT = 20;
const PROCESSINGINSTRUCTION = 21;
const DECLARATION = 22;
const CDATA = 23;
const HTMLTAG = 24;
const HTMLBLOCKOPEN = 25;
const LINK_TITLE = 26;

// Partial patterns: these two carry no delimiters, so they must be embedded in
// a delimited pattern before being handed to a preg_* function.
const REGEX_ESCAPABLE = '[!"#$%&\'()*+,.\/:;<=>?@[\\\\\]^_`{|}~-]';
const REGEX_ENTITY = '&(?:#x[a-f0-9]{1,8}|#[0-9]{1,8}|[a-z][a-z0-9]{1,31});';

// Complete, delimited patterns.
// Matches when the subject starts with a punctuation character (UTF-8 mode).
const REGEX_PUNCTUATION = '/^[\x{2000}-\x{206F}\x{2E00}-\x{2E7F}\p{Pc}\p{Pd}\p{Pe}\p{Pf}\p{Pi}\p{Po}\p{Ps}\\\\\'!"#\$%&\(\)\*\+,\-\.\\/:;<=>\?@\[\]\^_`\{\|\}~]/u';
// NOTE(review): the leading ^ binds only to "javascript:"; the vbscript:, file:
// and data: alternatives match anywhere in the subject. Left unchanged on
// purpose -- anchoring them would narrow a security filter. Confirm the intent
// before tightening.
const REGEX_UNSAFE_PROTOCOL = '/^javascript:|vbscript:|file:|data:/i';
// Matches subjects starting with data:image/ followed by png, gif, jpeg or webp.
const REGEX_SAFE_DATA_PROTOCOL = '/^data:image\/(?:png|gif|jpeg|webp)/i';
// Matches any single character outside the listed whitespace set.
const REGEX_NON_SPACE = '/[^ \t\f\v\r\n]/';

// Matches when the subject starts with an ASCII whitespace character.
const REGEX_WHITESPACE_CHAR = '/^[ \t\n\x0b\x0c\x0d]/';
// Matches a run of one or more ASCII whitespace characters anywhere in the subject.
const REGEX_WHITESPACE = '/[ \t\n\x0b\x0c\x0d]+/';
// Matches when the subject starts with a Unicode separator (\pZ) or a \s
// character. The alternation is grouped so that ^ applies to both branches;
// ungrouped, "\s" would match whitespace anywhere in the subject.
const REGEX_UNICODE_WHITESPACE_CHAR = '/^(?:\pZ|\s)/u';

/**
 * Unanchored variant: matches a Unicode separator or \s character anywhere.
 *
 * @deprecated
 */
const REGEX_UNICODE_WHITESPACE = '/\pZ|\s/u';

// Pattern storage, initially empty.
// NOTE(review): presumably populated by buildRegexPatterns() and keyed by the
// integer constants above -- confirm.
protected $regex = [];

// NOTE(review): presumably the shared instance returned by getInstance() -- confirm.
protected static $instance;
||
73 | |||
74 | /** |
||
75 | * Constructor |
||
76 | */ |
||
77 | 3 | protected function __construct() |
|
81 | |||
82 | /** |
||
83 | * @return RegexHelper |
||
84 | */ |
||
85 | 1815 | public static function getInstance() |
|
93 | |||
94 | /** |
||
95 | * Builds the regular expressions required to parse Markdown |
||
96 | * |
||
97 | * We could hard-code them all as pre-built constants, but that would be more difficult to manage. |
||
98 | */ |
||
99 | 3 | protected function buildRegexPatterns() |
|
136 | |||
137 | /** |
||
138 | * Returns a partial regex |
||
139 | * |
||
140 | * It'll need to be wrapped with /.../ before use |
||
141 | * |
||
142 | * @param int $const |
||
143 | * |
||
144 | * @return string |
||
145 | */ |
||
146 | 615 | public function getPartialRegex($const) |
|
150 | |||
151 | /** |
||
152 | * @return string |
||
153 | */ |
||
154 | 123 | public function getHtmlTagRegex() |
|
158 | |||
159 | /** |
||
160 | * @return string |
||
161 | */ |
||
162 | 270 | public function getLinkTitleRegex() |
|
166 | |||
167 | /** |
||
168 | * @return string |
||
169 | * |
||
170 | * @deprecated |
||
171 | */ |
||
172 | public function getLinkDestinationRegex() |
||
178 | |||
179 | /** |
||
180 | * @return string |
||
181 | */ |
||
182 | 387 | public function getLinkDestinationBracesRegex() |
|
186 | |||
187 | /** |
||
188 | * @return string |
||
189 | */ |
||
190 | 1689 | public function getThematicBreakRegex() |
|
194 | |||
195 | /** |
||
196 | * Attempt to match a regex in $string at offset $offset |
||
197 | * |
||
198 | * @param string $regex |
||
199 | * @param string $string |
||
200 | * @param int $offset |
||
201 | * |
||
202 | * @return int|null Index of match, or null |
||
203 | */ |
||
204 | 1776 | public static function matchAt($regex, $string, $offset = 0) |
|
217 | |||
218 | /** |
||
219 | * Functional wrapper around preg_match_all |
||
220 | * |
||
221 | * @param string $pattern |
||
222 | * @param string $subject |
||
223 | * @param int $offset |
||
224 | * |
||
225 | * @return array|null |
||
226 | */ |
||
227 | 1875 | public static function matchAll($pattern, $subject, $offset = 0) |
|
248 | |||
249 | /** |
||
250 | * Replace backslash escapes with literal characters |
||
251 | * |
||
252 | * @param string $string |
||
253 | * |
||
254 | * @return string |
||
255 | */ |
||
256 | 492 | public static function unescape($string) |
|
267 | |||
268 | /** |
||
269 | * @param int $type HTML block type |
||
270 | * |
||
271 | * @return string|null |
||
272 | */ |
||
273 | 279 | public static function getHtmlBlockOpenRegex($type) |
|
294 | |||
295 | /** |
||
296 | * @param int $type HTML block type |
||
297 | * |
||
298 | * @return string|null |
||
299 | */ |
||
300 | 60 | public static function getHtmlBlockCloseRegex($type) |
|
315 | |||
316 | /** |
||
317 | * @param string $url |
||
318 | * |
||
319 | * @return bool |
||
320 | */ |
||
321 | 30 | public static function isLinkPotentiallyUnsafe($url) |
|
325 | } |
||
326 |
If you suppress an error, we recommend checking for the error condition explicitly: