GitHub Access Token became invalid

It seems like the GitHub access token used for retrieving details about this repository from GitHub became invalid. This might prevent certain types of inspections from being run (in particular, everything related to pull requests).
Please ask an admin of your repository to renew the access token on this website.
Completed
Push — master ( 6c85d2...15e8d5 )
by Colin
04:57
created

RegexHelper::getThematicBreakRegex()   A

↳ Parent: RegexHelper

Complexity

Conditions 1
Paths 1

Duplication

Lines 0
Ratio 0 %

Size

Total Lines 4
Code Lines 2

Code Coverage

Tests 2
CRAP Score 1
Metric Value
dl 0
loc 4
ccs 2
cts 2
cp 1
rs 10
cc 1
eloc 2
nc 1
nop 0
crap 1
1
<?php
2
3
/*
4
 * This file is part of the league/commonmark package.
5
 *
6
 * (c) Colin O'Dell <[email protected]>
7
 *
8
 * Original code based on the CommonMark JS reference parser (http://bitly.com/commonmark-js)
9
 *  - (c) John MacFarlane
10
 *
11
 * For the full copyright and license information, please view the LICENSE
12
 * file that was distributed with this source code.
13
 */
14
15
namespace League\CommonMark\Util;
16
17
use League\CommonMark\Block\Element\HtmlBlock;
18
19
/**
 * Provides regular expressions and utilities for parsing Markdown
 *
 * Singletons are generally bad, but it allows us to build the regexes once (and only once).
 */
class RegexHelper
{
    // Keys into the partial-regex table built by buildRegexPatterns().
    // Pass one of these to getPartialRegex().
    const ESCAPABLE = 0;
    const ESCAPED_CHAR = 1;
    const IN_DOUBLE_QUOTES = 2;
    const IN_SINGLE_QUOTES = 3;
    const IN_PARENS = 4;
    const REG_CHAR = 5;
    const IN_PARENS_NOSP = 6;
    const TAGNAME = 7;
    const BLOCKTAGNAME = 8;
    const ATTRIBUTENAME = 9;
    const UNQUOTEDVALUE = 10;
    const SINGLEQUOTEDVALUE = 11;
    const DOUBLEQUOTEDVALUE = 12;
    const ATTRIBUTEVALUE = 13;
    const ATTRIBUTEVALUESPEC = 14;
    const ATTRIBUTE = 15;
    const OPENTAG = 16;
    const CLOSETAG = 17;
    const OPENBLOCKTAG = 18;
    const CLOSEBLOCKTAG = 19;
    const HTMLCOMMENT = 20;
    const PROCESSINGINSTRUCTION = 21;
    const DECLARATION = 22;
    const CDATA = 23;
    const HTMLTAG = 24;
    const HTMLBLOCKOPEN = 25;
    const LINK_TITLE = 26;

    // REGEX_ESCAPABLE and REGEX_ENTITY are partial patterns (no delimiters);
    // the remaining REGEX_* constants are complete, delimited patterns.
    const REGEX_ESCAPABLE = '[!"#$%&\'()*+,.\/:;<=>?@[\\\\\]^_`{|}~-]';
    const REGEX_ENTITY = '&(?:#x[a-f0-9]{1,8}|#[0-9]{1,8}|[a-z][a-z0-9]{1,31});';
    const REGEX_PUNCTUATION = '/^[\x{2000}-\x{206F}\x{2E00}-\x{2E7F}\\\\\'!"#\$%&\(\)\*\+,\-\.\\/:;<=>\?@\[\]\^_`\{\|\}~]/u';
    // NOTE(review): the "^" anchor binds only to the first alternative, so
    // "vbscript:", "file:" and "data:" match anywhere in the URL. That errs on
    // the side of flagging a link as unsafe; confirm this is intended before tightening.
    const REGEX_UNSAFE_PROTOCOL = '/^javascript:|vbscript:|file:|data:/i';
    const REGEX_SAFE_DATA_PROTOCOL = '/^data:image\/(?:png|gif|jpeg|webp)/i';

    /**
     * Partial regexes, keyed by the integer constants declared above
     *
     * @var string[]
     */
    protected $regex = [];

    /**
     * The shared instance, created on the first call to getInstance()
     *
     * @var RegexHelper|null
     */
    protected static $instance;

    /**
     * Constructor
     *
     * Protected so that instances are only obtained through getInstance().
     */
    protected function __construct()
    {
        $this->buildRegexPatterns();
    }

    /**
     * Returns the shared instance, building it (and its regexes) on first use
     *
     * @return RegexHelper
     */
    public static function getInstance()
    {
        if (self::$instance === null) {
            self::$instance = new self();
        }

        return self::$instance;
    }

    /**
     * Builds the regular expressions required to parse Markdown
     *
     * We could hard-code them all as pre-built constants, but that would be more difficult to manage.
     *
     * Order matters: later entries are composed from earlier ones.
     */
    protected function buildRegexPatterns()
    {
        $regex = [];
        $regex[self::ESCAPABLE] = self::REGEX_ESCAPABLE;
        $regex[self::ESCAPED_CHAR] = '\\\\' . $regex[self::ESCAPABLE];
        $regex[self::IN_DOUBLE_QUOTES] = '"(' . $regex[self::ESCAPED_CHAR] . '|[^"\x00])*"';
        $regex[self::IN_SINGLE_QUOTES] = '\'(' . $regex[self::ESCAPED_CHAR] . '|[^\'\x00])*\'';
        $regex[self::IN_PARENS] = '\\((' . $regex[self::ESCAPED_CHAR] . '|[^)\x00])*\\)';
        $regex[self::REG_CHAR] = '[^\\\\()\x00-\x20]';
        $regex[self::IN_PARENS_NOSP] = '\((' . $regex[self::REG_CHAR] . '|' . $regex[self::ESCAPED_CHAR] . '|\\\\)*\)';
        $regex[self::TAGNAME] = '[A-Za-z][A-Za-z0-9-]*';
        // All six heading levels are block-level tags (previously only h1 was listed).
        $regex[self::BLOCKTAGNAME] = '(?:address|article|aside|base|basefont|blockquote|body|caption|center|col|colgroup|dd|details|dialog|dir|div|dl|dt|fieldset|figcaption|figure|footer|form|frame|frameset|h[1-6]|head|header|hr|html|iframe|legend|li|link|main|menu|menuitem|meta|nav|noframes|ol|optgroup|option|p|param|section|source|title|summary|table|tbody|td|tfoot|th|thead|title|tr|track|ul)';
        $regex[self::ATTRIBUTENAME] = '[a-zA-Z_:][a-zA-Z0-9:._-]*';
        $regex[self::UNQUOTEDVALUE] = '[^"\'=<>`\x00-\x20]+';
        $regex[self::SINGLEQUOTEDVALUE] = '\'[^\']*\'';
        $regex[self::DOUBLEQUOTEDVALUE] = '"[^"]*"';
        $regex[self::ATTRIBUTEVALUE] = '(?:' . $regex[self::UNQUOTEDVALUE] . '|' . $regex[self::SINGLEQUOTEDVALUE] . '|' . $regex[self::DOUBLEQUOTEDVALUE] . ')';
        $regex[self::ATTRIBUTEVALUESPEC] = '(?:' . '\s*=' . '\s*' . $regex[self::ATTRIBUTEVALUE] . ')';
        $regex[self::ATTRIBUTE] = '(?:' . '\s+' . $regex[self::ATTRIBUTENAME] . $regex[self::ATTRIBUTEVALUESPEC] . '?)';
        $regex[self::OPENTAG] = '<' . $regex[self::TAGNAME] . $regex[self::ATTRIBUTE] . '*' . '\s*\/?>';
        $regex[self::CLOSETAG] = '<\/' . $regex[self::TAGNAME] . '\s*[>]';
        $regex[self::OPENBLOCKTAG] = '<' . $regex[self::BLOCKTAGNAME] . $regex[self::ATTRIBUTE] . '*' . '\s*\/?>';
        $regex[self::CLOSEBLOCKTAG] = '<\/' . $regex[self::BLOCKTAGNAME] . '\s*[>]';
        $regex[self::HTMLCOMMENT] = '<!---->|<!--(?:-?[^>-])(?:-?[^-])*-->';
        $regex[self::PROCESSINGINSTRUCTION] = '[<][?].*?[?][>]';
        $regex[self::DECLARATION] = '<![A-Z]+' . '\s+[^>]*>';
        $regex[self::CDATA] = '<!\[CDATA\[[\s\S]*?]\]>';
        $regex[self::HTMLTAG] = '(?:' . $regex[self::OPENTAG] . '|' . $regex[self::CLOSETAG] . '|' . $regex[self::HTMLCOMMENT] . '|' .
            $regex[self::PROCESSINGINSTRUCTION] . '|' . $regex[self::DECLARATION] . '|' . $regex[self::CDATA] . ')';
        $regex[self::HTMLBLOCKOPEN] = '<(?:' . $regex[self::BLOCKTAGNAME] . '(?:[\s\/>]|$)' . '|' .
            '\/' . $regex[self::BLOCKTAGNAME] . '(?:[\s>]|$)' . '|' . '[?!])';
        $regex[self::LINK_TITLE] = '^(?:"(' . $regex[self::ESCAPED_CHAR] . '|[^"\x00])*"' .
            '|' . '\'(' . $regex[self::ESCAPED_CHAR] . '|[^\'\x00])*\'' .
            '|' . '\((' . $regex[self::ESCAPED_CHAR] . '|[^)\x00])*\))';

        $this->regex = $regex;
    }

    /**
     * Returns a partial regex
     *
     * It'll need to be wrapped with /.../ before use
     *
     * @param int $const One of the integer constants declared on this class (e.g. self::OPENTAG)
     *
     * @return string
     */
    public function getPartialRegex($const)
    {
        return $this->regex[$const];
    }

    /**
     * Returns a delimited, case-insensitive regex matching an HTML tag at the start of the subject
     *
     * @return string
     */
    public function getHtmlTagRegex()
    {
        return '/^' . $this->regex[self::HTMLTAG] . '/i';
    }

    /**
     * Returns a delimited regex matching a double-quoted, single-quoted or parenthesized link title
     *
     * @return string
     */
    public function getLinkTitleRegex()
    {
        return '/' . $this->regex[self::LINK_TITLE] . '/';
    }

    /**
     * Returns a delimited regex matching an unbracketed link destination at the start of the subject
     *
     * @return string
     */
    public function getLinkDestinationRegex()
    {
        return '/^' . '(?:' . $this->regex[self::REG_CHAR] . '+|' . $this->regex[self::ESCAPED_CHAR] . '|\\\\|' . $this->regex[self::IN_PARENS_NOSP] . ')*' . '/';
    }

    /**
     * Returns a delimited regex matching a <...>-wrapped link destination at the start of the subject
     *
     * @return string
     */
    public function getLinkDestinationBracesRegex()
    {
        return '/^(?:' . '[<](?:[^<>\\n\\\\\\x00]' . '|' . $this->regex[self::ESCAPED_CHAR] . '|' . '\\\\)*[>]' . ')/';
    }

    /**
     * Returns a delimited regex matching a line made of three or more "*", "_" or "-"
     * characters (one kind per line), optionally separated and followed by spaces
     *
     * @return string
     */
    public function getThematicBreakRegex()
    {
        return '/^(?:(?:\* *){3,}|(?:_ *){3,}|(?:- *){3,}) *$/';
    }

    /**
     * Attempt to match a regex in string s at offset offset
     *
     * The offset and the returned index are both measured in UTF-8 characters, not bytes.
     *
     * @param string $regex
     * @param string $string
     * @param int    $offset Character offset at which to start searching
     *
     * @return int|null Index of match, or null
     */
    public static function matchAt($regex, $string, $offset = 0)
    {
        $matches = [];
        $string = mb_substr($string, $offset, null, 'utf-8');
        if (!preg_match($regex, $string, $matches, PREG_OFFSET_CAPTURE)) {
            return null;
        }

        // PREG_OFFSET_CAPTURE always returns the byte offset, not the char offset, which is annoying.
        // Cut the subject at that byte position and count characters to convert it.
        $charPos = mb_strlen(mb_strcut($string, 0, $matches[0][1], 'utf-8'), 'utf-8');

        return $offset + $charPos;
    }

    /**
     * Functional wrapper around preg_match_all
     *
     * When the pattern matches exactly once, each group is flattened to its single
     * value (so the result looks like a preg_match() result). When it matches more
     * than once, the PREG_PATTERN_ORDER structure is returned unchanged.
     *
     * Note that, unlike matchAt(), the offset here is applied with substr() and is
     * therefore a byte offset.
     *
     * @param string $pattern
     * @param string $subject
     * @param int    $offset  Byte offset at which to start searching
     *
     * @return array|null Matches, or null if the pattern did not match
     */
    public static function matchAll($pattern, $subject, $offset = 0)
    {
        $matches = [];
        $subject = substr($subject, $offset);
        preg_match_all($pattern, $subject, $matches, PREG_PATTERN_ORDER);

        $fullMatches = reset($matches);
        if (empty($fullMatches)) {
            return null;
        }

        if (count($fullMatches) === 1) {
            // array_map() with a single array preserves keys (including named groups)
            // and avoids leaving a by-reference loop variable behind.
            $matches = array_map(function ($group) {
                return reset($group);
            }, $matches);
        }

        return $matches;
    }

    /**
     * Replace backslash escapes with literal characters
     *
     * Entity references are then decoded through Html5Entities::decodeEntity().
     *
     * @param string $string
     *
     * @return string
     */
    public static function unescape($string)
    {
        $allEscapedChar = '/\\\\(' . self::REGEX_ESCAPABLE . ')/';

        // Backslash escapes are stripped first; entities are decoded on the result.
        $escaped = preg_replace($allEscapedChar, '$1', $string);
        $replaced = preg_replace_callback('/' . self::REGEX_ENTITY . '/i', function ($e) {
            return Html5Entities::decodeEntity($e[0]);
        }, $escaped);

        return $replaced;
    }

    /**
     * Returns the regex that detects the start of an HTML block of the given type
     *
     * @param int $type HTML block type
     *
     * @return string|null Delimited regex, or null if the type is not recognized
     */
    public static function getHtmlBlockOpenRegex($type)
    {
        switch ($type) {
            case HtmlBlock::TYPE_1_CODE_CONTAINER:
                return '/^<(?:script|pre|style)(?:\s|>|$)/i';
            case HtmlBlock::TYPE_2_COMMENT:
                return '/^<!--/';
            case HtmlBlock::TYPE_3:
                return '/^<[?]/';
            case HtmlBlock::TYPE_4:
                return '/^<![A-Z]/';
            case HtmlBlock::TYPE_5_CDATA:
                return '/^<!\[CDATA\[/';
            case HtmlBlock::TYPE_6_BLOCK_ELEMENT:
                // Covers h1 through h6 and iframe (previously only h1 was listed and
                // iframe was missing, unlike the BLOCKTAGNAME partial regex).
                return '%^<[/]?(?:address|article|aside|base|basefont|blockquote|body|caption|center|col|colgroup|dd|details|dialog|dir|div|dl|dt|fieldset|figcaption|figure|footer|form|frame|frameset|h[1-6]|head|header|hr|html|iframe|legend|li|link|main|menu|menuitem|meta|nav|noframes|ol|optgroup|option|p|param|pre|section|source|title|summary|table|tbody|td|tfoot|th|thead|title|tr|track|ul)(?:\s|[/]?[>]|$)%i';
            case HtmlBlock::TYPE_7_MISC_ELEMENT:
                $self = self::getInstance();

                return '/^(?:' . $self->getPartialRegex(self::OPENTAG) . '|' . $self->getPartialRegex(self::CLOSETAG) . ')\\s*$/i';
        }

        return null;
    }

    /**
     * Returns the regex that detects the end of an HTML block of the given type
     *
     * Only types 1 through 5 have a closing pattern here; any other type yields null.
     *
     * @param int $type HTML block type
     *
     * @return string|null Delimited regex, or null if the type has no closing pattern
     */
    public static function getHtmlBlockCloseRegex($type)
    {
        switch ($type) {
            case HtmlBlock::TYPE_1_CODE_CONTAINER:
                return '%<\/(?:script|pre|style)>%i';
            case HtmlBlock::TYPE_2_COMMENT:
                return '/-->/';
            case HtmlBlock::TYPE_3:
                return '/\?>/';
            case HtmlBlock::TYPE_4:
                return '/>/';
            case HtmlBlock::TYPE_5_CDATA:
                return '/\]\]>/';
        }

        return null;
    }

    /**
     * Reports whether a URL matches the unsafe-protocol pattern without also
     * matching the safe data-image pattern
     *
     * @param string $url
     *
     * @return bool
     */
    public static function isLinkPotentiallyUnsafe($url)
    {
        return preg_match(self::REGEX_UNSAFE_PROTOCOL, $url) !== 0 && preg_match(self::REGEX_SAFE_DATA_PROTOCOL, $url) === 0;
    }
}
312