1 | <?php |
||
10 | class ParsedText |
||
11 | { |
||
/**
 * @var bool Whether decode() should convert HTML entities back to characters
 */
public $decodeHtmlEntities = false;

/**
 * @var bool Whether the original text contained backslash-escaped literals, in which case
 *           they are stored in an encoded form that decode() has to reverse
 */
protected $hasEscapedChars = false;

/**
 * @var bool Whether text contains link references
 */
public $hasReferences = false;

/**
 * @var array Link references, as an array of [label => link info]
 */
public $linkReferences = [];

/**
 * @var string Text being parsed, in its encoded form
 */
protected $text;
||
36 | |||
/**
 * Store the text to be parsed, encoding backslash-escaped literals along the way
 *
 * @param string $text Original text
 */
public function __construct($text)
{
    // Matches a backslash followed by any of the characters that can be escaped
    $escapePattern = '/\\\\[!"\'()*[\\\\\\]^_`~]/';

    // The cheap strpos() test lets us skip the regexp when there is no backslash at all
    if (strpos($text, '\\') !== false && preg_match($escapePattern, $text))
    {
        // Escaped literals are replaced with ESC (0x1B) followed by a one-character code
        // so that regexps used elsewhere do not have to account for them
        $encodedLiterals = [
            '\\!'  => "\x1B0",
            '\\"'  => "\x1B1",
            "\\'"  => "\x1B2",
            '\\('  => "\x1B3",
            '\\)'  => "\x1B4",
            '\\*'  => "\x1B5",
            '\\['  => "\x1B6",
            '\\\\' => "\x1B7",
            '\\]'  => "\x1B8",
            '\\^'  => "\x1B9",
            '\\_'  => "\x1BA",
            '\\`'  => "\x1BB",
            '\\~'  => "\x1BC"
        ];

        $text = strtr($text, $encodedLiterals);
        $this->hasEscapedChars = true;
    }

    // Two newlines and a non-whitespace character (0x17) are appended in order to trigger
    // the closure of all open blocks such as quotes and lists
    $this->text = $text . "\n\n\x17";
}
|
63 | |||
64 | /** |
||
65 | * @return string |
||
66 | */ |
||
67 | 263 | public function __toString() |
|
68 | { |
||
69 | 263 | return $this->text; |
|
70 | } |
||
71 | |||
/**
 * Return the single character stored at given position of the text
 *
 * @param  integer $pos Offset into the text
 * @return string
 */
public function charAt($pos)
{
    $chr = $this->text[$pos];

    return $chr;
}
||
82 | |||
/**
 * Decode a chunk of encoded text to be used as an attribute value
 *
 * Optionally decodes HTML entities, removes 0x1A characters, then restores the escaped
 * literals that were encoded by the constructor
 *
 * @param  string $str Encoded text
 * @return string      Decoded text
 */
public function decode($str)
{
    // Entities are only decoded on request, and only if the string may contain one
    if ($this->decodeHtmlEntities && strpos($str, '&') !== false)
    {
        $str = html_entity_decode($str, ENT_QUOTES, 'UTF-8');
    }

    $str = str_replace("\x1A", '', $str);

    // No escaped literal was encoded in the original text, nothing left to restore
    if (!$this->hasEscapedChars)
    {
        return $str;
    }

    // Reverse of the table used in the constructor: ESC + code => literal character
    $decodedLiterals = [
        "\x1B0" => '!',
        "\x1B1" => '"',
        "\x1B2" => "'",
        "\x1B3" => '(',
        "\x1B4" => ')',
        "\x1B5" => '*',
        "\x1B6" => '[',
        "\x1B7" => '\\',
        "\x1B8" => ']',
        "\x1B9" => '^',
        "\x1BA" => '_',
        "\x1BB" => '`',
        "\x1BC" => '~'
    ];

    return strtr($str, $decodedLiterals);
}
||
114 | |||
/**
 * Find the first occurrence of given substring starting at given position
 *
 * @param  string       $str Substring to look for
 * @param  integer      $pos Offset at which the search starts
 * @return bool|integer      Offset of the match, or FALSE if there is none
 */
public function indexOf($str, $pos = 0)
{
    $matchPos = strpos($this->text, $str, $pos);

    return $matchPos;
}
||
126 | |||
/**
 * Test whether given position is preceded by whitespace
 *
 * @param  integer $pos
 * @return bool
 */
public function isAfterWhitespace($pos)
{
    // Nothing precedes the start of the text
    if (!($pos > 0))
    {
        return false;
    }

    $previousChr = $this->text[$pos - 1];

    return (bool) $this->isWhitespace($previousChr);
}
||
137 | |||
/**
 * Test whether given character is an ASCII letter or digit
 *
 * @param  string $chr
 * @return bool
 */
public function isAlnum($chr)
{
    // The leading space pushes every alphanumeric character to an offset of 1 or more, so
    // that a space (offset 0) and a miss (false) are both rejected by the same comparison
    $offset = strpos(' abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789', $chr);

    return ($offset > 0);
}
||
148 | |||
/**
 * Test whether given position is followed by whitespace
 *
 * @param  integer $pos
 * @return bool
 */
public function isBeforeWhitespace($pos)
{
    $nextChr = $this->text[$pos + 1];

    return $this->isWhitespace($nextChr);
}
||
159 | |||
160 | /** |
||
161 | * Test whether a length of text is surrounded by alphanumeric characters |
||
162 | * |
||
163 | * @param integer $pos Start of the text |
||
164 | * @param integer $len Length of the text |
||
165 | * @return bool |
||
166 | */ |
||
167 | 8 | public function isSurroundedByAlnum($pos, $len) |
|
171 | |||
172 | /** |
||
173 | * Test whether given character is an ASCII whitespace character |
||
174 | * |
||
175 | * NOTE: newlines are normalized to LF before parsing so we don't have to check for CR |
||
176 | * |
||
177 | * @param string $chr |
||
178 | * @return bool |
||
179 | */ |
||
180 | 59 | public function isWhitespace($chr) |
|
184 | |||
185 | /** |
||
186 | * Mark the boundary of a block in the original text |
||
187 | * |
||
188 | * @param integer $pos |
||
189 | * @return void |
||
190 | */ |
||
191 | 263 | public function markBoundary($pos) |
|
195 | |||
196 | /** |
||
197 | * Overwrite part of the text with substitution characters ^Z (0x1A) |
||
198 | * |
||
199 | * @param integer $pos Start of the range |
||
200 | * @param integer $len Length of text to overwrite |
||
201 | * @return void |
||
202 | */ |
||
203 | 169 | public function overwrite($pos, $len) |
|
210 | } |