Completed
Branch wip/litedown (e6465f)
by Josh
34:06
created

ParsedText::decode()   B

Complexity

Conditions 4
Paths 4

Size

Total Lines 23
Code Lines 12

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 8
CRAP Score 4

Importance

Changes 0
Metric Value
dl 0
loc 23
ccs 8
cts 8
cp 1
rs 8.7972
c 0
b 0
f 0
cc 4
eloc 12
nc 4
nop 1
crap 4
1
<?php
2
3
/**
4
* @package   s9e\TextFormatter
5
* @copyright Copyright (c) 2010-2017 The s9e Authors
6
* @license   http://www.opensource.org/licenses/mit-license.php The MIT License
7
*/
8
namespace s9e\TextFormatter\Plugins\Litedown\Parser;
9
10
class ParsedText
{
	/**
	* @var bool Whether to decode HTML entities when decoding text
	*/
	public $decodeHtmlEntities = false;

	/**
	* @var bool Whether text contains escape characters
	*/
	protected $hasEscapedChars = false;

	/**
	* @var bool Whether text contains link references
	*/
	protected $hasReferences = false;

	/**
	* @var array Array of [label => link info]
	*/
	public $linkReferences = [];

	/**
	* @var string Text being parsed
	*/
	protected $text;

	/**
	* Store the text to be parsed
	*
	* Backslash-escaped punctuation is replaced with a two-byte sequence made of 0x1B followed
	* by a digit or letter, and "\n\n\x17" is appended to the text.
	*
	* @param string $text Original text
	*/
	public function __construct($text)
	{
		// The cheap strpos() check lets us skip the regexp for text that contains no backslash
		if (strpos($text, '\\') !== false && preg_match('/\\\\[!"\'()*[\\\\\\]^_`~]/', $text))
		{
			$this->hasEscapedChars = true;

			// Encode escaped literals that have a special meaning otherwise, so that we don't have
			// to take them into account in regexps
			$text = strtr(
				$text,
				[
					'\\!' => "\x1B0", '\\"' => "\x1B1", "\\'" => "\x1B2", '\\('  => "\x1B3",
					'\\)' => "\x1B4", '\\*' => "\x1B5", '\\[' => "\x1B6", '\\\\' => "\x1B7",
					'\\]' => "\x1B8", '\\^' => "\x1B9", '\\_' => "\x1BA", '\\`'  => "\x1BB",
					'\\~' => "\x1BC"
				]
			);
		}

		// We append a couple of lines and a non-whitespace character at the end of the text in
		// order to trigger the closure of all open blocks such as quotes and lists
		$this->text = $text . "\n\n\x17";
	}

	/**
	* Return the text in its current, encoded state
	*
	* @return string
	*/
	public function __toString()
	{
		return $this->text;
	}

	/**
	* Return the character at given position
	*
	* @param  integer $pos
	* @return string
	*/
	public function charAt($pos)
	{
		return $this->text[$pos];
	}

	/**
	* Decode a chunk of encoded text to be used as an attribute value
	*
	* Optionally decodes HTML entities, removes 0x1A characters, then restores the literal
	* characters that were encoded by the constructor
	*
	* @param  string $str Encoded text
	* @return string      Decoded text
	*/
	public function decode($str)
	{
		if ($this->decodeHtmlEntities && strpos($str, '&') !== false)
		{
			$str = html_entity_decode($str, ENT_QUOTES, 'UTF-8');
		}
		$str = str_replace("\x1A", '', $str);

		// The 0x1B sequences only exist if the constructor found escaped characters
		if ($this->hasEscapedChars)
		{
			$str = strtr(
				$str,
				[
					"\x1B0" => '!', "\x1B1" => '"', "\x1B2" => "'", "\x1B3" => '(',
					"\x1B4" => ')', "\x1B5" => '*', "\x1B6" => '[', "\x1B7" => '\\',
					"\x1B8" => ']', "\x1B9" => '^', "\x1BA" => '_', "\x1BB" => '`',
					"\x1BC" => '~'
				]
			);
		}

		return $str;
	}

	/**
	* Find the first occurrence of given substring starting at given position
	*
	* @param  string       $str
	* @param  integer      $pos
	* @return bool|integer      Position of the substring, or FALSE if it was not found
	*/
	public function indexOf($str, $pos = 0)
	{
		return strpos($this->text, $str, $pos);
	}

	/**
	* Test whether given position is preceded by whitespace
	*
	* @param  integer $pos
	* @return bool
	*/
	public function isAfterWhitespace($pos)
	{
		return ($pos > 0 && $this->isWhitespace($this->text[$pos - 1]));
	}

	/**
	* Test whether given character is alphanumeric
	*
	* Only a single ASCII letter or digit qualifies. The empty string and strings of more than
	* one byte are never alphanumeric
	*
	* @param  string $chr
	* @return bool
	*/
	public function isAlnum($chr)
	{
		// The length check prevents substrings of the list such as "ab" from matching
		return (strlen($chr) === 1 && strpos('abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789', $chr) !== false);
	}

	/**
	* Test whether given position is followed by whitespace
	*
	* @param  integer $pos
	* @return bool         FALSE if there is no character after given position
	*/
	public function isBeforeWhitespace($pos)
	{
		// isset() avoids an "Uninitialized string offset" error past the end of the text
		return (isset($this->text[$pos + 1]) && $this->isWhitespace($this->text[$pos + 1]));
	}

	/**
	* Test whether a length of text is surrounded by alphanumeric characters
	*
	* @param  integer $pos Start of the text
	* @param  integer $len Length of the text
	* @return bool
	*/
	public function isSurroundedByAlnum($pos, $len)
	{
		if ($pos <= 0 || !isset($this->text[$pos + $len]))
		{
			// There is no character on at least one side of the text
			return false;
		}

		return ($this->isAlnum($this->text[$pos - 1]) && $this->isAlnum($this->text[$pos + $len]));
	}

	/**
	* Test whether given character is an ASCII whitespace character
	*
	* NOTE: newlines are normalized to LF before parsing so we don't have to check for CR
	*
	* @param  string $chr
	* @return bool
	*/
	public function isWhitespace($chr)
	{
		// Strict comparisons are used instead of strpos() because an empty needle is matched
		// at position 0 on PHP 8, which would make the empty string count as whitespace
		return ($chr === ' ' || $chr === "\n" || $chr === "\t");
	}

	/**
	* Mark the boundary of a block in the original text
	*
	* Replaces the character at given position with 0x17
	*
	* @param  integer $pos
	* @return void
	*/
	public function markBoundary($pos)
	{
		$this->text[$pos] = "\x17";
	}

	/**
	* Overwrite part of the text with substitution characters ^Z (0x1A)
	*
	* @param  integer $pos Start of the range
	* @param  integer $len Length of text to overwrite
	* @return void
	*/
	public function overwrite($pos, $len)
	{
		if ($len > 0)
		{
			$this->text = substr($this->text, 0, $pos) . str_repeat("\x1A", $len) . substr($this->text, $pos + $len);
		}
	}
}
89
	* @return string      Decoded text
90 7
	*/
91 7
	public function decode($str)
92 7
	{
93
		if ($this->decodeHtmlEntities && strpos($str, '&') !== false)
94 7
		{
95 7
			$str = html_entity_decode($str, ENT_QUOTES, 'UTF-8');
96 7
		}
97
		$str = str_replace("\x1A", '', $str);
98 69
99
		if ($this->hasEscapedChars)
100
		{
101
			$str = strtr(
102
				$str,
103
				[
104
					"\x1B0" => '!', "\x1B1" => '"', "\x1B2" => "'", "\x1B3" => '(',
105
					"\x1B4" => ')', "\x1B5" => '*', "\x1B6" => '[', "\x1B7" => '\\',
106
					"\x1B8" => ']', "\x1B9" => '^', "\x1BA" => '_', "\x1BB" => '`',
107 59
					"\x1BC" => '~'
108
				]
109 59
			);
110
		}
111
112
		return $str;
113
	}
114
115
	/**
116
	* Find the first occurence of given substring starting at given position
117
	*
118 8
	* @param  string       $str
119
	* @param  integer      $pos
120 8
	* @return bool|integer
121
	*/
122
	public function indexOf($str, $pos = 0)
123
	{
124
		return strpos($this->text, $str, $pos);
125
	}
126
127
	/**
128
	* Test whether given position is preceded by whitespace
129 59
	*
130
	* @param  integer $pos
131 59
	* @return bool
132
	*/
133
	public function isAfterWhitespace($pos)
134
	{
135
		return ($pos > 0 && $this->isWhitespace($this->text[$pos - 1]));
136
	}
137
138
	/**
139
	* Test whether given character is alphanumeric
140
	*
141 8
	* @param  string $chr
142
	* @return bool
143 8
	*/
144
	public function isAlnum($chr)
145
	{
146
		return (strpos(' abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789', $chr) > 0);
147
	}
148
149
	/**
150
	* Test whether given position is followed by whitespace
151
	*
152
	* @param  integer $pos
153
	* @return bool
154 59
	*/
155
	public function isBeforeWhitespace($pos)
156 59
	{
157
		return $this->isWhitespace($this->text[$pos + 1]);
158
	}
159
160
	/**
161
	* Test whether a length of text is surrounded by alphanumeric characters
162
	*
163
	* @param  integer $pos Start of the text
164
	* @param  integer $len Length of the text
165 263
	* @return bool
166
	*/
167 263
	public function isSurroundedByAlnum($pos, $len)
168 263
	{
169
		return ($pos > 0 && $this->isAlnum($this->text[$pos - 1]) && $this->isAlnum($this->text[$pos + $len]));
170
	}
171
172
	/**
173
	* Test whether given character is an ASCII whitespace character
174
	*
175
	* NOTE: newlines are normalized to LF before parsing so we don't have to check for CR
176
	*
177
	* @param  string $chr
178
	* @return bool
179
	*/
180
	public function isWhitespace($chr)
181
	{
182
		return (strpos(" \n\t", $chr) !== false);
183 130
	}
184
185 130
	/**
186
	* Mark the boundary of a block in the original text
187
	*
188
	* @param  integer $pos
189
	* @return void
190
	*/
191
	public function markBoundary($pos)
192
	{
193
		$this->text[$pos] = "\x17";
194
	}
195
196
	/**
197
	* Overwrite part of the text with substitution characters ^Z (0x1A)
198
	*
199
	* @param  integer $pos Start of the range
200
	* @param  integer $len Length of text to overwrite
201
	* @return void
202
	*/
203
	public function overwrite($pos, $len)
204
	{
205
		if ($len > 0)
206
		{
207
			$this->text = substr($this->text, 0, $pos) . str_repeat("\x1A", $len) . substr($this->text, $pos + $len);
208
		}
209
	}
210
}