Completed
Branch wip/litedown (e88c6d)
by Josh
33:00
created

ParsedText::decode()   B

Complexity

Conditions 4
Paths 4

Size

Total Lines 23
Code Lines 12

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 17
CRAP Score 4

Importance

Changes 0
Metric Value
dl 0
loc 23
ccs 17
cts 17
cp 1
rs 8.7972
c 0
b 0
f 0
cc 4
eloc 12
nc 4
nop 1
crap 4
1
<?php
2
3
/**
4
* @package   s9e\TextFormatter
5
* @copyright Copyright (c) 2010-2017 The s9e Authors
6
* @license   http://www.opensource.org/licenses/mit-license.php The MIT License
7
*/
8
namespace s9e\TextFormatter\Plugins\Litedown\Parser;
9
10
/**
* Wrapper around the text being parsed
*
* Holds a working copy of the text in which escaped literals are replaced with two-byte
* placeholders (0x1B followed by one character) and in which ranges can be blanked out with
* 0x1A or marked with 0x17, plus a few helpers to inspect characters at given positions.
*/
class ParsedText
{
	/**
	* @var bool Whether to decode HTML entities when decoding text
	*/
	public $decodeHtmlEntities = false;

	/**
	* @var bool Whether text contains escape characters
	*/
	protected $hasEscapedChars = false;

	/**
	* @var bool Whether text contains link references
	*/
	public $hasReferences = false;

	/**
	* @var array Array of [label => link info]
	*/
	public $linkReferences = [];

	/**
	* @var string Text being parsed
	*/
	protected $text;

	/**
	* Store the text, encoding backslash-escaped literals and appending an end marker
	*
	* @param string $text Original text
	*/
	public function __construct($text)
	{
		// The cheap strpos() check skips the regexp for text that contains no backslash at all
		if (strpos($text, '\\') !== false && preg_match('/\\\\[!"\'()*[\\\\\\]^_`~]/', $text))
		{
			$this->hasEscapedChars = true;

			// Encode escaped literals that have a special meaning otherwise, so that we don't have
			// to take them into account in regexps. decode() holds the reverse map
			$text = strtr(
				$text,
				[
					'\\!' => "\x1B0", '\\"' => "\x1B1", "\\'" => "\x1B2", '\\('  => "\x1B3",
					'\\)' => "\x1B4", '\\*' => "\x1B5", '\\[' => "\x1B6", '\\\\' => "\x1B7",
					'\\]' => "\x1B8", '\\^' => "\x1B9", '\\_' => "\x1BA", '\\`'  => "\x1BB",
					'\\~' => "\x1BC"
				]
			);
		}

		// We append a couple of lines and a non-whitespace character at the end of the text in
		// order to trigger the closure of all open blocks such as quotes and lists
		$this->text = $text . "\n\n\x17";
	}

	/**
	* Return the text in its current, encoded state
	*
	* @return string
	*/
	public function __toString()
	{
		return $this->text;
	}

	/**
	* Return the character at given position
	*
	* @param  integer $pos
	* @return string
	*/
	public function charAt($pos)
	{
		return $this->text[$pos];
	}

	/**
	* Decode a chunk of encoded text to be used as an attribute value
	*
	* Optionally decodes HTML entities, removes 0x1A characters, then restores escaped literals
	* to the character they stand for (without the backslash that escaped them)
	*
	* @param  string $str Encoded text
	* @return string      Decoded text
	*/
	public function decode($str)
	{
		if ($this->decodeHtmlEntities && strpos($str, '&') !== false)
		{
			$str = html_entity_decode($str, ENT_QUOTES, 'UTF-8');
		}
		$str = str_replace("\x1A", '', $str);

		// Placeholders only exist if the constructor found escaped literals
		if ($this->hasEscapedChars)
		{
			$str = strtr(
				$str,
				[
					"\x1B0" => '!', "\x1B1" => '"', "\x1B2" => "'", "\x1B3" => '(',
					"\x1B4" => ')', "\x1B5" => '*', "\x1B6" => '[', "\x1B7" => '\\',
					"\x1B8" => ']', "\x1B9" => '^', "\x1BA" => '_', "\x1BB" => '`',
					"\x1BC" => '~'
				]
			);
		}

		return $str;
	}

	/**
	* Find the first occurrence of given substring starting at given position
	*
	* @param  string       $str
	* @param  integer      $pos
	* @return bool|integer      Position of the substring, or FALSE if it was not found
	*/
	public function indexOf($str, $pos = 0)
	{
		return strpos($this->text, $str, $pos);
	}

	/**
	* Test whether given position is preceded by whitespace
	*
	* @param  integer $pos
	* @return bool
	*/
	public function isAfterWhitespace($pos)
	{
		return ($pos > 0 && $this->isWhitespace($this->text[$pos - 1]));
	}

	/**
	* Test whether given character is alphanumeric
	*
	* @param  string $chr
	* @return bool
	*/
	public function isAlnum($chr)
	{
		// The haystack starts with a space so that a match at offset 0 (a space) is rejected by
		// the "> 0" comparison along with FALSE. Empty strings are rejected before strpos() runs
		return ($chr !== '' && strpos(' abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789', $chr) > 0);
	}

	/**
	* Test whether given position is followed by whitespace
	*
	* Returns FALSE if there is no character after given position
	*
	* @param  integer $pos
	* @return bool
	*/
	public function isBeforeWhitespace($pos)
	{
		$nextPos = $pos + 1;

		return (isset($this->text[$nextPos]) && $this->isWhitespace($this->text[$nextPos]));
	}

	/**
	* Test whether a length of text is surrounded by alphanumeric characters
	*
	* Returns FALSE if the text starts at position 0 or if nothing follows it
	*
	* @param  integer $pos Start of the text
	* @param  integer $len Length of the text
	* @return bool
	*/
	public function isSurroundedByAlnum($pos, $len)
	{
		$endPos = $pos + $len;
		if ($pos <= 0 || !isset($this->text[$endPos]))
		{
			return false;
		}

		return ($this->isAlnum($this->text[$pos - 1]) && $this->isAlnum($this->text[$endPos]));
	}

	/**
	* Test whether given character is an ASCII whitespace character
	*
	* NOTE: newlines are normalized to LF before parsing so we don't have to check for CR
	*
	* @param  string $chr
	* @return bool
	*/
	public function isWhitespace($chr)
	{
		// strpos() reports an empty needle as found at offset 0 on PHP 8 and as not found on
		// older versions, so empty strings are rejected explicitly
		return ($chr !== '' && strpos(" \n\t", $chr) !== false);
	}

	/**
	* Mark the boundary of a block in the original text
	*
	* Replaces the character at given position with 0x17
	*
	* @param  integer $pos
	* @return void
	*/
	public function markBoundary($pos)
	{
		$this->text[$pos] = "\x17";
	}

	/**
	* Overwrite part of the text with substitution characters ^Z (0x1A)
	*
	* The length of the text is preserved. Nothing happens if $len is not positive
	*
	* @param  integer $pos Start of the range
	* @param  integer $len Length of text to overwrite
	* @return void
	*/
	public function overwrite($pos, $len)
	{
		if ($len > 0)
		{
			$this->text = substr($this->text, 0, $pos) . str_repeat("\x1A", $len) . substr($this->text, $pos + $len);
		}
	}
}