Completed
Push — master ( e7ad9f...a9a2f2 )
by Josh
20:10
created

Parser   B

Complexity

Total Complexity 36

Size/Duplication

Total Lines 298
Duplicated Lines 0 %

Coupling/Cohesion

Components 1
Dependencies 3

Test Coverage

Coverage 100%

Importance

Changes 0
Metric Value
dl 0
loc 298
ccs 142
cts 142
cp 1
rs 8.8
c 0
b 0
f 0
wmc 36
lcom 1
cbo 3

11 Methods

Rating   Name   Duplication   Size   Complexity  
B parse() 0 32 6
A addTag() 0 7 1
A parseDashesAndEllipses() 0 19 4
A parseDoubleQuotePairs() 0 11 2
A parseGuillemets() 0 17 3
A parseNotEqualSign() 0 14 3
A parseQuotePairs() 0 13 2
A parseSingleQuotePairs() 0 11 2
A parseSingleQuotes() 0 15 3
C parseSymbolsAfterDigits() 0 37 7
A parseSymbolsInParentheses() 0 19 3
1
<?php
2
3
/**
4
* @package   s9e\TextFormatter
5
* @copyright Copyright (c) 2010-2016 The s9e Authors
6
* @license   http://www.opensource.org/licenses/mit-license.php The MIT License
7
*/
8
namespace s9e\TextFormatter\Plugins\FancyPants;
9
10
use s9e\TextFormatter\Plugins\ParserBase;
11
12
class Parser extends ParserBase
13
{
14
	/**
15
	* @var bool Whether current text contains a double quote character
16
	*/
17
	protected $hasDoubleQuote;
18
19
	/**
20
	* @var bool Whether current text contains a single quote character
21
	*/
22
	protected $hasSingleQuote;
23
24
	/**
25
	* @var string Text being parsed
26
	*/
27
	protected $text;
28
29
	/**
	* {@inheritdoc}
	*
	* Stores the text, records whether it contains single/double quotes, then runs every
	* group of passes that is not disabled in the plugin's config. $matches is not used here.
	*/
	public function parse($text, array $matches)
	{
		$this->text           = $text;
		$this->hasSingleQuote = (false !== strpos($text, "'"));
		$this->hasDoubleQuote = (false !== strpos($text, '"'));

		// Config flag => passes it disables. Both the groups and the passes within a group
		// are listed in the order in which they must run
		$passes = [
			'disableQuotes'      => ['parseSingleQuotes', 'parseSingleQuotePairs', 'parseDoubleQuotePairs'],
			'disableGuillemets'  => ['parseGuillemets'],
			'disableMathSymbols' => ['parseNotEqualSign', 'parseSymbolsAfterDigits'],
			'disablePunctuation' => ['parseDashesAndEllipses'],
			'disableSymbols'     => ['parseSymbolsInParentheses']
		];
		foreach ($passes as $flag => $methods)
		{
			if (!empty($this->config[$flag]))
			{
				// This group of replacements has been turned off
				continue;
			}
			foreach ($methods as $method)
			{
				$this->$method();
			}
		}

		// The text is only needed while the passes are running
		unset($this->text);
	}
64
65
	/**
	* Create the self-closing tag that replaces a span of text with a fancy character
	*
	* The tag's name and the name of the attribute that holds the replacement character are
	* both read from the plugin's config.
	*
	* @param  integer $tagPos Position of the tag in the text
	* @param  integer $tagLen Length of text consumed by the tag
	* @param  string  $chr    Replacement character
	* @param  integer $prio   Tag's priority
	* @return \s9e\TextFormatter\Parser\Tag
	*/
	protected function addTag($tagPos, $tagLen, $chr, $prio = 0)
	{
		$tagName = $this->config['tagName'];
		$tag     = $this->parser->addSelfClosingTag($tagName, $tagPos, $tagLen, $prio);

		$attrName = $this->config['attrName'];
		$tag->setAttribute($attrName, $chr);

		return $tag;
	}
81
82
	/**
	* Replace runs of hyphens and dots with typographic punctuation
	*
	* Two hyphens become an en dash –, three hyphens become an em dash — and three dots
	* become an ellipsis …
	*
	* @return void
	*/
	protected function parseDashesAndEllipses()
	{
		// Cheap substring test before running the regexp
		$hasEllipsis = (strpos($this->text, '...') !== false);
		if (!$hasEllipsis && strpos($this->text, '--') === false)
		{
			return;
		}

		$replacements = [
			'--'  => "\xE2\x80\x93",
			'---' => "\xE2\x80\x94",
			'...' => "\xE2\x80\xA6"
		];

		preg_match_all('/---?|\\.\\.\\./S', $this->text, $found, PREG_OFFSET_CAPTURE);
		foreach ($found[0] as $hit)
		{
			list($str, $pos) = $hit;

			// The matched string doubles as the key to its replacement
			$this->addTag($pos, strlen($str), $replacements[$str]);
		}
	}
108
109
	/**
	* Replace pairs of straight double quotes with curly quotes “”
	*
	* Handled separately from single quote pairs in order to handle nesting. A pair is only
	* matched if neither quote touches a letter or a digit on its outer side and there is no
	* newline between them.
	*
	* @return void
	*/
	protected function parseDoubleQuotePairs()
	{
		if (!$this->hasDoubleQuote)
		{
			// Nothing to do without at least one double quote in the text
			return;
		}

		$regexp     = '/(?<![0-9\\pL])"[^"\\n]+"(?![0-9\\pL])/uS';
		$leftQuote  = "\xE2\x80\x9C";
		$rightQuote = "\xE2\x80\x9D";

		$this->parseQuotePairs($regexp, $leftQuote, $rightQuote);
	}
127
128
	/**
	* Replace << and >> pairs with guillemets « »
	*
	* The content between the markers cannot contain a newline or an angle bracket, and the
	* optional space that follows << must be mirrored before >>.
	*
	* @return void
	*/
	protected function parseGuillemets()
	{
		if (strpos($this->text, '<<') !== false)
		{
			$regexp = '/<<( ?)(?! )[^\\n<>]*?[^\\n <>]\\1>>(?!>)/';
			preg_match_all($regexp, $this->text, $found, PREG_OFFSET_CAPTURE);
			foreach ($found[0] as $hit)
			{
				list($str, $pos) = $hit;

				// Both markers are 2 bytes long, the closing one ends the match
				$openPos  = $pos;
				$closePos = $pos + strlen($str) - 2;

				$open  = $this->addTag($openPos,  2, "\xC2\xAB");
				$close = $this->addTag($closePos, 2, "\xC2\xBB");

				// Invalidating the opening tag invalidates the closing tag too
				$open->cascadeInvalidationTo($close);
			}
		}
	}
150
151
	/**
	* Replace != with the not equal sign ≠
	*
	* Only matches a != that is surrounded by single spaces, with a word character on the
	* other side of each space.
	*
	* @return void
	*/
	protected function parseNotEqualSign()
	{
		if (strpos($this->text, '!=') !== false)
		{
			preg_match_all('/\\b !=(?= \\b)/', $this->text, $found, PREG_OFFSET_CAPTURE);
			foreach ($found[0] as $hit)
			{
				list(, $pos) = $hit;

				// The match starts at the leading space, the tag covers only the 2 bytes of !=
				$this->addTag($pos + 1, 2, "\xE2\x89\xA0");
			}
		}
	}
170
171
	/**
	* Replace the first and last character of every match with a pair of fancy quotes
	*
	* @param  string $regexp     Regexp used to identify quote pairs
	* @param  string $leftQuote  Fancy replacement for left quote
	* @param  string $rightQuote Fancy replacement for right quote
	* @return void
	*/
	protected function parseQuotePairs($regexp, $leftQuote, $rightQuote)
	{
		preg_match_all($regexp, $this->text, $found, PREG_OFFSET_CAPTURE);
		foreach ($found[0] as $hit)
		{
			list($str, $pos) = $hit;

			// The opening quote is the first byte of the match, the closing quote is the last
			$openPos  = $pos;
			$closePos = $pos + strlen($str) - 1;

			$open  = $this->addTag($openPos,  1, $leftQuote);
			$close = $this->addTag($closePos, 1, $rightQuote);

			// If the left quote ends up being skipped, its invalidation cascades to the right
			// quote so that the right quote remains untouched as well
			$open->cascadeInvalidationTo($close);
		}
	}
192
193
	/**
	* Replace pairs of straight single quotes with curly quotes ‘’
	*
	* Handled separately from double quote pairs in order to handle nesting. A pair is only
	* matched if neither quote touches a letter or a digit on its outer side and there is no
	* newline between them.
	*
	* @return void
	*/
	protected function parseSingleQuotePairs()
	{
		if (!$this->hasSingleQuote)
		{
			// Nothing to do without at least one single quote in the text
			return;
		}

		$regexp     = "/(?<![0-9\\pL])'[^'\\n]+'(?![0-9\\pL])/uS";
		$leftQuote  = "\xE2\x80\x98";
		$rightQuote = "\xE2\x80\x99";

		$this->parseQuotePairs($regexp, $leftQuote, $rightQuote);
	}
211
212
	/**
	* Replace standalone single quotes with apostrophes ’
	*
	* Matches a single quote that directly follows a letter, or one that is not preceded by a
	* non-whitespace character and is followed by either a letter or two digits.
	*
	* @return void
	*/
	protected function parseSingleQuotes()
	{
		if ($this->hasSingleQuote)
		{
			$regexp = "/(?<=\\pL)'|(?<!\\S)'(?=\\pL|[0-9]{2})/uS";
			preg_match_all($regexp, $this->text, $found, PREG_OFFSET_CAPTURE);
			foreach ($found[0] as $hit)
			{
				$pos = $hit[1];

				// Use a worse priority than the default so that quote pairs take precedence
				$this->addTag($pos, 1, "\xE2\x80\x99", 10);
			}
		}
	}
234
235
	/**
	* Replace the symbols that directly follow a digit
	*
	* Handles the following, always right after a digit:
	*  - apostrophe ’ if the quote is followed by an "s" as in 80's
	*  - prime ′ and double prime ″ for a single or double quote as in 12' or 12"
	*  - multiply sign × for an "x" that is followed by an optional space and another digit
	*
	* @return void
	*/
	protected function parseSymbolsAfterDigits()
	{
		// The regexp can only match if the text contains a quote or the letter x
		$mayMatch = $this->hasSingleQuote
		         || $this->hasDoubleQuote
		         || strpos($this->text, 'x') !== false;
		if (!$mayMatch)
		{
			return;
		}

		$apostrophe  = "\xE2\x80\x99";
		$prime       = "\xE2\x80\xB2";
		$doublePrime = "\xE2\x80\xB3";

		// Keys are the (up to) two bytes that follow the digit in a match
		$map = [
			// 80's -- use an apostrophe
			"'s" => $apostrophe,
			// 12' or 12" -- use a prime
			"'"  => $prime,
			"' " => $prime,
			"'x" => $prime,
			'"'  => $doublePrime,
			'" ' => $doublePrime,
			'"x' => $doublePrime
		];

		$regexp = "/[0-9](?>'s|[\"']? ?x(?= ?[0-9])|[\"'])/S";
		preg_match_all($regexp, $this->text, $found, PREG_OFFSET_CAPTURE);
		foreach ($found[0] as $hit)
		{
			list($str, $pos) = $hit;
			$lastPos = strlen($str) - 1;

			// A match that ends with an x carries a multiply sign as its last byte
			if ($str[$lastPos] === 'x')
			{
				$this->addTag($pos + $lastPos, 1, "\xC3\x97");
			}

			// Look for an apostrophe/prime right after the digit
			$key = substr($str, 1, 2);
			if (isset($map[$key]))
			{
				$this->addTag($pos + 1, 1, $map[$key]);
			}
		}
	}
282
283
	/**
	* Replace the symbols written in parentheses such as (c)
	*
	* Turns (c), (r) and (tm) into ©, ® and ™ respectively, regardless of letter case.
	*
	* @return void
	*/
	protected function parseSymbolsInParentheses()
	{
		if (strpos($this->text, '(') !== false)
		{
			$replacements = [
				'(c)'  => "\xC2\xA9",
				'(r)'  => "\xC2\xAE",
				'(tm)' => "\xE2\x84\xA2"
			];

			preg_match_all('/\\((?>c|r|tm)\\)/i', $this->text, $found, PREG_OFFSET_CAPTURE);
			foreach ($found[0] as $hit)
			{
				list($str, $pos) = $hit;

				// The regexp is case-insensitive but the lookup table uses lowercase keys
				$key = strtr($str, 'CMRT', 'cmrt');
				$this->addTag($pos, strlen($str), $replacements[$key]);
			}
		}
	}
309
}