Parser   A
last analyzed

Complexity

Total Complexity 40

Size/Duplication

Total Lines 340
Duplicated Lines 0 %

Test Coverage

Coverage 100%

Importance

Changes 0
Metric Value
wmc 40
eloc 124
dl 0
loc 340
ccs 100
cts 100
cp 1
rs 9.2
c 0
b 0
f 0

12 Methods

Rating   Name   Duplication   Size   Complexity  
A parseGuillemets() 0 15 3
B parseSymbolsAfterDigits() 0 34 7
A parseSingleQuotePairs() 0 8 2
A parseDashesAndEllipses() 0 17 4
A parseFractions() 0 34 3
A parseSingleQuotes() 0 13 3
A parseNotEqualSign() 0 12 4
A parseDoubleQuotePairs() 0 8 2
A parseSymbolsInParentheses() 0 17 3
A addTag() 0 6 1
A parse() 0 32 6
A parseQuotePairs() 0 11 2

How to fix   Complexity   

Complex Class

Complex classes like Parser often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

While breaking up the class, it is a good idea to analyze how other classes use Parser, and based on these observations, apply Extract Interface, too.

1
<?php
2
3
/**
* @package   s9e\TextFormatter
* @copyright Copyright (c) The s9e authors
* @license   http://www.opensource.org/licenses/mit-license.php The MIT License
*/
8
namespace s9e\TextFormatter\Plugins\FancyPants;
9
10
use s9e\TextFormatter\Plugins\ParserBase;
11
12
class Parser extends ParserBase
{
	/**
	* @var bool Whether current text contains a double quote character
	*/
	protected $hasDoubleQuote;

	/**
	* @var bool Whether current text contains a single quote character
	*/
	protected $hasSingleQuote;

	/**
	* @var string Text being parsed
	*/
	protected $text;

	/**
	* {@inheritdoc}
	*
	* Stores the text, records whether it contains any single or double quote, then runs each
	* group of replacements unless the corresponding "disable*" config entry is set to a
	* non-empty value. $matches is not used by this plugin
	*/
	public function parse($text, array $matches)
	{
		$this->text           = $text;
		$this->hasSingleQuote = (strpos($text, "'") !== false);
		$this->hasDoubleQuote = (strpos($text, '"') !== false);

		if (empty($this->config['disableQuotes']))
		{
			$this->parseSingleQuotes();
			$this->parseSingleQuotePairs();
			$this->parseDoubleQuotePairs();
		}
		if (empty($this->config['disableGuillemets']))
		{
			$this->parseGuillemets();
		}
		if (empty($this->config['disableMathSymbols']))
		{
			$this->parseNotEqualSign();
			$this->parseSymbolsAfterDigits();
			$this->parseFractions();
		}
		if (empty($this->config['disablePunctuation']))
		{
			$this->parseDashesAndEllipses();
		}
		if (empty($this->config['disableSymbols']))
		{
			$this->parseSymbolsInParentheses();
		}

		// Do not hold on to the text once this pass is over
		unset($this->text);
	}

	/**
	* Add a fancy replacement tag
	*
	* Creates a self-closing tag named after the "tagName" config entry and stores the
	* replacement character in the attribute named after the "attrName" config entry
	*
	* @param  int    $tagPos Position of the tag in the text
	* @param  int    $tagLen Length of text consumed by the tag
	* @param  string $chr    Replacement character
	* @param  int    $prio   Tag's priority
	* @return \s9e\TextFormatter\Parser\Tag
	*/
	protected function addTag($tagPos, $tagLen, $chr, $prio = 0)
	{
		$tag = $this->parser->addSelfClosingTag($this->config['tagName'], $tagPos, $tagLen, $prio);
		$tag->setAttribute($this->config['attrName'], $chr);

		return $tag;
	}

	/**
	* Parse dashes and ellipses
	*
	* Does en dash –, em dash — and ellipsis …
	*
	* @return void
	*/
	protected function parseDashesAndEllipses()
	{
		// Cheap test to skip the regexp if the text contains neither sequence
		if (strpos($this->text, '...') === false && strpos($this->text, '--') === false)
		{
			return;
		}

		// The matched text is used as the key
		$chrs = [
			'--'  => "\xE2\x80\x93",
			'---' => "\xE2\x80\x94",
			'...' => "\xE2\x80\xA6"
		];
		$regexp = '/---?|\\.\\.\\./S';
		preg_match_all($regexp, $this->text, $matches, PREG_OFFSET_CAPTURE);
		foreach ($matches[0] as $m)
		{
			$this->addTag($m[1], strlen($m[0]), $chrs[$m[0]]);
		}
	}

	/**
	* Parse pairs of double quotes
	*
	* Does quote pairs “” -- must be done separately to handle nesting
	*
	* @return void
	*/
	protected function parseDoubleQuotePairs()
	{
		if ($this->hasDoubleQuote)
		{
			$this->parseQuotePairs(
				'/(?<![0-9\\pL])"[^"\\n]+"(?![0-9\\pL])/uS',
				"\xE2\x80\x9C",
				"\xE2\x80\x9D"
			);
		}
	}

	/**
	* Parse vulgar fractions
	*
	* Replaces each fraction listed in the map below with its single-character equivalent
	*
	* @return void
	*/
	protected function parseFractions()
	{
		// Cheap test to skip the regexp if the text contains no slash
		if (strpos($this->text, '/') === false)
		{
			return;
		}

		// The matched text is used as the key, the regexp only matches fractions found here
		$map = [
			'1/4'  => "\xC2\xBC",
			'1/2'  => "\xC2\xBD",
			'3/4'  => "\xC2\xBE",
			'1/7'  => "\xE2\x85\x90",
			'1/9'  => "\xE2\x85\x91",
			'1/10' => "\xE2\x85\x92",
			'1/3'  => "\xE2\x85\x93",
			'2/3'  => "\xE2\x85\x94",
			'1/5'  => "\xE2\x85\x95",
			'2/5'  => "\xE2\x85\x96",
			'3/5'  => "\xE2\x85\x97",
			'4/5'  => "\xE2\x85\x98",
			'1/6'  => "\xE2\x85\x99",
			'5/6'  => "\xE2\x85\x9A",
			'1/8'  => "\xE2\x85\x9B",
			'3/8'  => "\xE2\x85\x9C",
			'5/8'  => "\xE2\x85\x9D",
			'7/8'  => "\xE2\x85\x9E",
			'0/3'  => "\xE2\x86\x89"
		];

		$regexp = '/\\b(?:0\\/3|1\\/(?:[2-9]|10)|2\\/[35]|3\\/[458]|4\\/5|5\\/[68]|7\\/8)\\b/S';
		preg_match_all($regexp, $this->text, $matches, PREG_OFFSET_CAPTURE);
		foreach ($matches[0] as $m)
		{
			$this->addTag($m[1], strlen($m[0]), $map[$m[0]]);
		}
	}

	/**
	* Parse guillemets-style quotation marks
	*
	* Does << and >> when they enclose text on a single line. If << is followed by a space
	* then >> must be preceded by one, and vice versa
	*
	* @return void
	*/
	protected function parseGuillemets()
	{
		// Cheap test to skip the regexp if the text contains no opening sequence
		if (strpos($this->text, '<<') === false)
		{
			return;
		}

		$regexp = '/<<( ?)(?! )[^\\n<>]*?[^\\n <>]\\1>>(?!>)/';
		preg_match_all($regexp, $this->text, $matches, PREG_OFFSET_CAPTURE);
		foreach ($matches[0] as $m)
		{
			// Each tag only consumes the two characters of its own marker, the right marker
			// being the last two bytes of the match
			$left  = $this->addTag($m[1],                     2, "\xC2\xAB");
			$right = $this->addTag($m[1] + strlen($m[0]) - 2, 2, "\xC2\xBB");

			// Tie the right tag to the left tag so that both are invalidated together
			$left->cascadeInvalidationTo($right);
		}
	}

	/**
	* Parse the not equal sign
	*
	* Supports != and =/= when surrounded by a space on both sides
	*
	* @return void
	*/
	protected function parseNotEqualSign()
	{
		// Cheap test to skip the regexp if the text contains neither sequence
		if (strpos($this->text, '!=') === false && strpos($this->text, '=/=') === false)
		{
			return;
		}

		$regexp = '/\\b (?:!|=\\/)=(?= \\b)/';
		preg_match_all($regexp, $this->text, $matches, PREG_OFFSET_CAPTURE);
		foreach ($matches[0] as $m)
		{
			// The match starts with a space that must not be consumed by the tag. The space
			// that follows is only part of a lookahead and is not in the match
			$this->addTag($m[1] + 1, strlen($m[0]) - 1, "\xE2\x89\xA0");
		}
	}

	/**
	* Parse pairs of quotes
	*
	* Adds one tag for the first character of each match and one for its last character
	*
	* @param  string $regexp     Regexp used to identify quote pairs
	* @param  string $leftQuote  Fancy replacement for left quote
	* @param  string $rightQuote Fancy replacement for right quote
	* @return void
	*/
	protected function parseQuotePairs($regexp, $leftQuote, $rightQuote)
	{
		preg_match_all($regexp, $this->text, $matches, PREG_OFFSET_CAPTURE);
		foreach ($matches[0] as $m)
		{
			$left  = $this->addTag($m[1], 1, $leftQuote);
			$right = $this->addTag($m[1] + strlen($m[0]) - 1, 1, $rightQuote);

			// Cascade left tag's invalidation to the right so that if we skip the left quote,
			// the right quote remains untouched
			$left->cascadeInvalidationTo($right);
		}
	}

	/**
	* Parse pairs of single quotes
	*
	* Does quote pairs ‘’ -- must be done separately to handle nesting
	*
	* @return void
	*/
	protected function parseSingleQuotePairs()
	{
		if ($this->hasSingleQuote)
		{
			$this->parseQuotePairs(
				"/(?<![0-9\\pL])'[^'\\n]+'(?![0-9\\pL])/uS",
				"\xE2\x80\x98",
				"\xE2\x80\x99"
			);
		}
	}

	/**
	* Parse single quotes in general
	*
	* Does apostrophes ’ after a letter or at the beginning of a word or a couple of digits
	*
	* @return void
	*/
	protected function parseSingleQuotes()
	{
		if (!$this->hasSingleQuote)
		{
			return;
		}

		$regexp = "/(?<=\\pL)'|(?<!\\S)'(?=\\pL|[0-9]{2})/uS";
		preg_match_all($regexp, $this->text, $matches, PREG_OFFSET_CAPTURE);
		foreach ($matches[0] as $m)
		{
			// Give this tag a worse priority than default so that quote pairs take precedence
			$this->addTag($m[1], 1, "\xE2\x80\x99", 10);
		}
	}

	/**
	* Parse symbols found after digits
	*
	* Does symbols found after a digit:
	*  - apostrophe ’ if it's followed by an "s" as in 80's
	*  - prime ′ and double prime ″
	*  - multiply sign × if it's followed by an optional space and another digit
	*
	* @return void
	*/
	protected function parseSymbolsAfterDigits()
	{
		// Cheap test to skip the regexp if the text contains none of the characters it needs
		if (!$this->hasSingleQuote && !$this->hasDoubleQuote && strpos($this->text, 'x') === false)
		{
			return;
		}

		// Keys are the one or two bytes that follow the digit in a match
		$map = [
			// 80's -- use an apostrophe
			"'s" => "\xE2\x80\x99",
			// 12' or 12" -- use a prime
			"'"  => "\xE2\x80\xB2",
			"' " => "\xE2\x80\xB2",
			"'x" => "\xE2\x80\xB2",
			'"'  => "\xE2\x80\xB3",
			'" ' => "\xE2\x80\xB3",
			'"x' => "\xE2\x80\xB3"
		];

		$regexp = "/[0-9](?>'s|[\"']? ?x(?= ?[0-9])|[\"'])/S";
		preg_match_all($regexp, $this->text, $matches, PREG_OFFSET_CAPTURE);
		foreach ($matches[0] as $m)
		{
			// Test for a multiply sign at the end
			if (substr($m[0], -1) === 'x')
			{
				$this->addTag($m[1] + strlen($m[0]) - 1, 1, "\xC3\x97");
			}

			// Test for an apostrophe/prime right after the digit. A single match can produce
			// both a prime and a multiply sign, as in 12' x 8
			$str = substr($m[0], 1, 2);
			if (isset($map[$str]))
			{
				// Only the quote character itself is consumed, not what follows it
				$this->addTag($m[1] + 1, 1, $map[$str]);
			}
		}
	}

	/**
	* Parse symbols found in parentheses such as (c)
	*
	* Does symbols ©, ® and ™
	*
	* @return void
	*/
	protected function parseSymbolsInParentheses()
	{
		// Cheap test to skip the regexp if the text contains no opening parenthesis
		if (strpos($this->text, '(') === false)
		{
			return;
		}

		$chrs = [
			'(c)'  => "\xC2\xA9",
			'(r)'  => "\xC2\xAE",
			'(tm)' => "\xE2\x84\xA2"
		];
		$regexp = '/\\((?>c|r|tm)\\)/i';
		preg_match_all($regexp, $this->text, $matches, PREG_OFFSET_CAPTURE);
		foreach ($matches[0] as $m)
		{
			// The regexp is case-insensitive but the keys are lowercase. Only the four letters
			// that can appear in a match need to be lowercased
			$this->addTag($m[1], strlen($m[0]), $chrs[strtr($m[0], 'CMRT', 'cmrt')]);
		}
	}
}