Parser - Code Metrics - s9e/TextFormatter - Measure and Improve Code Quality continuously with Scrutinizer

Parser A
last analyzed 2025-05-02 21:56 UTC

↳ Parent: Project

Complexity

Total Complexity

Size/Duplication

Total Lines	340
Duplicated Lines	0 %

Test Coverage

Coverage

100%

Importance

Changes

Metric	Value
wmc	40
eloc	124
dl	0
loc	340
ccs	100
cts	100
cp	1
rs	9.2
c	0
b	0
f	0

12 Methods

Rating	Name	Size	Complexity
A	parseGuillemets()	15	3
B	parseSymbolsAfterDigits()	34	7
A	parseSingleQuotePairs()	8	2
A	parseDashesAndEllipses()	17	4
A	parseFractions()	34	3
A	parseSingleQuotes()	13	3
A	parseNotEqualSign()	12	4
A	parseDoubleQuotePairs()	8	2
A	parseSymbolsInParentheses()	17	3
A	addTag()	6	1
A	parse()	32	6
A	parseQuotePairs()	11	2

How to fix Complexity

<?php

/**
* @package   s9e\TextFormatter
* @copyright Copyright (c) The s9e authors
* @license   http://www.opensource.org/licenses/mit-license.php The MIT License
*/
namespace s9e\TextFormatter\Plugins\FancyPants;

use s9e\TextFormatter\Plugins\ParserBase;

/**
* FancyPants parser
*
* Scans the text for plain ASCII punctuation sequences and registers, for each one, a
* self-closing tag whose attribute carries the typographic replacement character.
* Tag and attribute names are read from the plugin's configuration.
*/
class Parser extends ParserBase
{
	/**
	* @var bool Whether the current text contains a double quote character
	*/
	protected $hasDoubleQuote;

	/**
	* @var bool Whether the current text contains a single quote character
	*/
	protected $hasSingleQuote;

	/**
	* @var string Text being parsed
	*/
	protected $text;

	/**
	* {@inheritdoc}
	*/
	public function parse($text, array $matches)
	{
		// Cache the text and the cheap "is there a quote at all" checks used by several passes
		$this->text           = $text;
		$this->hasSingleQuote = (strpos($text, "'") !== false);
		$this->hasDoubleQuote = (strpos($text, '"') !== false);

		// Quotes: apostrophes first, then single and double quote pairs
		if (empty($this->config['disableQuotes']))
		{
			$this->parseSingleQuotes();
			$this->parseSingleQuotePairs();
			$this->parseDoubleQuotePairs();
		}

		// Guillemets: << >>
		if (empty($this->config['disableGuillemets']))
		{
			$this->parseGuillemets();
		}

		// Math symbols: not-equal sign, primes/multiply sign, vulgar fractions
		if (empty($this->config['disableMathSymbols']))
		{
			$this->parseNotEqualSign();
			$this->parseSymbolsAfterDigits();
			$this->parseFractions();
		}

		// Punctuation: dashes and ellipses
		if (empty($this->config['disablePunctuation']))
		{
			$this->parseDashesAndEllipses();
		}

		// Symbols: (c), (r) and (tm)
		if (empty($this->config['disableSymbols']))
		{
			$this->parseSymbolsInParentheses();
		}

		// Drop the reference to the text once every pass has run
		unset($this->text);
	}

	/**
	* Register a self-closing replacement tag and set its replacement character
	*
	* @param  integer $tagPos Position of the tag in the text
	* @param  integer $tagLen Length of text consumed by the tag
	* @param  string  $chr    Replacement character, stored in the configured attribute
	* @param  integer $prio   Tag's priority
	* @return \s9e\TextFormatter\Parser\Tag
	*/
	protected function addTag($tagPos, $tagLen, $chr, $prio = 0)
	{
		$tagName = $this->config['tagName'];
		$newTag  = $this->parser->addSelfClosingTag($tagName, $tagPos, $tagLen, $prio);

		$attrName = $this->config['attrName'];
		$newTag->setAttribute($attrName, $chr);

		return $newTag;
	}

	/**
	* Replace double/triple hyphens and triple dots
	*
	* Produces en dash –, em dash — and ellipsis …
	*
	* @return void
	*/
	protected function parseDashesAndEllipses()
	{
		// Skip the regexp entirely if the text contains neither sequence
		$hasCandidate = (strpos($this->text, '...') !== false || strpos($this->text, '--') !== false);
		if (!$hasCandidate)
		{
			return;
		}

		$replacements = [
			'--'  => "\xE2\x80\x93",
			'---' => "\xE2\x80\x94",
			'...' => "\xE2\x80\xA6"
		];

		preg_match_all('/---?|\\.\\.\\./S', $this->text, $found, PREG_OFFSET_CAPTURE);
		foreach ($found[0] as $match)
		{
			list($str, $pos) = $match;
			$this->addTag($pos, strlen($str), $replacements[$str]);
		}
	}

	/**
	* Replace pairs of straight double quotes with curly quotes “”
	*
	* Handled in its own pass, separately from single quote pairs, to handle nesting
	*
	* @return void
	*/
	protected function parseDoubleQuotePairs()
	{
		if (!$this->hasDoubleQuote)
		{
			return;
		}

		$leftQuote  = "\xE2\x80\x9C";
		$rightQuote = "\xE2\x80\x9D";
		$this->parseQuotePairs('/(?<![0-9\\pL])"[^"\\n]+"(?![0-9\\pL])/uS', $leftQuote, $rightQuote);
	}

	/**
	* Replace ASCII fractions such as 1/2 with their vulgar fraction character
	*
	* @return void
	*/
	protected function parseFractions()
	{
		// No slash, no fraction
		if (strpos($this->text, '/') === false)
		{
			return;
		}

		$fractions = [
			'1/4'  => "\xC2\xBC",
			'1/2'  => "\xC2\xBD",
			'3/4'  => "\xC2\xBE",
			'1/7'  => "\xE2\x85\x90",
			'1/9'  => "\xE2\x85\x91",
			'1/10' => "\xE2\x85\x92",
			'1/3'  => "\xE2\x85\x93",
			'2/3'  => "\xE2\x85\x94",
			'1/5'  => "\xE2\x85\x95",
			'2/5'  => "\xE2\x85\x96",
			'3/5'  => "\xE2\x85\x97",
			'4/5'  => "\xE2\x85\x98",
			'1/6'  => "\xE2\x85\x99",
			'5/6'  => "\xE2\x85\x9A",
			'1/8'  => "\xE2\x85\x9B",
			'3/8'  => "\xE2\x85\x9C",
			'5/8'  => "\xE2\x85\x9D",
			'7/8'  => "\xE2\x85\x9E",
			'0/3'  => "\xE2\x86\x89"
		];

		// The alternation below only matches strings that are keys of $fractions
		$fractionRegexp = '/\\b(?:0\\/3|1\\/(?:[2-9]|10)|2\\/[35]|3\\/[458]|4\\/5|5\\/[68]|7\\/8)\\b/S';
		preg_match_all($fractionRegexp, $this->text, $found, PREG_OFFSET_CAPTURE);
		foreach ($found[0] as $match)
		{
			list($str, $pos) = $match;
			$this->addTag($pos, strlen($str), $fractions[$str]);
		}
	}

	/**
	* Replace << and >> with guillemets « »
	*
	* The optional space captured after << must be repeated before >> for a pair to match
	*
	* @return void
	*/
	protected function parseGuillemets()
	{
		if (strpos($this->text, '<<') === false)
		{
			return;
		}

		preg_match_all('/<<( ?)(?! )[^\\n<>]*?[^\\n <>]\\1>>(?!>)/', $this->text, $found, PREG_OFFSET_CAPTURE);
		foreach ($found[0] as $match)
		{
			list($str, $openPos) = $match;

			// The closing >> occupies the last two bytes of the match
			$closePos = $openPos + strlen($str) - 2;

			$opening = $this->addTag($openPos,  2, "\xC2\xAB");
			$closing = $this->addTag($closePos, 2, "\xC2\xBB");

			$opening->cascadeInvalidationTo($closing);
		}
	}

	/**
	* Replace != and =/= with the not equal sign ≠
	*
	* The operator must be surrounded by spaces and word boundaries
	*
	* @return void
	*/
	protected function parseNotEqualSign()
	{
		$hasCandidate = (strpos($this->text, '!=') !== false || strpos($this->text, '=/=') !== false);
		if (!$hasCandidate)
		{
			return;
		}

		preg_match_all('/\\b (?:!|=\\/)=(?= \\b)/', $this->text, $found, PREG_OFFSET_CAPTURE);
		foreach ($found[0] as $match)
		{
			list($str, $pos) = $match;

			// The match starts with a space that must not be consumed by the tag
			$operatorPos = $pos + 1;
			$operatorLen = strlen($str) - 1;
			$this->addTag($operatorPos, $operatorLen, "\xE2\x89\xA0");
		}
	}

	/**
	* Add a left and a right tag for every quote pair matched by given regexp
	*
	* @param  string $regexp     Regexp used to identify quote pairs
	* @param  string $leftQuote  Fancy replacement for left quote
	* @param  string $rightQuote Fancy replacement for right quote
	* @return void
	*/
	protected function parseQuotePairs($regexp, $leftQuote, $rightQuote)
	{
		preg_match_all($regexp, $this->text, $found, PREG_OFFSET_CAPTURE);
		foreach ($found[0] as $match)
		{
			list($str, $openPos) = $match;

			// The closing quote is the last byte of the match
			$closePos = $openPos + strlen($str) - 1;

			$opening = $this->addTag($openPos,  1, $leftQuote);
			$closing = $this->addTag($closePos, 1, $rightQuote);

			// If the opening quote's tag gets invalidated, invalidate the closing quote's tag
			// as well so that the pair is either replaced together or left untouched
			$opening->cascadeInvalidationTo($closing);
		}
	}

	/**
	* Replace pairs of straight single quotes with curly quotes ‘’
	*
	* Handled in its own pass, separately from double quote pairs, to handle nesting
	*
	* @return void
	*/
	protected function parseSingleQuotePairs()
	{
		if (!$this->hasSingleQuote)
		{
			return;
		}

		$leftQuote  = "\xE2\x80\x98";
		$rightQuote = "\xE2\x80\x99";
		$this->parseQuotePairs("/(?<![0-9\\pL])'[^'\\n]+'(?![0-9\\pL])/uS", $leftQuote, $rightQuote);
	}

	/**
	* Replace lone single quotes with apostrophes ’
	*
	* Matches a single quote that follows a letter, or one that is not preceded by a
	* non-space character and is followed by a letter or two digits
	*
	* @return void
	*/
	protected function parseSingleQuotes()
	{
		if ($this->hasSingleQuote)
		{
			$apostrophe = "\xE2\x80\x99";

			preg_match_all("/(?<=\\pL)'|(?<!\\S)'(?=\\pL|[0-9]{2})/uS", $this->text, $found, PREG_OFFSET_CAPTURE);
			foreach ($found[0] as $match)
			{
				// Priority 10 is worse than the default 0, which lets quote pairs take precedence
				$this->addTag($match[1], 1, $apostrophe, 10);
			}
		}
	}

	/**
	* Replace symbols that directly follow a digit
	*
	* Handles:
	*  - apostrophe ’ if the quote is followed by an "s" as in 80's
	*  - prime ′ and double prime ″
	*  - multiply sign × if the "x" is followed by an optional space and another digit
	*
	* @return void
	*/
	protected function parseSymbolsAfterDigits()
	{
		$hasCandidate = ($this->hasSingleQuote || $this->hasDoubleQuote || strpos($this->text, 'x') !== false);
		if (!$hasCandidate)
		{
			return;
		}

		$apostrophe  = "\xE2\x80\x99";
		$prime       = "\xE2\x80\xB2";
		$doublePrime = "\xE2\x80\xB3";

		// Keyed by the one or two bytes that follow the digit in the match
		$afterDigitMap = [
			// 80's -- use an apostrophe
			"'s" => $apostrophe,
			// 12' or 12" -- use a prime
			"'"  => $prime,
			"' " => $prime,
			"'x" => $prime,
			'"'  => $doublePrime,
			'" ' => $doublePrime,
			'"x' => $doublePrime
		];

		preg_match_all("/[0-9](?>'s|[\"']? ?x(?= ?[0-9])|[\"'])/S", $this->text, $found, PREG_OFFSET_CAPTURE);
		foreach ($found[0] as $match)
		{
			list($str, $pos) = $match;

			// A trailing "x" is a multiply sign, located at the last byte of the match
			if (substr($str, -1) === 'x')
			{
				$this->addTag($pos + strlen($str) - 1, 1, "\xC3\x97");
			}

			// Look at what immediately follows the digit for an apostrophe/prime
			$afterDigit = substr($str, 1, 2);
			if (isset($afterDigitMap[$afterDigit]))
			{
				$this->addTag($pos + 1, 1, $afterDigitMap[$afterDigit]);
			}
		}
	}

	/**
	* Replace (c), (r) and (tm) with the symbols ©, ® and ™
	*
	* Matching is case-insensitive
	*
	* @return void
	*/
	protected function parseSymbolsInParentheses()
	{
		if (strpos($this->text, '(') === false)
		{
			return;
		}

		$symbols = [
			'(c)'  => "\xC2\xA9",
			'(r)'  => "\xC2\xAE",
			'(tm)' => "\xE2\x84\xA2"
		];

		preg_match_all('/\\((?>c|r|tm)\\)/i', $this->text, $found, PREG_OFFSET_CAPTURE);
		foreach ($found[0] as $match)
		{
			list($str, $pos) = $match;

			// Lowercase the only letters that can appear in a match to get the map key
			$key = strtr($str, 'CMRT', 'cmrt');
			$this->addTag($pos, strlen($str), $symbols[$key]);
		}
	}
}

1		<?php
2
3		/**
4		* @package s9e\TextFormatter
5		* @copyright Copyright (c) The s9e authors
6		* @license http://www.opensource.org/licenses/mit-license.php The MIT License
7		*/
8		namespace s9e\TextFormatter\Plugins\FancyPants;
9
10		use s9e\TextFormatter\Plugins\ParserBase;
11
12		class Parser extends ParserBase
13		{
14		/**
15		* @var bool Whether current text contains a double quote character
16		*/
17		protected $hasDoubleQuote;
18
19		/**
20		* @var bool Whether current text contains a single quote character
21		*/
22		protected $hasSingleQuote;
23
24		/**
25		* @var string Text being parsed
26		*/
27		protected $text;
28
29		/**
30		* {@inheritdoc}
31		*/
32	78	public function parse($text, array $matches)
33		{
34	78	$this->text = $text;
35	78	$this->hasSingleQuote = (strpos($text, "'") !== false);
36	78	$this->hasDoubleQuote = (strpos($text, '"') !== false);
37
38	78	if (empty($this->config['disableQuotes']))
39		{
40	76	$this->parseSingleQuotes();
41	76	$this->parseSingleQuotePairs();
42	76	$this->parseDoubleQuotePairs();
43		}
44	78	if (empty($this->config['disableGuillemets']))
45		{
46	77	$this->parseGuillemets();
47		}
48	78	if (empty($this->config['disableMathSymbols']))
49		{
50	77	$this->parseNotEqualSign();
51	77	$this->parseSymbolsAfterDigits();
52	77	$this->parseFractions();
53		}
54	78	if (empty($this->config['disablePunctuation']))
55		{
56	77	$this->parseDashesAndEllipses();
57		}
58	78	if (empty($this->config['disableSymbols']))
59		{
60	77	$this->parseSymbolsInParentheses();
61		}
62
63	78	unset($this->text);
64		}
65
66		/**
67		* Add a fancy replacement tag
68		*
69		* @param integer $tagPos Position of the tag in the text
70		* @param integer $tagLen Length of text consumed by the tag
71		* @param string $chr Replacement character
72		* @param integer $prio Tag's priority
73		* @return \s9e\TextFormatter\Parser\Tag
74		*/
75	72	protected function addTag($tagPos, $tagLen, $chr, $prio = 0)
76		{
77	72	$tag = $this->parser->addSelfClosingTag($this->config['tagName'], $tagPos, $tagLen, $prio);
78	72	$tag->setAttribute($this->config['attrName'], $chr);
79
80	72	return $tag;
81		}
82
83		/**
84		* Parse dashes and ellipses
85		*
86		* Does en dash –, em dash — and ellipsis …
87		*
88		* @return void
89		*/
90	77	protected function parseDashesAndEllipses()
91		{
92	77	if (strpos($this->text, '...') === false && strpos($this->text, '--') === false)
93		{
94	64	return;
95		}
96
97		$chrs = [
98	13	'--' => "\xE2\x80\x93",
99		'---' => "\xE2\x80\x94",
100		'...' => "\xE2\x80\xA6"
101		];
102	13	$regexp = '/---?|\\.\\.\\./S';
103	13	preg_match_all($regexp, $this->text, $matches, PREG_OFFSET_CAPTURE);
104	13	foreach ($matches[0] as $m)
105		{
106	13	$this->addTag($m[1], strlen($m[0]), $chrs[$m[0]]);
107		}
108		}
109
110		/**
111		* Parse pairs of double quotes
112		*
113		* Does quote pairs “” -- must be done separately to handle nesting
114		*
115		* @return void
116		*/
117	76	protected function parseDoubleQuotePairs()
118		{
119	76	if ($this->hasDoubleQuote)
120		{
121	11	$this->parseQuotePairs(
122	11	'/(?<![0-9\\pL])"[^"\\n]+"(?![0-9\\pL])/uS',
123	11	"\xE2\x80\x9C",
124	11	"\xE2\x80\x9D"
125		);
126		}
127		}
128
129		/**
130		* Parse vulgar fractions
131		*
132		* @return void
133		*/
134	77	protected function parseFractions()
135		{
136	77	if (strpos($this->text, '/') === false)
137		{
138	74	return;
139		}
140
141		$map = [
142	3	'1/4' => "\xC2\xBC",
143		'1/2' => "\xC2\xBD",
144		'3/4' => "\xC2\xBE",
145		'1/7' => "\xE2\x85\x90",
146		'1/9' => "\xE2\x85\x91",
147		'1/10' => "\xE2\x85\x92",
148		'1/3' => "\xE2\x85\x93",
149		'2/3' => "\xE2\x85\x94",
150		'1/5' => "\xE2\x85\x95",
151		'2/5' => "\xE2\x85\x96",
152		'3/5' => "\xE2\x85\x97",
153		'4/5' => "\xE2\x85\x98",
154		'1/6' => "\xE2\x85\x99",
155		'5/6' => "\xE2\x85\x9A",
156		'1/8' => "\xE2\x85\x9B",
157		'3/8' => "\xE2\x85\x9C",
158		'5/8' => "\xE2\x85\x9D",
159		'7/8' => "\xE2\x85\x9E",
160		'0/3' => "\xE2\x86\x89"
161		];
162
163	3	$regexp = '/\\b(?:0\\/3|1\\/(?:[2-9]|10)|2\\/[35]|3\\/[458]|4\\/5|5\\/[68]|7\\/8)\\b/S';
164	3	preg_match_all($regexp, $this->text, $matches, PREG_OFFSET_CAPTURE);
165	3	foreach ($matches[0] as $m)
166		{
167	2	$this->addTag($m[1], strlen($m[0]), $map[$m[0]]);
168		}
169		}
170
171		/**
172		* Parse guillemets-style quotation marks
173		*
174		* @return void
175		*/
176	77	protected function parseGuillemets()
177		{
178	77	if (strpos($this->text, '<<') === false)
179		{
180	74	return;
181		}
182
183	3	$regexp = '/<<( ?)(?! )[^\\n<>]*?[^\\n <>]\\1>>(?!>)/';
184	3	preg_match_all($regexp, $this->text, $matches, PREG_OFFSET_CAPTURE);
185	3	foreach ($matches[0] as $m)
186		{
187	2	$left = $this->addTag($m[1], 2, "\xC2\xAB");
188	2	$right = $this->addTag($m[1] + strlen($m[0]) - 2, 2, "\xC2\xBB");
189
190	2	$left->cascadeInvalidationTo($right);
191		}
192		}
193
194		/**
195		* Parse the not equal sign
196		*
197		* Supports != and =/=
198		*
199		* @return void
200		*/
201	77	protected function parseNotEqualSign()
202		{
203	77	if (strpos($this->text, '!=') === false && strpos($this->text, '=/=') === false)
204		{
205	75	return;
206		}
207
208	2	$regexp = '/\\b (?:!|=\\/)=(?= \\b)/';
209	2	preg_match_all($regexp, $this->text, $matches, PREG_OFFSET_CAPTURE);
210	2	foreach ($matches[0] as $m)
211		{
212	2	$this->addTag($m[1] + 1, strlen($m[0]) - 1, "\xE2\x89\xA0");
213		}
214		}
215
216		/**
217		* Parse pairs of quotes
218		*
219		* @param string $regexp Regexp used to identify quote pairs
220		* @param string $leftQuote Fancy replacement for left quote
221		* @param string $rightQuote Fancy replacement for right quote
222		* @return void
223		*/
224	37	protected function parseQuotePairs($regexp, $leftQuote, $rightQuote)
225		{
226	37	preg_match_all($regexp, $this->text, $matches, PREG_OFFSET_CAPTURE);
227	37	foreach ($matches[0] as $m)
228		{
229	10	$left = $this->addTag($m[1], 1, $leftQuote);
230	10	$right = $this->addTag($m[1] + strlen($m[0]) - 1, 1, $rightQuote);
231
232		// Cascade left tag's invalidation to the right so that if we skip the left quote,
233		// the right quote remains untouched
234	10	$left->cascadeInvalidationTo($right);
235		}
236		}
237
238		/**
239		* Parse pairs of single quotes
240		*
241		* Does quote pairs ‘’ must be done separately to handle nesting
242		*
243		* @return void
244		*/
245	76	protected function parseSingleQuotePairs()
246		{
247	76	if ($this->hasSingleQuote)
248		{
249	30	$this->parseQuotePairs(
250	30	"/(?<![0-9\\pL])'[^'\\n]+'(?![0-9\\pL])/uS",
251	30	"\xE2\x80\x98",
252	30	"\xE2\x80\x99"
253		);
254		}
255		}
256
257		/**
258		* Parse single quotes in general
259		*
260		* Does apostrophes ’ after a letter or at the beginning of a word or a couple of digits
261		*
262		* @return void
263		*/
264	76	protected function parseSingleQuotes()
265		{
266	76	if (!$this->hasSingleQuote)
267		{
268	46	return;
269		}
270
271	30	$regexp = "/(?<=\\pL)'|(?<!\\S)'(?=\\pL|[0-9]{2})/uS";
272	30	preg_match_all($regexp, $this->text, $matches, PREG_OFFSET_CAPTURE);
273	30	foreach ($matches[0] as $m)
274		{
275		// Give this tag a worse priority than default so that quote pairs take precedence
276	20	$this->addTag($m[1], 1, "\xE2\x80\x99", 10);
277		}
278		}
279
280		/**
281		* Parse symbols found after digits
282		*
283		* Does symbols found after a digit:
284		* - apostrophe ’ if it's followed by an "s" as in 80's
285		* - prime ′ and double prime ″
286		* - multiply sign × if it's followed by an optional space and another digit
287		*
288		* @return void
289		*/
290	77	protected function parseSymbolsAfterDigits()
291		{
292	77	if (!$this->hasSingleQuote && !$this->hasDoubleQuote && strpos($this->text, 'x') === false)
293		{
294	34	return;
295		}
296
297		$map = [
298		// 80's -- use an apostrophe
299	43	"'s" => "\xE2\x80\x99",
300		// 12' or 12" -- use a prime
301		"'" => "\xE2\x80\xB2",
302		"' " => "\xE2\x80\xB2",
303		"'x" => "\xE2\x80\xB2",
304		'"' => "\xE2\x80\xB3",
305		'" ' => "\xE2\x80\xB3",
306		'"x' => "\xE2\x80\xB3"
307		];
308
309	43	$regexp = "/[0-9](?>'s|[\"']? ?x(?= ?[0-9])|[\"'])/S";
310	43	preg_match_all($regexp, $this->text, $matches, PREG_OFFSET_CAPTURE);
311	43	foreach ($matches[0] as $m)
312		{
313		// Test for a multiply sign at the end
314	16	if (substr($m[0], -1) === 'x')
315		{
316	10	$this->addTag($m[1] + strlen($m[0]) - 1, 1, "\xC3\x97");
317		}
318
319		// Test for an apostrophe/prime right after the digit
320	16	$str = substr($m[0], 1, 2);
321	16	if (isset($map[$str]))
322		{
323	11	$this->addTag($m[1] + 1, 1, $map[$str]);
324		}
325		}
326		}
327
328		/**
329		* Parse symbols found in parentheses such as (c)
330		*
331		* Does symbols ©, ® and ™
332		*
333		* @return void
334		*/
335	77	protected function parseSymbolsInParentheses()
336		{
337	77	if (strpos($this->text, '(') === false)
338		{
339	65	return;
340		}
341
342		$chrs = [
343	12	'(c)' => "\xC2\xA9",
344		'(r)' => "\xC2\xAE",
345		'(tm)' => "\xE2\x84\xA2"
346		];
347	12	$regexp = '/\\((?>c|r|tm)\\)/i';
348	12	preg_match_all($regexp, $this->text, $matches, PREG_OFFSET_CAPTURE);
349	12	foreach ($matches[0] as $m)
350		{
351	12	$this->addTag($m[1], strlen($m[0]), $chrs[strtr($m[0], 'CMRT', 'cmrt')]);
352		}
353		}
354		}

s9e / TextFormatter

Parser A last analyzed 2025-05-02 21:56 UTC

Complexity

Size/Duplication

Test Coverage

Importance

12 Methods

How to fix Complexity

Complex Class

Duplication Side-by-Side

Filter issues like

Parser A
last analyzed 2025-05-02 21:56 UTC