Completed
Push — master ( 7293db...f83eb2 )
by Josh
34:00
created

Parser::parseSymbolsInParentheses()   A

Complexity

Conditions 3
Paths 3

Size

Total Lines 19
Code Lines 11

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 0
CRAP Score 12

Importance

Changes 0
Metric Value
cc 3
eloc 11
nc 3
nop 0
dl 0
loc 19
ccs 0
cts 0
cp 0
crap 12
rs 9.4285
c 0
b 0
f 0
1
<?php
2
3
/**
4
* @package   s9e\TextFormatter
5
* @copyright Copyright (c) 2010-2016 The s9e Authors
6
* @license   http://www.opensource.org/licenses/mit-license.php The MIT License
7
*/
8
namespace s9e\TextFormatter\Plugins\FancyPants;
9
10
use s9e\TextFormatter\Plugins\ParserBase;
11
12
class Parser extends ParserBase
13
{
14
	/**
15
	* @var bool Whether current text contains a double quote character
16
	*/
17
	protected $hasDoubleQuote;
18
19
	/**
20
	* @var bool Whether current text contains a single quote character
21
	*/
22
	protected $hasSingleQuote;
23
24
	/**
25
	* @var string Text being parsed
26
	*/
27
	protected $text;
28
29
	/**
	* {@inheritdoc}
	*
	* Records which quote characters appear in the text, runs every replacement pass in a
	* fixed order, then unsets the stored text. $matches is not used by this plugin
	*/
	public function parse($text, array $matches)
	{
		// Look for quote characters once so that quote-related passes can bail out early
		$this->hasDoubleQuote = (strpos($text, '"') !== false);
		$this->hasSingleQuote = (strpos($text, "'") !== false);
		$this->text           = $text;

		// Quote and digit-related passes
		$this->parseSingleQuotes();
		$this->parseSymbolsAfterDigits();
		$this->parseSingleQuotePairs();
		$this->parseDoubleQuotePairs();

		// Punctuation and symbol passes
		$this->parseDashesAndEllipses();
		$this->parseSymbolsInParentheses();
		$this->parseNotEqualSign();
		$this->parseGuillemets();

		// Drop the text once every pass has run
		unset($this->text);
	}
49
50
	/**
	* Create the self-closing tag that stands for one fancy replacement
	*
	* The tag name and the attribute name are read from this plugin's config. The replacement
	* character is stored as the value of that attribute
	*
	* @param  integer $tagPos Position of the tag in the text
	* @param  integer $tagLen Length of text consumed by the tag
	* @param  string  $chr    Replacement character
	* @param  integer $prio   Tag's priority
	* @return \s9e\TextFormatter\Parser\Tag
	*/
	protected function addTag($tagPos, $tagLen, $chr, $prio = 0)
	{
		$newTag = $this->parser->addSelfClosingTag(
			$this->config['tagName'],
			$tagPos,
			$tagLen,
			$prio
		);
		$newTag->setAttribute($this->config['attrName'], $chr);

		return $newTag;
	}
66
67
	/**
	* Replace runs of hyphens and dots with dashes and ellipses
	*
	* Two hyphens become an en dash –, three hyphens become an em dash — and three dots
	* become an ellipsis …
	*
	* @return void
	*/
	protected function parseDashesAndEllipses()
	{
		$hasEllipsis = (strpos($this->text, '...') !== false);
		$hasDashes   = (strpos($this->text, '--') !== false);
		if (!$hasEllipsis && !$hasDashes)
		{
			return;
		}

		// Each possible match is used as the key to its replacement character
		$replacements = [
			'---' => "\xE2\x80\x94",
			'--'  => "\xE2\x80\x93",
			'...' => "\xE2\x80\xA6"
		];

		preg_match_all('/---?|\\.\\.\\./S', $this->text, $matches, PREG_OFFSET_CAPTURE);
		foreach ($matches[0] as $match)
		{
			list($str, $pos) = $match;

			$this->addTag($pos, strlen($str), $replacements[$str]);
		}
	}
93
94
	/**
	* Replace pairs of straight double quotes with curly quotes “”
	*
	* A pair is an opening quote that is not preceded by a digit or a letter, one or more
	* characters that are neither a double quote nor a newline, and a closing quote that is not
	* followed by a digit or a letter. Double quote pairs get their own pass, separate from
	* single quote pairs, to handle nesting
	*
	* @return void
	*/
	protected function parseDoubleQuotePairs()
	{
		if (!$this->hasDoubleQuote)
		{
			return;
		}

		$regexp = '/(?<![0-9\\pL])"[^"\\n]+"(?![0-9\\pL])/uS';
		$this->parseQuotePairs($regexp, "\xE2\x80\x9C", "\xE2\x80\x9D");
	}
112
113
	/**
	* Replace << and >> around a piece of text with guillemets « and »
	*
	* The enclosed text cannot contain a newline or an angle bracket. It may be padded with a
	* single space on both sides, or on neither
	*
	* @return void
	*/
	protected function parseGuillemets()
	{
		if (strpos($this->text, '<<') === false)
		{
			return;
		}

		preg_match_all(
			'/<<( ?)(?! )[^\\n<>]*?[^\\n <>]\\1>>(?!>)/',
			$this->text,
			$matches,
			PREG_OFFSET_CAPTURE
		);
		foreach ($matches[0] as $match)
		{
			list($str, $leftPos) = $match;

			// The closing pair occupies the last two bytes of the match
			$rightPos = $leftPos + strlen($str) - 2;

			$leftTag  = $this->addTag($leftPos,  2, "\xC2\xAB");
			$rightTag = $this->addTag($rightPos, 2, "\xC2\xBB");

			// Tie the right tag's fate to the left tag's
			$leftTag->cascadeInvalidationTo($rightTag);
		}
	}
135
136
	/**
	* Replace != with the not equal sign ≠
	*
	* Only matches a != that has a space on each side and sits between two word boundaries
	*
	* @return void
	*/
	protected function parseNotEqualSign()
	{
		if (strpos($this->text, '!=') === false)
		{
			return;
		}

		preg_match_all('/\\b !=(?= \\b)/', $this->text, $matches, PREG_OFFSET_CAPTURE);
		foreach ($matches[0] as $match)
		{
			// The match starts with the leading space, the sign itself starts one byte later
			$signPos = $match[1] + 1;

			$this->addTag($signPos, 2, "\xE2\x89\xA0");
		}
	}
155
156
	/**
	* Replace the first and last character of every match with a pair of fancy quotes
	*
	* @param  string $regexp     Regexp used to identify quote pairs
	* @param  string $leftQuote  Fancy replacement for left quote
	* @param  string $rightQuote Fancy replacement for right quote
	* @return void
	*/
	protected function parseQuotePairs($regexp, $leftQuote, $rightQuote)
	{
		preg_match_all($regexp, $this->text, $matches, PREG_OFFSET_CAPTURE);
		foreach ($matches[0] as $match)
		{
			list($str, $leftPos) = $match;

			// The closing quote is the last byte of the match
			$rightPos = $leftPos + strlen($str) - 1;

			$leftTag  = $this->addTag($leftPos,  1, $leftQuote);
			$rightTag = $this->addTag($rightPos, 1, $rightQuote);

			// If the left quote ends up being skipped, its invalidation cascades to the right
			// tag so that the right quote is left untouched as well
			$leftTag->cascadeInvalidationTo($rightTag);
		}
	}
177
178
	/**
	* Replace pairs of straight single quotes with curly quotes ‘’
	*
	* A pair is an opening quote that is not preceded by a digit or a letter, one or more
	* characters that are neither a single quote nor a newline, and a closing quote that is not
	* followed by a digit or a letter. Single quote pairs get their own pass, separate from
	* double quote pairs, to handle nesting
	*
	* @return void
	*/
	protected function parseSingleQuotePairs()
	{
		if (!$this->hasSingleQuote)
		{
			return;
		}

		$regexp = "/(?<![0-9\\pL])'[^'\\n]+'(?![0-9\\pL])/uS";
		$this->parseQuotePairs($regexp, "\xE2\x80\x98", "\xE2\x80\x99");
	}
196
197 15
	/**
	* Replace standalone single quotes with an apostrophe ’
	*
	* Matches a single quote that follows a letter, or a single quote that is not preceded by
	* a non-space character and is followed by either a letter or two digits
	*
	* @return void
	*/
	protected function parseSingleQuotes()
	{
		if (!$this->hasSingleQuote)
		{
			return;
		}

		// Worse priority than the default so that quote pairs take precedence
		$prio = 10;

		preg_match_all(
			"/(?<=\\pL)'|(?<!\\S)'(?=\\pL|[0-9]{2})/uS",
			$this->text,
			$matches,
			PREG_OFFSET_CAPTURE
		);
		foreach ($matches[0] as $match)
		{
			$this->addTag($match[1], 1, "\xE2\x80\x99", $prio);
		}
	}
219 41
220 41
	/**
	* Replace symbols that directly follow a digit
	*
	* Handles the following:
	*  - apostrophe ’ if the quote is followed by an "s" as in 80's
	*  - prime ′ and double prime ″ for any other single or double quote
	*  - multiply sign × for an "x" that is followed by an optional space and another digit
	*
	* @return void
	*/
	protected function parseSymbolsAfterDigits()
	{
		// None of the symbols can be present without a quote or an "x"
		if (!$this->hasSingleQuote && !$this->hasDoubleQuote && strpos($this->text, 'x') === false)
		{
			return;
		}

		preg_match_all(
			'/[0-9](?>\'s|["\']? ?x(?= ?[0-9])|["\'])/S',
			$this->text,
			$matches,
			PREG_OFFSET_CAPTURE
		);
		foreach ($matches[0] as $match)
		{
			list($str, $pos) = $match;

			// A multiply sign can only be the last byte of the match
			$lastIdx = strlen($str) - 1;
			if ($str[$lastIdx] === 'x')
			{
				$this->addTag($pos + $lastIdx, 1, "\xC3\x97");
			}

			// Anything other than a quote right after the digit needs no further replacement
			$symbol = $str[1];
			if ($symbol !== "'" && $symbol !== '"')
			{
				continue;
			}

			if (substr($str, 1, 2) === "'s")
			{
				// 80's -- use an apostrophe
				$chr = "\xE2\x80\x99";
			}
			elseif ($symbol === "'")
			{
				// 12' -- use a prime
				$chr = "\xE2\x80\xB2";
			}
			else
			{
				// 12" -- use a double prime
				$chr = "\xE2\x80\xB3";
			}

			$this->addTag($pos + 1, 1, $chr);
		}
	}
266
267
	/**
	* Replace (c), (r) and (tm) with the symbols ©, ® and ™
	*
	* The letters are matched case-insensitively
	*
	* @return void
	*/
	protected function parseSymbolsInParentheses()
	{
		if (strpos($this->text, '(') === false)
		{
			return;
		}

		// Replacements are keyed by the lowercased match
		$symbols = [
			'(c)'  => "\xC2\xA9",
			'(r)'  => "\xC2\xAE",
			'(tm)' => "\xE2\x84\xA2"
		];

		preg_match_all('/\\((?>c|r|tm)\\)/i', $this->text, $matches, PREG_OFFSET_CAPTURE);
		foreach ($matches[0] as $match)
		{
			list($str, $pos) = $match;

			// Lowercase the only letters that can appear in a match
			$key = strtr($str, 'CMRT', 'cmrt');

			$this->addTag($pos, strlen($str), $symbols[$key]);
		}
	}
293
}