Completed
Push — master ( f83eb2...04a653 )
by Josh
35:52
created

Parser::parseGuillemets()   A

Complexity

Conditions 3
Paths 3

Size

Total Lines 17
Code Lines 9

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 8
CRAP Score 3

Importance

Changes 0
Metric Value
cc 3
eloc 9
nc 3
nop 0
dl 0
loc 17
ccs 8
cts 8
cp 1
crap 3
rs 9.4285
c 0
b 0
f 0
1
<?php
2
3
/**
4
* @package   s9e\TextFormatter
5
* @copyright Copyright (c) 2010-2016 The s9e Authors
6
* @license   http://www.opensource.org/licenses/mit-license.php The MIT License
7
*/
8
namespace s9e\TextFormatter\Plugins\FancyPants;
9
10
use s9e\TextFormatter\Plugins\ParserBase;
11
12
class Parser extends ParserBase
13
{
14
	/**
15
	* @var bool Whether current text contains a double quote character
16
	*/
17
	protected $hasDoubleQuote;
18
19
	/**
20
	* @var bool Whether current text contains a single quote character
21
	*/
22
	protected $hasSingleQuote;
23
24
	/**
25
	* @var string Text being parsed
26
	*/
27
	protected $text;
28
29
	/**
	* {@inheritdoc}
	*
	* Stores the text, records which quote characters it contains, then runs every
	* replacement pass in a fixed order before discarding the stored text
	*/
	public function parse($text, array $matches)
	{
		// Look for quote characters once so that the quote-related passes can bail out early
		$this->hasSingleQuote = (false !== strpos($text, "'"));
		$this->hasDoubleQuote = (false !== strpos($text, '"'));
		$this->text           = $text;

		// Each pass adds its own tags through addTag(); the order of the passes is preserved
		$this->parseSingleQuotes();
		$this->parseSymbolsAfterDigits();
		$this->parseSingleQuotePairs();
		$this->parseDoubleQuotePairs();
		$this->parseDashesAndEllipses();
		$this->parseSymbolsInParentheses();
		$this->parseNotEqualSign();
		$this->parseGuillemets();

		// Discard the stored text once every pass has run
		unset($this->text);
	}
49
50
	/**
	* Create a self-closing replacement tag
	*
	* The tag uses the configured tag name and stores the replacement character in the
	* configured attribute
	*
	* @param  integer $tagPos Position of the tag in the text
	* @param  integer $tagLen Length of text consumed by the tag
	* @param  string  $chr    Replacement character
	* @param  integer $prio   Tag's priority
	* @return \s9e\TextFormatter\Parser\Tag
	*/
	protected function addTag($tagPos, $tagLen, $chr, $prio = 0)
	{
		$tagName = $this->config['tagName'];

		$tag = $this->parser->addSelfClosingTag($tagName, $tagPos, $tagLen, $prio);
		$tag->setAttribute($this->config['attrName'], $chr);

		return $tag;
	}
66
67
	/**
	* Replace runs of hyphens and dots with their typographic equivalent
	*
	* Two hyphens become an en dash –, three hyphens become an em dash — and three dots
	* become an ellipsis …
	*
	* @return void
	*/
	protected function parseDashesAndEllipses()
	{
		// Cheap substring checks first: skip the regexp if nothing can match
		$hasEllipsis = (strpos($this->text, '...') !== false);
		if (!$hasEllipsis && strpos($this->text, '--') === false)
		{
			return;
		}

		// Replacement character for each string the regexp can match
		$replacements = [
			'...' => "\xE2\x80\xA6",
			'---' => "\xE2\x80\x94",
			'--'  => "\xE2\x80\x93"
		];

		preg_match_all('/---?|\\.\\.\\./S', $this->text, $matches, PREG_OFFSET_CAPTURE);
		foreach ($matches[0] as $match)
		{
			list($str, $pos) = $match;

			$this->addTag($pos, strlen($str), $replacements[$str]);
		}
	}
93
94
	/**
	* Replace pairs of double quotes with curly quotes “”
	*
	* Kept separate from single quote pairs in order to handle nesting
	*
	* @return void
	*/
	protected function parseDoubleQuotePairs()
	{
		// Nothing to do if the text contains no double quote at all
		if (!$this->hasDoubleQuote)
		{
			return;
		}

		$this->parseQuotePairs(
			'/(?<![0-9\\pL])"[^"\\n]+"(?![0-9\\pL])/uS',
			"\xE2\x80\x9C",
			"\xE2\x80\x9D"
		);
	}
112
113
	/**
	* Replace << and >> pairs with guillemets « »
	*
	* @return void
	*/
	protected function parseGuillemets()
	{
		// Only run the regexp if the text contains an opening pair of angle brackets
		if (strpos($this->text, '<<') !== false)
		{
			preg_match_all(
				'/<<( ?)(?! )[^\\n<>]*?[^\\n <>]\\1>>(?!>)/',
				$this->text,
				$matches,
				PREG_OFFSET_CAPTURE
			);
			foreach ($matches[0] as $match)
			{
				list($str, $leftPos) = $match;

				// The closing >> occupies the last two bytes of the match
				$rightPos = $leftPos + strlen($str) - 2;

				$left  = $this->addTag($leftPos,  2, "\xC2\xAB");
				$right = $this->addTag($rightPos, 2, "\xC2\xBB");

				// Tie the right tag's invalidation to the left tag's
				$left->cascadeInvalidationTo($right);
			}
		}
	}
135
136
	/**
	* Replace != with the not equal sign ≠
	*
	* Only matches a != that has a space on each side
	*
	* @return void
	*/
	protected function parseNotEqualSign()
	{
		// Only run the regexp if the text contains the character sequence
		if (strpos($this->text, '!=') !== false)
		{
			preg_match_all('/\\b !=(?= \\b)/', $this->text, $matches, PREG_OFFSET_CAPTURE);
			foreach ($matches[0] as $match)
			{
				// The match starts with a space, the tag starts right after it
				$tagPos = $match[1] + 1;

				$this->addTag($tagPos, 2, "\xE2\x89\xA0");
			}
		}
	}
155
156
	/**
	* Replace the first and last character of every match with fancy quotes
	*
	* @param  string $regexp     Regexp used to identify quote pairs
	* @param  string $leftQuote  Fancy replacement for left quote
	* @param  string $rightQuote Fancy replacement for right quote
	* @return void
	*/
	protected function parseQuotePairs($regexp, $leftQuote, $rightQuote)
	{
		preg_match_all($regexp, $this->text, $matches, PREG_OFFSET_CAPTURE);
		foreach ($matches[0] as $match)
		{
			list($str, $leftPos) = $match;

			// The closing quote is the last byte of the match
			$rightPos = $leftPos + strlen($str) - 1;

			$left  = $this->addTag($leftPos,  1, $leftQuote);
			$right = $this->addTag($rightPos, 1, $rightQuote);

			// If the left quote is skipped, the right quote must be left untouched too,
			// so the left tag's invalidation is cascaded to the right tag
			$left->cascadeInvalidationTo($right);
		}
	}
177
178
	/**
	* Replace pairs of single quotes with curly quotes ‘’
	*
	* Kept separate from double quote pairs in order to handle nesting
	*
	* @return void
	*/
	protected function parseSingleQuotePairs()
	{
		// Nothing to do if the text contains no single quote at all
		if (!$this->hasSingleQuote)
		{
			return;
		}

		$this->parseQuotePairs(
			"/(?<![0-9\\pL])'[^'\\n]+'(?![0-9\\pL])/uS",
			"\xE2\x80\x98",
			"\xE2\x80\x99"
		);
	}
196
197 15
	/**
	* Replace standalone single quotes with an apostrophe ’
	*
	* Matches a single quote that follows a letter, or one that is not preceded by a
	* non-space character and is followed by a letter or by two digits
	*
	* @return void
	*/
	protected function parseSingleQuotes()
	{
		if ($this->hasSingleQuote)
		{
			preg_match_all(
				"/(?<=\\pL)'|(?<!\\S)'(?=\\pL|[0-9]{2})/uS",
				$this->text,
				$matches,
				PREG_OFFSET_CAPTURE
			);
			foreach ($matches[0] as $match)
			{
				$pos = $match[1];

				// Use a worse priority than the default so that quote pairs take precedence
				$this->addTag($pos, 1, "\xE2\x80\x99", 10);
			}
		}
	}
219 41
220 41
	/**
	* Replace symbols that directly follow a digit
	*
	* Handles:
	*  - apostrophe ’ if the quote is followed by an "s" as in 80's
	*  - prime ′ and double prime ″ as in 12' or 12"
	*  - multiply sign × if the "x" is followed by an optional space and another digit
	*
	* @return void
	*/
	protected function parseSymbolsAfterDigits()
	{
		// The regexp can only match if the text contains a quote or the letter x
		if (!($this->hasSingleQuote || $this->hasDoubleQuote || strpos($this->text, 'x') !== false))
		{
			return;
		}

		$apostrophe  = "\xE2\x80\x99";
		$prime       = "\xE2\x80\xB2";
		$doublePrime = "\xE2\x80\xB3";

		// Keys are the one or two characters found right after the digit
		$map = [
			// 80's
			"'s" => $apostrophe,
			// 12' optionally followed by a space or a multiply sign
			"'"  => $prime,
			"' " => $prime,
			"'x" => $prime,
			// 12" optionally followed by a space or a multiply sign
			'"'  => $doublePrime,
			'" ' => $doublePrime,
			'"x' => $doublePrime
		];

		preg_match_all(
			"/[0-9](?>'s|[\"']? ?x(?= ?[0-9])|[\"'])/S",
			$this->text,
			$matches,
			PREG_OFFSET_CAPTURE
		);
		foreach ($matches[0] as $match)
		{
			list($str, $pos) = $match;

			// A match that ends with an "x" gets a multiply sign in place of its last character
			if (substr($str, -1) === 'x')
			{
				$this->addTag($pos + strlen($str) - 1, 1, "\xC3\x97");
			}

			// Look up what follows the digit to find an apostrophe or a prime
			$key = substr($str, 1, 2);
			if (isset($map[$key]))
			{
				$this->addTag($pos + 1, 1, $map[$key]);
			}
		}
	}
267
268
	/**
	* Replace (c), (r) and (tm) with the symbols ©, ® and ™
	*
	* Matching is case-insensitive
	*
	* @return void
	*/
	protected function parseSymbolsInParentheses()
	{
		// Only run the regexp if the text contains an opening parenthesis
		if (strpos($this->text, '(') !== false)
		{
			// Replacement characters, keyed by the lowercased match
			$symbols = [
				'(c)'  => "\xC2\xA9",
				'(r)'  => "\xC2\xAE",
				'(tm)' => "\xE2\x84\xA2"
			];

			preg_match_all('/\\((?>c|r|tm)\\)/i', $this->text, $matches, PREG_OFFSET_CAPTURE);
			foreach ($matches[0] as $match)
			{
				list($str, $pos) = $match;

				// Lowercase the only letters that can appear in a match
				$key = strtr($str, 'CMRT', 'cmrt');

				$this->addTag($pos, strlen($str), $symbols[$key]);
			}
		}
	}
294
}