Completed
Push — master ( 030028...7293db )
by Josh
23:10
created

Parser::parseDoubleQuotePairs()   A

Complexity

Conditions 2
Paths 2

Size

Total Lines 11
Code Lines 6

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 9
CRAP Score 2

Importance

Changes 0
Metric Value
cc 2
eloc 6
nc 2
nop 0
dl 0
loc 11
ccs 9
cts 9
cp 1
crap 2
rs 9.4285
c 0
b 0
f 0
1
<?php
2
3
/**
4
* @package   s9e\TextFormatter
5
* @copyright Copyright (c) 2010-2016 The s9e Authors
6
* @license   http://www.opensource.org/licenses/mit-license.php The MIT License
7
*/
8
namespace s9e\TextFormatter\Plugins\FancyPants;
9
10
use s9e\TextFormatter\Plugins\ParserBase;
11
12
class Parser extends ParserBase
13
{
14
	/**
15
	* @var bool Whether the current text contains a double quote character
16
	*/
17
	protected $hasDoubleQuote;
18
19
	/**
20
	* @var bool Whether the current text contains a single quote character
21
	*/
22
	protected $hasSingleQuote;
23
24
	/**
25
	* @var string Text being parsed
26
	*/
27
	protected $text;
28
29
	/**
	* Run every replacement pass over the given text
	*
	* The $matches argument is not read: each pass scans the text on its own.
	*
	* {@inheritdoc}
	*/
	public function parse($text, array $matches)
	{
		// Look for quote characters once so that the quote-related passes can bail out early
		$this->hasDoubleQuote = (false !== strpos($text, '"'));
		$this->hasSingleQuote = (false !== strpos($text, "'"));
		$this->text           = $text;

		// Passes are run in this fixed order
		$this->parseSingleQuotes();
		$this->parseSymbolsAfterDigits();
		$this->parseSingleQuotePairs();
		$this->parseDoubleQuotePairs();
		$this->parseDashesAndEllipses();
		$this->parseSymbolsInParentheses();

		// The text is only needed while the passes are running
		unset($this->text);
	}
47
48
	/**
	* Add a fancy replacement tag
	*
	* Creates a self-closing tag whose name is read from the plugin's config and stores the
	* replacement character in the attribute whose name is also read from the config
	*
	* @param  integer $tagPos Position of the tag in the text
	* @param  integer $tagLen Length of text consumed by the tag
	* @param  string  $chr    Replacement character
	* @param  integer $prio   Tag's priority
	* @return \s9e\TextFormatter\Parser\Tag
	*/
	protected function addTag($tagPos, $tagLen, $chr, $prio = 0)
	{
		$tagName     = $this->config['tagName'];
		$replacement = $this->parser->addSelfClosingTag($tagName, $tagPos, $tagLen, $prio);

		$attrName = $this->config['attrName'];
		$replacement->setAttribute($attrName, $chr);

		return $replacement;
	}
64
65
	/**
	* Parse dashes and ellipses
	*
	* Tags two hyphens as an en dash –, three hyphens as an em dash — and three dots as an
	* ellipsis …
	*
	* @return void
	*/
	protected function parseDashesAndEllipses()
	{
		// Skip the regexp entirely if the text contains none of the sequences we replace
		$hasEllipsis = (strpos($this->text, '...') !== false);
		$hasDashes   = (strpos($this->text, '--') !== false);
		if (!$hasEllipsis && !$hasDashes)
		{
			return;
		}

		// Map each matched sequence to its UTF-8 replacement
		$replacements = [
			'--'  => "\xE2\x80\x93",
			'---' => "\xE2\x80\x94",
			'...' => "\xE2\x80\xA6"
		];

		preg_match_all('/---?|\\.\\.\\./S', $this->text, $matches, PREG_OFFSET_CAPTURE);
		foreach ($matches[0] as $match)
		{
			$sequence = $match[0];
			$position = $match[1];

			$this->addTag($position, strlen($sequence), $replacements[$sequence]);
		}
	}
91
92
	/**
	* Parse pairs of double quotes
	*
	* Does quote pairs “” -- done in its own pass, separately from single quotes, to handle
	* nesting. A pair must fit on a single line; the opening quote must not follow a letter or
	* a digit and the closing quote must not precede one
	*
	* @return void
	*/
	protected function parseDoubleQuotePairs()
	{
		// Nothing to do if the text doesn't contain any double quote
		if (!$this->hasDoubleQuote)
		{
			return;
		}

		$regexp     = '/(?<![0-9\\pL])"[^"\\n]+"(?![0-9\\pL])/uS';
		$leftQuote  = "\xE2\x80\x9C";
		$rightQuote = "\xE2\x80\x9D";

		$this->parseQuotePairs($regexp, $leftQuote, $rightQuote);
	}
110
111
	/**
	* Parse pairs of quotes
	*
	* Adds one tag on the first byte and one tag on the last byte of every match, which are
	* expected to be the opening and closing quotes
	*
	* @param  string $regexp     Regexp used to identify quote pairs
	* @param  string $leftQuote  Fancy replacement for left quote
	* @param  string $rightQuote Fancy replacement for right quote
	* @return void
	*/
	protected function parseQuotePairs($regexp, $leftQuote, $rightQuote)
	{
		preg_match_all($regexp, $this->text, $matches, PREG_OFFSET_CAPTURE);
		foreach ($matches[0] as $match)
		{
			// Offsets and lengths are both expressed in bytes
			$leftPos  = $match[1];
			$rightPos = $leftPos + strlen($match[0]) - 1;

			$leftTag  = $this->addTag($leftPos, 1, $leftQuote);
			$rightTag = $this->addTag($rightPos, 1, $rightQuote);

			// If the left quote ends up being skipped, invalidate the right quote as well so
			// that it remains untouched
			$leftTag->cascadeInvalidationTo($rightTag);
		}
	}
132
133
	/**
	* Parse pairs of single quotes
	*
	* Does quote pairs ‘’ -- done in its own pass, separately from double quotes, to handle
	* nesting. A pair must fit on a single line; the opening quote must not follow a letter or
	* a digit and the closing quote must not precede one
	*
	* @return void
	*/
	protected function parseSingleQuotePairs()
	{
		// Nothing to do if the text doesn't contain any single quote
		if (!$this->hasSingleQuote)
		{
			return;
		}

		$regexp     = "/(?<![0-9\\pL])'[^'\\n]+'(?![0-9\\pL])/uS";
		$leftQuote  = "\xE2\x80\x98";
		$rightQuote = "\xE2\x80\x99";

		$this->parseQuotePairs($regexp, $leftQuote, $rightQuote);
	}
151
152
	/**
	* Parse single quotes in general
	*
	* Does apostrophes ’ found right after a letter, or found at the beginning of a word if
	* they are followed by a letter or by two digits
	*
	* @return void
	*/
	protected function parseSingleQuotes()
	{
		if ($this->hasSingleQuote)
		{
			$apostrophe = "\xE2\x80\x99";

			// Tags get a worse priority than default so that quote pairs take precedence
			$prio = 10;

			preg_match_all(
				"/(?<=\\pL)'|(?<!\\S)'(?=\\pL|[0-9]{2})/uS",
				$this->text,
				$matches,
				PREG_OFFSET_CAPTURE
			);
			foreach ($matches[0] as $match)
			{
				$this->addTag($match[1], 1, $apostrophe, $prio);
			}
		}
	}
174
175
	/**
	* Parse symbols found after digits
	*
	* Does symbols found after a digit:
	*  - apostrophe ’ if it's followed by an "s" as in 80's
	*  - prime ′ and double prime ″
	*  - multiply sign × if it's followed by an optional space and another digit
	*
	* @return void
	*/
	protected function parseSymbolsAfterDigits()
	{
		// The regexp can only match if the text contains a quote or the letter "x"
		$mayMatch = $this->hasSingleQuote
		         || $this->hasDoubleQuote
		         || strpos($this->text, 'x') !== false;
		if (!$mayMatch)
		{
			return;
		}

		preg_match_all(
			'/[0-9](?>\'s|["\']? ?x(?= ?[0-9])|["\'])/S',
			$this->text,
			$matches,
			PREG_OFFSET_CAPTURE
		);
		foreach ($matches[0] as $match)
		{
			$str = $match[0];
			$pos = $match[1];

			// A match that ends with "x" gets a multiply sign in place of its last character
			if (substr($str, -1) === 'x')
			{
				$this->addTag($pos + strlen($str) - 1, 1, "\xC3\x97");
			}

			// Only go further if there is an apostrophe/prime right after the digit
			$chrAfterDigit = $str[1];
			if ($chrAfterDigit !== "'" && $chrAfterDigit !== '"')
			{
				continue;
			}

			if (substr($str, 1, 2) === "'s")
			{
				// 80's -- use an apostrophe
				$chr = "\xE2\x80\x99";
			}
			elseif ($chrAfterDigit === "'")
			{
				// 12' -- use a prime
				$chr = "\xE2\x80\xB2";
			}
			else
			{
				// 12" -- use a double prime
				$chr = "\xE2\x80\xB3";
			}

			$this->addTag($pos + 1, 1, $chr);
		}
	}
221
222
	/**
	* Parse symbols found in parentheses such as (c)
	*
	* Does symbols ©, ® and ™ written as (c), (r) and (tm) in either case
	*
	* @return void
	*/
	protected function parseSymbolsInParentheses()
	{
		// None of the symbols can be present without an opening parenthesis
		$hasParenthesis = (strpos($this->text, '(') !== false);
		if (!$hasParenthesis)
		{
			return;
		}

		// Map each lowercased sequence to its UTF-8 replacement
		$replacements = [
			'(c)'  => "\xC2\xA9",
			'(r)'  => "\xC2\xAE",
			'(tm)' => "\xE2\x84\xA2"
		];

		preg_match_all('/\\((?>c|r|tm)\\)/i', $this->text, $matches, PREG_OFFSET_CAPTURE);
		foreach ($matches[0] as $match)
		{
			$sequence = $match[0];
			$position = $match[1];

			// The regexp is case-insensitive, so lowercase the letters it can match before
			// looking up the replacement
			$key = strtr($sequence, 'CMRT', 'cmrt');

			$this->addTag($position, strlen($sequence), $replacements[$key]);
		}
	}
248
}