|
1
|
|
|
<?php |
|
2
|
|
|
/** |
|
3
|
|
|
* This file is part of PHP-Typography. |
|
4
|
|
|
* |
|
5
|
|
|
* Copyright 2014-2017 Peter Putzer. |
|
6
|
|
|
* Copyright 2009-2011 KINGdesk, LLC. |
|
7
|
|
|
* |
|
8
|
|
|
* This program is free software; you can redistribute it and/or modify |
|
9
|
|
|
* it under the terms of the GNU General Public License as published by |
|
10
|
|
|
* the Free Software Foundation; either version 2 of the License, or |
|
11
|
|
|
* (at your option) any later version. |
|
12
|
|
|
* |
|
13
|
|
|
* This program is distributed in the hope that it will be useful, |
|
14
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
15
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|
16
|
|
|
* GNU General Public License for more details. |
|
17
|
|
|
* |
|
18
|
|
|
* You should have received a copy of the GNU General Public License along |
|
19
|
|
|
* with this program; if not, write to the Free Software Foundation, Inc., |
|
20
|
|
|
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
|
21
|
|
|
* |
|
22
|
|
|
* *** |
|
23
|
|
|
* |
|
24
|
|
|
* @package mundschenk-at/php-typography |
|
25
|
|
|
* @license http://www.gnu.org/licenses/gpl-2.0.html |
|
26
|
|
|
*/ |
|
27
|
|
|
|
|
28
|
|
|
namespace PHP_Typography\Fixes\Node_Fixes; |
|
29
|
|
|
|
|
30
|
|
|
use \PHP_Typography\DOM; |
|
31
|
|
|
use \PHP_Typography\RE; |
|
32
|
|
|
use \PHP_Typography\Settings; |
|
33
|
|
|
use \PHP_Typography\Strings; |
|
34
|
|
|
use \PHP_Typography\U; |
|
35
|
|
|
|
|
36
|
|
|
/** |
|
37
|
|
|
* Applies smart quotes (if enabled). |
|
38
|
|
|
* |
|
39
|
|
|
* @author Peter Putzer <[email protected]> |
|
40
|
|
|
* |
|
41
|
|
|
* @since 5.0.0 |
|
42
|
|
|
*/ |
|
43
|
|
|
class Smart_Quotes_Fix extends Abstract_Node_Fix {

	/**
	 * Words starting with an apostrophe that must not be treated as an opening quote.
	 *
	 * Maps the plain-text spelling to the spelling with a typographic apostrophe.
	 * The first key already contains `U::APOSTROPHE` in the middle because `apply()`
	 * runs the in-word apostrophe replacement before these exceptions are applied.
	 *
	 * @var array
	 */
	const APOSTROPHE_EXCEPTIONS = [
		"'tain" . U::APOSTROPHE . 't' => U::APOSTROPHE . 'tain' . U::APOSTROPHE . 't',
		"'twere"                      => U::APOSTROPHE . 'twere',
		"'twas"                       => U::APOSTROPHE . 'twas',
		"'tis"                        => U::APOSTROPHE . 'tis',
		"'til"                        => U::APOSTROPHE . 'til',
		"'bout"                       => U::APOSTROPHE . 'bout',
		"'nuff"                       => U::APOSTROPHE . 'nuff',
		"'round"                      => U::APOSTROPHE . 'round',
		"'cause"                      => U::APOSTROPHE . 'cause',
		"'splainin"                   => U::APOSTROPHE . 'splainin',
	];

	/**
	 * Pattern fragment (no delimiters): a word boundary, an optional `digits/` prefix
	 * and one to three digits. Shared by all prime patterns below.
	 *
	 * @var string
	 */
	const NUMBERS_BEFORE_PRIME = '\b(?:\d+\/)?\d{1,3}';

	// Prime patterns. The number is captured as $1; the `_1_GLYPH` variants match a
	// single `"` instead of two `'`, the `_COMPOUND` variants require a following `-\w`.
	const DOUBLE_PRIME                  = '/(' . self::NUMBERS_BEFORE_PRIME . ")''(?=\W|\Z)/u";
	const DOUBLE_PRIME_COMPOUND         = '/(' . self::NUMBERS_BEFORE_PRIME . ")''(?=-\w)/u";
	const DOUBLE_PRIME_1_GLYPH          = '/(' . self::NUMBERS_BEFORE_PRIME . ')"(?=\W|\Z)/u';
	const DOUBLE_PRIME_1_GLYPH_COMPOUND = '/(' . self::NUMBERS_BEFORE_PRIME . ')"(?=-\w)/u';
	const SINGLE_PRIME                  = '/(' . self::NUMBERS_BEFORE_PRIME . ")'(?=\W|\Z)/u";
	const SINGLE_PRIME_COMPOUND         = '/(' . self::NUMBERS_BEFORE_PRIME . ")'(?=-\w)/u";
	const SINGLE_DOUBLE_PRIME           = '/(' . self::NUMBERS_BEFORE_PRIME . ")'(\s*)(\b(?:\d+\/)?\d+)''(?=\W|\Z)/u";
	const SINGLE_DOUBLE_PRIME_1_GLYPH   = '/(' . self::NUMBERS_BEFORE_PRIME . ")'(\s*)(\b(?:\d+\/)?\d+)\"(?=\W|\Z)/u";

	// Quoted text that ends in a number (captured as $1); handled before the primes.
	const SINGLE_QUOTED_NUMBERS = "/(?<=\W|\A)'([^\"]*\d+)'(?=\W|\Z)/u";
	const DOUBLE_QUOTED_NUMBERS = '/(?<=\W|\A)"([^"]*\d+)"(?=\W|\Z)/u';

	// A comma directly before non-whitespace at the start of the text or after whitespace.
	const COMMA_QUOTE = '/(?<=\s|\A),(?=\S)/';

	// Apostrophes between word characters and before two-digit numbers.
	const APOSTROPHE_WORDS   = "/(?<=[\w])'(?=[\w])/u";
	const APOSTROPHE_DECADES = "/'(\d\d\b)/";

	// Opening/closing quote detection. The `_SPECIAL` variants key on whitespace
	// instead of word characters.
	const SINGLE_QUOTE_OPEN          = "/'(?=[\w])/u";
	const SINGLE_QUOTE_CLOSE         = "/(?<=[\w])'/u";
	const SINGLE_QUOTE_OPEN_SPECIAL  = "/(?<=\s|\A)'(?=\S)/"; // like _'¿hola?'_.
	const SINGLE_QUOTE_CLOSE_SPECIAL = "/(?<=\S)'(?=\s|\Z)/";
	const DOUBLE_QUOTE_OPEN          = '/"(?=[\w])/u';
	const DOUBLE_QUOTE_CLOSE         = '/(?<=[\w])"/u';
	const DOUBLE_QUOTE_OPEN_SPECIAL  = '/(?<=\s|\A)"(?=\S)/';
	const DOUBLE_QUOTE_CLOSE_SPECIAL = '/(?<=\S)"(?=\s|\Z)/';

	/**
	 * Apostrophe exceptions matching array (the keys of APOSTROPHE_EXCEPTIONS).
	 *
	 * @var array
	 */
	protected $apostrophe_exception_matches;

	/**
	 * Apostrophe exceptions replacement array (the values of APOSTROPHE_EXCEPTIONS).
	 *
	 * @var array
	 */
	protected $apostrophe_exception_replacements;

	/**
	 * Cached primary quote style.
	 *
	 * @var \PHP_Typography\Settings\Quotes|null
	 */
	protected $cached_primary_quotes;

	/**
	 * Cached secondary quote style.
	 *
	 * @var \PHP_Typography\Settings\Quotes|null
	 */
	protected $cached_secondary_quotes;

	/**
	 * Brackets matching array (depending on quote styles).
	 *
	 * @var array
	 */
	protected $brackets_matches;

	/**
	 * Brackets replacement array (depending on quote styles).
	 *
	 * @var array
	 */
	protected $brackets_replacements;

	/**
	 * Creates a new fix instance.
	 *
	 * Splits APOSTROPHE_EXCEPTIONS into parallel search/replace arrays once, so
	 * that `apply()` can hand them straight to `str_replace()`.
	 *
	 * @param bool $feed_compatible Optional. Default false.
	 */
	public function __construct( $feed_compatible = false ) {
		parent::__construct( $feed_compatible );

		$this->apostrophe_exception_matches      = array_keys( self::APOSTROPHE_EXCEPTIONS );
		$this->apostrophe_exception_replacements = array_values( self::APOSTROPHE_EXCEPTIONS );
	}

	/**
	 * Apply the fix to a given textnode.
	 *
	 * Does nothing unless the `smartQuotes` setting is non-empty. Otherwise the
	 * node text is temporarily wrapped in its neighbouring characters (so the
	 * patterns see context across inline tags), run through the replacements in
	 * a fixed order, stripped of the neighbouring characters again and written
	 * back to the node.
	 *
	 * @param \DOMText $textnode Required.
	 * @param Settings $settings Required.
	 * @param bool     $is_title Optional. Default false.
	 */
	public function apply( \DOMText $textnode, Settings $settings, $is_title = false ) {
		if ( empty( $settings['smartQuotes'] ) ) {
			return;
		}

		// Clone the node's data attribute for the duration.
		$node_data = $textnode->data;

		// Need to get context of adjacent characters outside adjacent inline tags or HTML comment
		// if we have adjacent characters add them to the text.
		$previous_character = DOM::get_prev_chr( $textnode );
		$next_character     = DOM::get_next_chr( $textnode );
		$node_data          = "{$previous_character}{$node_data}{$next_character}";

		// Various special characters and regular expressions.
		$double = $settings->primary_quote_style();
		$single = $settings->secondary_quote_style();

		// Mark quotes to ensure proper removal of replaced adjacent characters.
		$double_open  = RE::ESCAPE_MARKER . $double->open() . RE::ESCAPE_MARKER;
		$double_close = RE::ESCAPE_MARKER . $double->close() . RE::ESCAPE_MARKER;
		$single_open  = RE::ESCAPE_MARKER . $single->open() . RE::ESCAPE_MARKER;
		$single_close = RE::ESCAPE_MARKER . $single->close() . RE::ESCAPE_MARKER;

		// Rebuild the bracket replacement tables only when the quote styles changed.
		if ( $double != $this->cached_primary_quotes || $single != $this->cached_secondary_quotes ) { // WPCS: loose comparison ok.
			$this->update_smart_quotes_brackets( $double_open, $double_close, $single_open, $single_close );
			$this->cached_primary_quotes   = $double;
			$this->cached_secondary_quotes = $single;
		}

		// Before primes, handle quoted numbers (and quotes ending in numbers).
		$node_data = preg_replace( self::SINGLE_QUOTED_NUMBERS, "{$single_open}\$1{$single_close}", $node_data );
		$node_data = preg_replace( self::DOUBLE_QUOTED_NUMBERS, "{$double_open}\$1{$double_close}", $node_data );

		// Guillemets. Both the literal and the entity-encoded spelling are handled
		// (previously the literal form was simply replaced twice, which was a no-op).
		$node_data = str_replace( '<<', U::GUILLEMET_OPEN, $node_data );
		$node_data = str_replace( '&lt;&lt;', U::GUILLEMET_OPEN, $node_data );
		$node_data = str_replace( '>>', U::GUILLEMET_CLOSE, $node_data );
		$node_data = str_replace( '&gt;&gt;', U::GUILLEMET_CLOSE, $node_data );

		// Primes.
		$node_data = preg_replace( self::SINGLE_DOUBLE_PRIME, '$1' . U::SINGLE_PRIME . '$2$3' . U::DOUBLE_PRIME, $node_data );
		$node_data = preg_replace( self::SINGLE_DOUBLE_PRIME_1_GLYPH, '$1' . U::SINGLE_PRIME . '$2$3' . U::DOUBLE_PRIME, $node_data );
		$node_data = preg_replace( self::DOUBLE_PRIME, '$1' . U::DOUBLE_PRIME, $node_data ); // should not interfere with regular quote matching.
		$node_data = preg_replace( self::SINGLE_PRIME, '$1' . U::SINGLE_PRIME, $node_data );
		$node_data = preg_replace( self::SINGLE_PRIME_COMPOUND, '$1' . U::SINGLE_PRIME, $node_data );
		$node_data = preg_replace( self::DOUBLE_PRIME_COMPOUND, '$1' . U::DOUBLE_PRIME, $node_data );
		$node_data = preg_replace( self::DOUBLE_PRIME_1_GLYPH, '$1' . U::DOUBLE_PRIME, $node_data ); // should not interfere with regular quote matching.
		$node_data = preg_replace( self::DOUBLE_PRIME_1_GLYPH_COMPOUND, '$1' . U::DOUBLE_PRIME, $node_data );

		// Backticks.
		$node_data = str_replace( '``', $double_open, $node_data );
		$node_data = str_replace( '`', $single_open, $node_data );
		$node_data = str_replace( "''", $double_close, $node_data );

		// Comma quotes.
		$node_data = str_replace( ',,', U::DOUBLE_LOW_9_QUOTE, $node_data );
		$node_data = preg_replace( self::COMMA_QUOTE, U::SINGLE_LOW_9_QUOTE, $node_data ); // like _,¿hola?'_.

		// Apostrophes.
		$node_data = preg_replace( self::APOSTROPHE_WORDS, U::APOSTROPHE, $node_data );
		$node_data = preg_replace( self::APOSTROPHE_DECADES, U::APOSTROPHE . '$1', $node_data ); // decades: '98.
		$node_data = str_replace( $this->apostrophe_exception_matches, $this->apostrophe_exception_replacements, $node_data );

		// Quotes.
		$node_data = str_replace( $this->brackets_matches, $this->brackets_replacements, $node_data );
		$node_data = preg_replace( self::SINGLE_QUOTE_OPEN, $single_open, $node_data );
		$node_data = preg_replace( self::SINGLE_QUOTE_CLOSE, $single_close, $node_data );
		$node_data = preg_replace( self::SINGLE_QUOTE_OPEN_SPECIAL, $single_open, $node_data ); // like _'¿hola?'_.
		$node_data = preg_replace( self::SINGLE_QUOTE_CLOSE_SPECIAL, $single_close, $node_data );
		$node_data = preg_replace( self::DOUBLE_QUOTE_OPEN, $double_open, $node_data );
		$node_data = preg_replace( self::DOUBLE_QUOTE_CLOSE, $double_close, $node_data );
		$node_data = preg_replace( self::DOUBLE_QUOTE_OPEN_SPECIAL, $double_open, $node_data );
		$node_data = preg_replace( self::DOUBLE_QUOTE_CLOSE_SPECIAL, $double_close, $node_data );

		// Quote catch-alls - assume left over quotes are closing - as this is often the most complicated position, thus most likely to be missed.
		$node_data = str_replace( "'", $single_close, $node_data );
		$node_data = str_replace( '"', $double_close, $node_data );

		// Check if adjacent characters were replaced with multi-byte replacements.
		$quotes          = [ $double_open, $double_close, $single_open, $single_close ];
		$func            = Strings::functions( $node_data );
		$substr          = $func['substr'];
		$strlen          = $func['strlen'];
		$previous_length = $strlen( $previous_character );
		$next_length     = $strlen( $next_character );

		// Compare against the leading slice (offset 0, explicit length). Passing the
		// length as the offset would return the tail of the string instead.
		if ( $previous_length > 0 && $previous_character !== $substr( $node_data, 0, $previous_length ) ) {
			$previous_length = self::calc_adjacent_length( $previous_length, $node_data, $quotes, $substr, $strlen, false );
		}
		if ( $next_length > 0 && $next_character !== $substr( $node_data, -$next_length ) ) {
			$next_length = self::calc_adjacent_length( $next_length, $node_data, $quotes, $substr, $strlen, true );
		}

		// If we have adjacent characters, remove them from the text.
		// NOTE(review): remove_adjacent_characters() is not defined in this class; presumably inherited from Abstract_Node_Fix.
		$node_data = self::remove_adjacent_characters( $node_data, $previous_length, $next_length );

		// Remove the escape markers and restore the text to the actual node.
		$textnode->data = str_replace( RE::ESCAPE_MARKER, '', $node_data );
	}

	/**
	 * Calculates the adjacent character length.
	 *
	 * Returns the length of the first needle that sits at the very start of the
	 * haystack (or at its very end when `$reverse` is true). If none of the
	 * needles is found there, the current length is returned unchanged.
	 *
	 * @param int      $current  The current length of the adjacent character(s).
	 * @param string   $haystack The complete string.
	 * @param string[] $needles  The replacement to look for.
	 * @param callable $substr   A `substr`-like function.
	 * @param callable $strlen   A `strlen`-like function.
	 * @param bool     $reverse  Optional. Look at the end of the haystack instead of the start. Default false.
	 *
	 * @return int
	 */
	private static function calc_adjacent_length( $current, $haystack, array $needles, callable $substr, callable $strlen, $reverse = false ) {
		foreach ( $needles as $needle ) {
			$len = $strlen( $needle );

			if ( $needle === $substr( $haystack, ( $reverse ? -$len : 0 ), $len ) ) {
				return $len;
			}
		}

		return $current;
	}

	/**
	 * Update smartQuotesBrackets component after quote style change.
	 *
	 * Builds the literal search/replace tables for quotes directly next to
	 * brackets and for directly adjacent double/single quote pairs, and stores
	 * them in `$brackets_matches` / `$brackets_replacements`.
	 *
	 * @param string $primary_open    Primary quote style open.
	 * @param string $primary_close   Primary quote style close.
	 * @param string $secondary_open  Secondary quote style open.
	 * @param string $secondary_close Secondary quote style close.
	 */
	private function update_smart_quotes_brackets( $primary_open, $primary_close, $secondary_open, $secondary_close ) {
		$brackets = [
			// Single quotes.
			"['"  => '[' . $secondary_open,
			"{'"  => '{' . $secondary_open,
			"('"  => '(' . $secondary_open,
			"']"  => $secondary_close . ']',
			"'}"  => $secondary_close . '}',
			"')"  => $secondary_close . ')',

			// Double quotes.
			'["'  => '[' . $primary_open,
			'{"'  => '{' . $primary_open,
			'("'  => '(' . $primary_open,
			'"]'  => $primary_close . ']',
			'"}'  => $primary_close . '}',
			'")'  => $primary_close . ')',

			// Quotes & quotes.
			"\"'" => $primary_open . $secondary_open,
			"'\"" => $secondary_close . $primary_close,
		];

		$this->brackets_matches      = array_keys( $brackets );
		$this->brackets_replacements = array_values( $brackets );
	}
}
|
305
|
|
|
|
Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.
You can also find more detailed suggestions in the “Code” section of your repository.