We could not synchronize checks via GitHub's checks API since Scrutinizer's GitHub App is not installed for this repository.
Total Complexity | 53 |
Total Lines | 355 |
Duplicated Lines | 0 % |
Coverage | 100% |
Changes | 3 | ||
Bugs | 0 | Features | 0 |
Complex classes like Hyphenator often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use Hyphenator, and based on these observations, apply Extract Interface, too.
1 | <?php |
||
45 | class Hyphenator { |
||
46 | |||
47 | /** |
||
48 | * The hyphenation patterns, stored in a trie for easier searching. |
||
49 | * |
||
50 | * @var Trie_Node|null |
||
51 | */ |
||
52 | protected $pattern_trie; |
||
53 | |||
54 | /** |
||
55 | * The hyphenation exceptions from the pattern file. |
||
56 | * Stored as an array of "hy-phen-at-ed" strings. |
||
57 | * |
||
58 | * @var array |
||
59 | */ |
||
60 | protected $pattern_exceptions; |
||
61 | |||
62 | /** |
||
63 | * Custom hyphenation exceptions set by the user. |
||
64 | * Stored as an array of "hy-phen-at-ed" strings. |
||
65 | * |
||
66 | * @var array |
||
67 | */ |
||
68 | protected $custom_exceptions; |
||
69 | |||
70 | /** |
||
71 | * A hash of the $custom_exceptions array (hexadecimal MD5 digest), used to detect changes. |
||
72 | * |
||
73 | * @var string |
||
74 | */ |
||
75 | protected $custom_exceptions_hash; |
||
76 | |||
77 | /** |
||
78 | * Patterns calculated from the merged hyphenation exceptions. |
||
79 | * |
||
80 | * @var array|null |
||
81 | */ |
||
82 | protected $merged_exception_patterns; |
||
83 | |||
84 | /** |
||
85 | * The current hyphenation language. |
||
86 | * Stored in the short form (e.g. "en-US"). |
||
87 | * |
||
88 | * @var string|null |
||
89 | */ |
||
90 | protected $language; |
||
91 | |||
/**
 * Creates a hyphenator, optionally preloading a language and custom exceptions.
 *
 * Both arguments are simply forwarded to their setters when they are non-empty.
 *
 * @param string|null $language   Optional. Short-form language name (e.g. "en-US"). Default null.
 * @param array       $exceptions Optional. Custom hyphenation exceptions. Default empty array.
 */
public function __construct( $language = null, array $exceptions = [] ) {
	// Only load a pattern file when a language was actually given.
	if ( $language ) {
		$this->set_language( $language );
	}

	// Only register exceptions when there are any.
	if ( [] !== $exceptions ) {
		$this->set_custom_exceptions( $exceptions );
	}
}
108 | |||
/**
 * Replaces the custom (user-supplied) hyphenation exceptions.
 *
 * The exceptions are lowercased and stored keyed by their unhyphenated form.
 * Nothing happens if the new list hashes to the same value as the current one.
 *
 * @param array $exceptions Optional. An array of words with all hyphenation points marked with a hard hyphen. Default empty array.
 */
public function set_custom_exceptions( array $exceptions = [] ) {
	// Clearing an already empty list is a no-op.
	if ( empty( $exceptions ) && empty( $this->custom_exceptions ) ) {
		return;
	}

	// Skip the rebuild when the input hashes to the same value as last time.
	$hash = self::get_object_hash( $exceptions );
	if ( $hash === $this->custom_exceptions_hash ) {
		return;
	}

	// Map every lowercased, hyphen-marked word to its unhyphenated form.
	$unhyphenated = [];
	foreach ( $exceptions as $marked_word ) {
		$func = Strings::functions( $marked_word );

		// Entries with an unknown encoding are silently skipped.
		if ( ! empty( $func ) ) {
			$marked_word                  = $func['strtolower']( $marked_word );
			$unhyphenated[ $marked_word ] = \preg_replace( "#-#{$func['u']}", '', $marked_word );
		}
	}

	// Flip the map so that lookups are keyed by the unhyphenated word.
	$this->custom_exceptions      = \array_flip( $unhyphenated );
	$this->custom_exceptions_hash = $hash;

	// Invalidate the merged patterns so they are rebuilt on the next hyphenate() call.
	$this->merged_exception_patterns = null;
}
144 | |||
/**
 * Calculates a hash for arbitrary data.
 *
 * The data is JSON-encoded and then hashed with MD5. When JSON encoding fails
 * (e.g. because a string is not valid UTF-8), the serialized representation is
 * hashed instead, so that different unencodable inputs still yield different
 * hashes rather than all collapsing to the hash of an empty string.
 *
 * @param mixed $object Any datatype.
 *
 * @return string The MD5 digest as a 32-character hexadecimal string.
 */
protected static function get_object_hash( $object ) {
	$encoded = \json_encode( $object );

	if ( false === $encoded ) {
		// json_encode() returns false on failure; fall back to a representation
		// that does not depend on the string encoding.
		$encoded = \serialize( $object );
	}

	return \md5( $encoded, false );
}
155 | |||
/**
 * Sets the hyphenation pattern language.
 *
 * Loads `lang/<$lang>.json` relative to this file, builds the pattern trie from
 * its `patterns` entry and stores its `exceptions` entry. On any failure
 * (missing file, unreadable file, invalid JSON, missing patterns) the language,
 * trie and pattern exceptions are reset.
 *
 * @param string $lang Optional. Has to correspond to a filename in 'lang'. Default 'en-US'.
 *
 * @return bool Whether loading the pattern file was successful.
 */
public function set_language( $lang = 'en-US' ) {
	if ( isset( $this->language ) && $this->language === $lang ) {
		return true; // Bail out, no need to do anything.
	}

	$success            = false;
	$language_file_name = \dirname( __FILE__ ) . '/lang/' . $lang . '.json';

	if ( \file_exists( $language_file_name ) ) {
		$raw_language_file = \file_get_contents( $language_file_name );

		if ( false !== $raw_language_file ) {
			$language_file = \json_decode( $raw_language_file, true );

			// json_decode() signals failure with null (not false), and valid JSON
			// may still decode to a scalar, so require an array with patterns.
			if ( \is_array( $language_file ) && isset( $language_file['patterns'] ) && \is_array( $language_file['patterns'] ) ) {
				$this->language     = $lang;
				$this->pattern_trie = Trie_Node::build_trie( $language_file['patterns'] );

				// A file without (valid) exceptions simply has none.
				$has_exceptions           = isset( $language_file['exceptions'] ) && \is_array( $language_file['exceptions'] );
				$this->pattern_exceptions = $has_exceptions ? $language_file['exceptions'] : [];

				$success = true;
			}
		}
	}

	// Clean up.
	if ( ! $success ) {
		$this->language           = null;
		$this->pattern_trie       = null;
		$this->pattern_exceptions = [];
	}

	// Reset the merged exception patterns to force remerging of pattern file and custom exceptions.
	$this->merged_exception_patterns = null;

	return $success;
}
199 | |||
/**
 * Hyphenates parsed text tokens.
 *
 * Every token is replaced by a copy (via `with_value()`) whose value has been
 * run through hyphenate_word(). The input is returned unchanged if no pattern
 * data is loaded or if `$min_length` or `$min_before` is empty.
 *
 * @param array  $parsed_text_tokens   An array of text tokens.
 * @param string $hyphen               Optional. The hyphen character. Default '-'.
 * @param bool   $hyphenate_title_case Optional. Whether words in Title Case should be hyphenated. Default false.
 * @param int    $min_length           Optional. Minimum word length for hyphenation. Default 2.
 * @param int    $min_before           Optional. Minimum number of characters before a hyphenation point. Default 2.
 * @param int    $min_after            Optional. Minimum number of characters after a hyphenation point. Default 2.
 *
 * @return Token[] The modified text tokens.
 */
public function hyphenate( array $parsed_text_tokens, $hyphen = '-', $hyphenate_title_case = false, $min_length = 2, $min_before = 2, $min_after = 2 ) {
	// Without pattern data or sensible limits there is nothing to do.
	$has_patterns = isset( $this->pattern_trie, $this->pattern_exceptions );
	if ( ! $has_patterns || empty( $min_length ) || empty( $min_before ) ) {
		return $parsed_text_tokens;
	}

	// Lazily build the merged exception patterns.
	if ( ! isset( $this->merged_exception_patterns ) ) {
		$this->merge_hyphenation_exceptions();
	}

	// Keys and order of the input array are preserved.
	$hyphenated_tokens = [];
	foreach ( $parsed_text_tokens as $index => $token ) {
		$new_value                   = $this->hyphenate_word( $token->value, $hyphen, $hyphenate_title_case, $min_length, $min_before, $min_after );
		$hyphenated_tokens[ $index ] = $token->with_value( $new_value );
	}

	return $hyphenated_tokens;
}
228 | |||
/**
 * Hyphenates a single word.
 *
 * The word is returned unchanged if its encoding is unknown, if it is shorter
 * than `$min_length`, or if it differs from its lowercase form while
 * `$hyphenate_title_case` is false.
 *
 * @param string $word                 The word to hyphenate.
 * @param string $hyphen               The hyphen character.
 * @param bool   $hyphenate_title_case Whether words in Title Case should be hyphenated.
 * @param int    $min_length           Minimum word length for hyphenation.
 * @param int    $min_before           Minimum number of characters before a hyphenation point.
 * @param int    $min_after            Minimum number of characters after a hyphenation point.
 *
 * @return string
 */
protected function hyphenate_word( $word, $hyphen, $hyphenate_title_case, $min_length, $min_before, $min_after ) {
	// Pick the string functions matching the word's encoding.
	$func = Strings::functions( $word );
	if ( empty( $func ) ) {
		return $word; // Unknown encoding, leave the word untouched.
	}

	// Words below the minimum length are never hyphenated.
	$length = $func['strlen']( $word );
	if ( $length < $min_length ) {
		return $word;
	}

	// Exceptions and trie patterns are looked up by the lowercase form.
	$lowercase = $func['strtolower']( $word );

	// Any difference to the lowercase form disqualifies the word unless explicitly allowed.
	if ( $lowercase !== $word && ! $hyphenate_title_case ) {
		return $word;
	}

	// Exceptions take precedence over the trie lookup.
	$pattern = isset( $this->merged_exception_patterns[ $lowercase ] )
		? $this->merged_exception_patterns[ $lowercase ]
		: $this->lookup_word_pattern( $lowercase, $func['strlen'], $func['str_split'] );

	// Rebuild the word character by character, inserting hyphens in front of
	// positions with an odd pattern value that respect both margins.
	$characters = $func['str_split']( $word, 1 );
	$result     = '';

	for ( $pos = 0; $pos < $length; $pos++ ) {
		if ( isset( $pattern[ $pos ] ) && self::is_odd( $pattern[ $pos ] ) && ( $pos >= $min_before ) && ( $pos <= $length - $min_after ) ) {
			$result .= $hyphen;
		}

		$result .= $characters[ $pos ];
	}

	return $result;
}
286 | |||
/**
 * Looks up the hyphenation pattern for a word in the pattern trie.
 *
 * Starting at every position of the underscore-padded key, the trie is walked
 * as far as it matches. The offsets of every node visited are collected,
 * keeping the maximum value per position.
 *
 * @param string   $key       The search key (lowercase word).
 * @param callable $strlen    A function equivalent to `strlen` for the appropriate encoding.
 * @param callable $str_split A function equivalent to `str_split` for the appropriate encoding.
 *
 * @return array The hyphenation pattern (position => value). Empty if no trie is loaded.
 */
protected function lookup_word_pattern( $key, callable $strlen, callable $str_split ) {
	if ( null === $this->pattern_trie ) {
		return []; // No patterns loaded.
	}

	// Pad the key with underscores on both sides so that no out-of-index
	// checks are needed below. The key is expected to be lowercase already.
	$padded  = '_' . $key . '_';
	$length  = $strlen( $padded );
	$letters = $str_split( $padded );
	$pattern = [];

	for ( $start = 0; $start < $length; ++$start ) {
		// Every walk begins at the trie root.
		$node = $this->pattern_trie;

		// Follow the trie for as long as it has a path for the next character.
		for ( $pos = $start; $pos < $length && $node->exists( $letters[ $pos ] ); ++$pos ) {
			$node = $node->get_node( $letters[ $pos ] );

			// Merge the offsets stored on this node, keeping the maximum per position.
			foreach ( $node->offsets() as $entry ) {
				$value = $entry[0];
				$index = $entry[1] + $start - 1;

				if ( isset( $pattern[ $index ] ) ) {
					$pattern[ $index ] = max( $pattern[ $index ], $value );
				} else {
					$pattern[ $index ] = $value;
				}
			}
		}
	}

	return $pattern;
}
334 | |||
/**
 * Combines the exceptions from the language file with the custom exceptions
 * and stores a hyphenation pattern for each of them in
 * `$merged_exception_patterns`.
 */
protected function merge_hyphenation_exceptions() {
	$has_pattern_exceptions = ! empty( $this->pattern_exceptions );
	$has_custom_exceptions  = ! empty( $this->custom_exceptions );

	if ( $has_pattern_exceptions && $has_custom_exceptions ) {
		// With array_merge(), the later array wins for duplicate string keys,
		// i.e. the language file entry replaces a custom entry for the same word.
		// NOTE(review): confirm that this precedence is intended.
		$merged = array_merge( $this->custom_exceptions, $this->pattern_exceptions );
	} elseif ( $has_pattern_exceptions ) {
		$merged = $this->pattern_exceptions;
	} elseif ( $has_custom_exceptions ) {
		$merged = $this->custom_exceptions;
	} else {
		$merged = [];
	}

	// Translate every "hy-phen-at-ed" string into its pattern, keeping the keys.
	$patterns = [];
	foreach ( $merged as $word => $hyphenated ) {
		$patterns[ $word ] = self::convert_hyphenation_exception_to_pattern( $hyphenated );
	}

	$this->merged_exception_patterns = $patterns;
}
359 | |||
/**
 * Generates a hyphenation pattern from an exception.
 *
 * For every hyphen in the exception, the resulting array contains the value 9
 * at the index equal to the number of non-hyphen characters preceding it.
 *
 * @param string $exception A hyphenation exception in the form "foo-bar". Needs to be encoded in ASCII or UTF-8.
 *
 * @return array|null Returns the hyphenation pattern or null if `$exception` is using an invalid encoding.
 */
protected static function convert_hyphenation_exception_to_pattern( $exception ) {
	$func = Strings::functions( $exception );
	if ( empty( $func ) ) {
		return null; // Unknown encoding.
	}

	$characters = $func['str_split']( $exception, 1 );
	$length     = $func['strlen']( $exception );

	$pattern  = [];
	$position = 0; // Number of non-hyphen characters seen so far.

	for ( $i = 0; $i < $length; $i++ ) {
		if ( '-' !== $characters[ $i ] ) {
			$position++;
			continue;
		}

		// 9 is odd, so hyphenate_word() treats this position as a hyphenation point.
		$pattern[ $position ] = 9;
	}

	return $pattern;
}
390 | |||
391 | /** |
||
392 | * Is a number odd? |
||
393 | * |
||
394 | * @param int $number Required. |
||
395 | * |
||
396 | * @return bool true if $number is odd, false if it is even. |
||
397 | */ |
||
398 | 6 | protected static function is_odd( $number ) { |
|
400 | } |
||
401 | } |
||
402 |