We could not synchronize checks via GitHub's checks API since Scrutinizer's GitHub App is not installed for this repository.
| Total Complexity | 53 |
| Total Lines | 355 |
| Duplicated Lines | 0 % |
| Coverage | 96.64% |
| Changes | 3 | ||
| Bugs | 0 | Features | 0 |
Complex classes like Hyphenator often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use Hyphenator, and based on these observations, apply Extract Interface, too.
| 1 | <?php |
||
| 45 | class Hyphenator { |
||
| 46 | |||
| 47 | /** |
||
| 48 | * The hyphenation patterns, stored in a trie for easier searching. |
||
| 49 | * |
||
| 50 | * @var Trie_Node|null |
||
| 51 | */ |
||
| 52 | protected $pattern_trie; |
||
| 53 | |||
| 54 | /** |
||
| 55 | * The hyphenation exceptions from the pattern file. |
||
| 56 | * Stored as an array of "hy-phen-at-ed" strings. |
||
| 57 | * |
||
| 58 | * @var array |
||
| 59 | */ |
||
| 60 | protected $pattern_exceptions; |
||
| 61 | |||
| 62 | /** |
||
| 63 | * Custom hyphenation exceptions set by the user. |
||
| 64 | * Stored as an array of "hy-phen-at-ed" strings. |
||
| 65 | * |
||
| 66 | * @var array |
||
| 67 | */ |
||
| 68 | protected $custom_exceptions; |
||
| 69 | |||
| 70 | /** |
||
| 71 | * A binary hash of $custom_exceptions array. |
||
| 72 | * |
||
| 73 | * @var string |
||
| 74 | */ |
||
| 75 | protected $custom_exceptions_hash; |
||
| 76 | |||
| 77 | /** |
||
| 78 | * Patterns calculated from the merged hyphenation exceptions. |
||
| 79 | * |
||
| 80 | * @var array|null |
||
| 81 | */ |
||
| 82 | protected $merged_exception_patterns; |
||
| 83 | |||
| 84 | /** |
||
| 85 | * The current hyphenation language. |
||
| 86 | * Stored in the short form (e.g. "en-US"). |
||
| 87 | * |
||
| 88 | * @var string|null |
||
| 89 | */ |
||
| 90 | protected $language; |
||
| 91 | |||
/**
 * Creates a hyphenator, optionally preconfigured with a language and custom exceptions.
 *
 * Each argument is only applied when it is non-empty; otherwise the respective
 * setter is not called at all.
 *
 * @param string|null $language   Optional. Short-form language name. Default null.
 * @param array       $exceptions Optional. Custom hyphenation exceptions. Default empty array.
 */
public function __construct( $language = null, array $exceptions = [] ) {
	$has_language   = ! empty( $language );
	$has_exceptions = ! empty( $exceptions );

	// Load the pattern file first, then register the user's exceptions.
	if ( $has_language ) {
		$this->set_language( $language );
	}

	if ( $has_exceptions ) {
		$this->set_custom_exceptions( $exceptions );
	}
}
|
| 108 | |||
/**
 * Replaces the user-defined hyphenation exceptions.
 *
 * The call is a no-op when both the new and the current list are empty, or when the
 * hash of the new list equals the stored hash of the previous one. Otherwise the
 * exceptions are lowercased and stored keyed by their unhyphenated form, and the
 * merged exception patterns are invalidated so that they get rebuilt on next use.
 *
 * @param array $exceptions Optional. An array of words with all hyphenation points marked with a hard hyphen. Default empty array.
 */
public function set_custom_exceptions( array $exceptions = [] ) {
	// Clearing an already empty list changes nothing.
	if ( empty( $exceptions ) && empty( $this->custom_exceptions ) ) {
		return;
	}

	// Same hash as last time: the stored exceptions are still current.
	$hash = self::get_object_hash( $exceptions );
	if ( $hash === $this->custom_exceptions_hash ) {
		return;
	}

	// Map each lowercased "hy-phen-at-ed" form to its plain spelling.
	$plain_by_hyphenated = [];
	foreach ( $exceptions as $hyphenated ) {
		$f = Strings::functions( $hyphenated );

		// Entries without a usable set of string functions (unknown encoding) are skipped.
		if ( ! empty( $f ) ) {
			$hyphenated                         = $f['strtolower']( $hyphenated );
			$plain_by_hyphenated[ $hyphenated ] = \preg_replace( "#-#{$f['u']}", '', $hyphenated );
		}
	}

	// Store as plain spelling => hyphenated form and remember the hash.
	$this->custom_exceptions      = \array_flip( $plain_by_hyphenated );
	$this->custom_exceptions_hash = $hash;

	// Force remerging of pattern-file and custom exception patterns.
	$this->merged_exception_patterns = null;
}
|
| 144 | |||
/**
 * Calculates a hash (32 character hexadecimal MD5 digest) from a data object.
 *
 * The value is JSON-encoded before hashing. json_encode() returns false for input
 * it cannot represent (e.g. strings that are not valid UTF-8); hashing that result
 * directly would give every such input the same digest, so the serialized form is
 * hashed instead in that case. Hashes of JSON-encodable values are unaffected.
 *
 * @param mixed $object Any datatype.
 *
 * @return string
 */
protected static function get_object_hash( $object ) {
	$encoded = \json_encode( $object );

	if ( false === $encoded ) {
		// Keep distinct un-encodable inputs distinguishable.
		$encoded = \serialize( $object );
	}

	return \md5( $encoded, false );
}
||
| 155 | |||
/**
 * Sets the hyphenation pattern language.
 *
 * Loads `lang/<$lang>.json` relative to this file, builds the pattern trie from its
 * `patterns` entry and stores its `exceptions` entry (an empty array if the file has
 * none). If the file is missing, unreadable, not valid JSON or has no `patterns`
 * entry, the language, trie and pattern exceptions are reset. In every case except
 * the early bail-out, the merged exception patterns are invalidated.
 *
 * @param string $lang Optional. Has to correspond to a filename in 'lang'. Default 'en-US'.
 *
 * @return bool Whether loading the pattern file was successful.
 */
public function set_language( $lang = 'en-US' ) {
	if ( isset( $this->language ) && $this->language === $lang ) {
		return true; // Bail out, no need to do anything.
	}

	$success = false;

	// NOTE(review): $lang is used verbatim as part of the file name; callers are
	// expected to pass known language codes only.
	$language_file_name = __DIR__ . '/lang/' . $lang . '.json';

	if ( \file_exists( $language_file_name ) ) {
		$raw_language_file = \file_get_contents( $language_file_name );

		if ( false !== $raw_language_file ) {
			$language_file = \json_decode( $raw_language_file, true );

			// json_decode() reports failure with null (not false), and syntactically
			// valid JSON may still be a scalar or lack the pattern list.
			if ( \is_array( $language_file ) && isset( $language_file['patterns'] ) ) {
				$this->language           = $lang;
				$this->pattern_trie       = Trie_Node::build_trie( $language_file['patterns'] );
				$this->pattern_exceptions = isset( $language_file['exceptions'] ) ? $language_file['exceptions'] : [];

				$success = true;
			}
		}
	}

	// Clean up.
	if ( ! $success ) {
		$this->language           = null;
		$this->pattern_trie       = null;
		$this->pattern_exceptions = [];
	}

	// Force remerging of pattern-file and custom exception patterns.
	$this->merged_exception_patterns = null;

	return $success;
}
||
| 199 | |||
/**
 * Hyphenates parsed text tokens.
 *
 * The tokens are returned untouched when `$min_length` or `$min_before` is empty or
 * when no pattern trie / pattern exceptions have been loaded. Otherwise every token
 * is replaced by the result of its `with_value()` method, called with the hyphenated
 * form of the token's `value`.
 *
 * @param array  $parsed_text_tokens   An array of text tokens.
 * @param string $hyphen               Optional. The hyphen character. Default '-'.
 * @param bool   $hyphenate_title_case Optional. Whether words in Title Case should be hyphenated. Default false.
 * @param int    $min_length           Optional. Minimum word length for hyphenation. Default 2.
 * @param int    $min_before           Optional. Minimum number of characters before a hyphenation point. Default 2.
 * @param int    $min_after            Optional. Minimum number of characters after a hyphenation point. Default 2.
 *
 * @return Token[] The modified text tokens.
 */
public function hyphenate( array $parsed_text_tokens, $hyphen = '-', $hyphenate_title_case = false, $min_length = 2, $min_before = 2, $min_after = 2 ) {
	$is_configured = isset( $this->pattern_trie, $this->pattern_exceptions );

	if ( ! $is_configured || empty( $min_length ) || empty( $min_before ) ) {
		return $parsed_text_tokens;
	}

	// Build the merged exception patterns lazily on first use.
	if ( null === $this->merged_exception_patterns ) {
		$this->merge_hyphenation_exceptions();
	}

	$hyphenated_tokens = [];
	foreach ( $parsed_text_tokens as $index => $token ) {
		$new_value = $this->hyphenate_word( $token->value, $hyphen, $hyphenate_title_case, $min_length, $min_before, $min_after );

		$hyphenated_tokens[ $index ] = $token->with_value( $new_value );
	}

	return $hyphenated_tokens;
}
||
| 228 | |||
/**
 * Hyphenates a single word.
 *
 * The word is returned unchanged if no string functions are available for it
 * (unknown encoding), if it is shorter than `$min_length`, or if it differs from its
 * lowercase form while `$hyphenate_title_case` is disabled.
 *
 * @param string $word                 The word to hyphenate.
 * @param string $hyphen               The hyphen character.
 * @param bool   $hyphenate_title_case Whether words in Title Case should be hyphenated.
 * @param int    $min_length           Minimum word length for hyphenation.
 * @param int    $min_before           Minimum number of characters before a hyphenation point.
 * @param int    $min_after            Minimum number of characters after a hyphenation point.
 *
 * @return string
 */
protected function hyphenate_word( $word, $hyphen, $hyphenate_title_case, $min_length, $min_before, $min_after ) {
	// String functions matching the word's encoding.
	$f = Strings::functions( $word );
	if ( empty( $f ) ) {
		return $word;
	}

	$length = $f['strlen']( $word );
	if ( $length < $min_length ) {
		return $word;
	}

	// Exceptions and trie patterns are looked up by the lowercase spelling.
	$lowercase = $f['strtolower']( $word );
	if ( $lowercase !== $word && ! $hyphenate_title_case ) {
		return $word;
	}

	// Exceptions take precedence over the patterns found in the trie.
	$pattern = isset( $this->merged_exception_patterns[ $lowercase ] )
		? $this->merged_exception_patterns[ $lowercase ]
		: $this->lookup_word_pattern( $lowercase, $f['strlen'], $f['str_split'] );

	$characters = $f['str_split']( $word, 1 );
	$result     = '';

	for ( $position = 0; $position < $length; ++$position ) {
		// A hyphen goes before this character if the pattern value is odd and the
		// position respects the minimum distances from both ends of the word.
		$insert_hyphen = isset( $pattern[ $position ] )
			&& self::is_odd( $pattern[ $position ] )
			&& $position >= $min_before
			&& $position <= $length - $min_after;

		if ( $insert_hyphen ) {
			$result .= $hyphen;
		}

		$result .= $characters[ $position ];
	}

	return $result;
}
||
| 286 | |||
/**
 * Looks up the hyphenation pattern for a word in the pattern trie.
 *
 * The key is wrapped in underscores and every position of the wrapped string is
 * used as a starting point for a walk through the trie. All offsets stored on the
 * visited nodes are collected, keeping the largest value per position.
 *
 * @param string   $key       The search key (lowercase word).
 * @param callable $strlen    A function equivalent to `strlen` for the appropriate encoding.
 * @param callable $str_split A function equivalent to `str_split` for the appropriate encoding.
 *
 * @return array The hyphenation pattern (empty if no trie is loaded).
 */
protected function lookup_word_pattern( $key, callable $strlen, callable $str_split ) {
	$root = $this->pattern_trie;
	if ( null === $root ) {
		return [];
	}

	// Presumably the underscores correspond to word-boundary markers in the
	// pattern data - confirm against the language files.
	$padded  = '_' . $key . '_';
	$length  = $strlen( $padded );
	$letters = $str_split( $padded );
	$merged  = [];

	for ( $first = 0; $first < $length; ++$first ) {
		// Every walk begins at the root of the trie.
		$node     = $root;
		$position = $first;

		// Follow the characters for as long as the trie has a matching child.
		while ( $position < $length && $node->exists( $letters[ $position ] ) ) {
			$node = $node->get_node( $letters[ $position ] );

			foreach ( $node->offsets() as $entry ) {
				$value = $entry[0];
				$index = $entry[1] + $first - 1;

				// Keep the maximum of all values seen for this position.
				if ( isset( $merged[ $index ] ) ) {
					$merged[ $index ] = max( $merged[ $index ], $value );
				} else {
					$merged[ $index ] = $value;
				}
			}

			++$position;
		}
	}

	return $merged;
}
||
| 334 | |||
/**
 * Combines the exceptions from the language file with the custom exceptions and
 * stores the hyphenation pattern generated for each of them in
 * `$merged_exception_patterns`.
 */
protected function merge_hyphenation_exceptions() {
	$custom   = $this->custom_exceptions;
	$language = $this->pattern_exceptions;

	if ( empty( $language ) ) {
		// Only custom exceptions (or none at all).
		$merged = empty( $custom ) ? [] : $custom;
	} elseif ( empty( $custom ) ) {
		// Only exceptions from the language file.
		$merged = $language;
	} else {
		// array_merge() lets later arrays overwrite equal string keys, so for a word
		// present in both lists the language-file entry is the one that is kept.
		// NOTE(review): confirm that this precedence is intended.
		$merged = array_merge( $custom, $language );
	}

	// Generate a pattern for every exception, keeping the keys.
	$patterns = [];
	foreach ( $merged as $word => $hyphenated ) {
		$patterns[ $word ] = self::convert_hyphenation_exception_to_pattern( $hyphenated );
	}

	$this->merged_exception_patterns = $patterns;
}
|
| 359 | |||
/**
 * Generates a hyphenation pattern from an exception.
 *
 * For every hyphen in the exception, the pattern receives the value 9 at the index
 * equal to the number of non-hyphen characters preceding that hyphen. All other
 * indices stay unset.
 *
 * @param string $exception A hyphenation exception in the form "foo-bar". Needs to be encoded in ASCII or UTF-8.
 *
 * @return array|null Returns the hyphenation pattern or null if `$exception` is using an invalid encoding.
 */
protected static function convert_hyphenation_exception_to_pattern( $exception ) {
	$f = Strings::functions( $exception );
	if ( empty( $f ) ) {
		return null;
	}

	$characters = $f['str_split']( $exception, 1 );
	$total      = $f['strlen']( $exception );

	$pattern      = [];
	$letters_seen = 0;

	for ( $position = 0; $position < $total; ++$position ) {
		if ( '-' !== $characters[ $position ] ) {
			// Regular character: advance the index within the unhyphenated word.
			++$letters_seen;
			continue;
		}

		// Hyphen: mark a hyphenation point before the next regular character.
		$pattern[ $letters_seen ] = 9;
	}

	return $pattern;
}
||
| 390 | |||
/**
 * Is a number odd?
 *
 * The argument is converted to an integer before the check, so negative odd
 * numbers are reported as odd as well.
 *
 * @param int $number Required.
 *
 * @return bool true if $number is odd, false if it is even.
 */
protected static function is_odd( $number ) {
	// In PHP the remainder takes the sign of the dividend (-3 % 2 === -1), hence
	// the comparison against zero instead of one.
	return 0 !== ( (int) $number ) % 2;
}
||
| 401 | } |
||
| 402 |