We could not synchronize checks via GitHub's checks API since Scrutinizer's GitHub App is not installed for this repository.
| Total Complexity | 50 | 
| Total Lines | 338 | 
| Duplicated Lines | 0 % | 
| Changes | 0 | ||
Complex classes like Pattern_Converter often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use Pattern_Converter, and based on these observations, apply Extract Interface, too.
| 1 | <?php  | 
            ||
| 37 | class Pattern_Converter { | 
            ||
| 38 | |||
| 39 | /**  | 
            ||
| 40 | * Pattern file URL(s) to fetch.  | 
            ||
| 41 | *  | 
            ||
| 42 | * @since 6.1.0  | 
            ||
| 43 | *  | 
            ||
| 44 | * @var string[]  | 
            ||
| 45 | */  | 
            ||
| 46 | protected $urls;  | 
            ||
| 47 | |||
| 48 | /**  | 
            ||
| 49 | * Human-readable language name.  | 
            ||
| 50 | *  | 
            ||
| 51 | * @var string  | 
            ||
| 52 | */  | 
            ||
| 53 | protected $language;  | 
            ||
| 54 | |||
| 55 | /**  | 
            ||
| 56 | * Allowed word characters in PCRE syntax.  | 
            ||
| 57 | *  | 
            ||
| 58 | * @var string  | 
            ||
| 59 | */  | 
            ||
| 60 | protected $word_characters;  | 
            ||
| 61 | |||
/**
 * Sets up a converter for one or more TeX pattern files.
 *
 * Besides storing the URL(s) and the language name, this assembles the
 * PCRE character-class fragment that describes which characters may be
 * part of a word.
 *
 * @param string|string[] $urls     The TeX pattern file URL(s). A single URL is cast to an array.
 * @param string          $language A human-readable language name.
 */
public function __construct( $urls, $language ) {
	$this->language = $language;
	$this->urls     = (array) $urls;

	// The fragments are concatenated in exactly this order; the literal hyphen comes last.
	$fragments = [
		"\w.'ʼ᾽ʼ᾿’",
		Strings::uchr( 8205, 8204, 768, 769, 771, 772, 775, 776, 784, 803, 805, 814, 817 ),
		'\p{Mn}',
		'\p{Bengali}',
		'\p{Cyrillic}' . Strings::uchr( 7296, 7297, 7298, 7299, 7300, 7301, 7302, 7303, 7304, 65070, 65071 ),
		'\p{Devanagari}' . Strings::uchr( 2385, 2386 ),
		'\p{Ethiopic}',
		'\p{Gujarati}',
		'\p{Gurmukhi}',
		'\p{Kannada}',
		'\p{Malayalam}',
		'\p{Oriya}',
		'\p{Tamil}',
		'\p{Telugu}',
		'\p{Thai}',
		'-',
	];

	$this->word_characters = implode( '', $fragments );
}
| 91 | |||
/**
 * Extracts the patgen segment from a TeX hyphenation pattern.
 *
 * Every '.' in the pattern is replaced by '_', then all ASCII digits
 * (0-9) are removed.
 *
 * @param string $pattern TeX hyphenation pattern.
 *
 * @return string The pattern without digits and with '_' in place of '.'.
 */
protected function get_segment( $pattern ) {
	$with_underscores = str_replace( '.', '_', $pattern );

	return preg_replace( '/[0-9]/', '', $with_underscores );
}
| 101 | |||
/**
 * Derives the patgen digit sequence from a TeX hyphenation pattern.
 *
 * The result holds one digit for every position before, between and after
 * the non-digit characters of the pattern. Digits present in the pattern
 * are copied; positions without a digit are filled with '0'.
 *
 * @param string $pattern TeX hyphenation pattern.
 *
 * @throws \RangeException Thrown when the number of digits is not exactly
 *                         one more than the length of the pattern's segment.
 *
 * @return string
 */
protected function get_sequence( $pattern ) {
	$chars  = Strings::mb_str_split( str_replace( '.', '_', $pattern ) );
	$digits = [];

	foreach ( $chars as $position => $char ) {
		// Digits from the pattern are taken over verbatim.
		if ( ctype_digit( $char ) ) {
			$digits[] = $char;
			continue;
		}

		// A letter that is not preceded by a digit gets an implicit '0' in front of it.
		$preceded_by_digit = isset( $chars[ $position - 1 ] ) && ctype_digit( $chars[ $position - 1 ] );
		if ( ! $preceded_by_digit ) {
			$digits[] = '0';
		}

		// A letter at the very end gets an implicit trailing '0'.
		if ( ! isset( $chars[ $position + 1 ] ) ) {
			$digits[] = '0';
		}
	}

	// Sanity check: there has to be exactly one more digit than segment characters.
	$count    = count( $digits );
	$expected = mb_strlen( $this->get_segment( $pattern ) ) + 1;
	$sequence = implode( '', $digits );

	if ( $count !== $expected ) {
		throw new \RangeException( "Invalid segment length $count for pattern $pattern (result sequence $sequence)." );
	}

	return $sequence;
}
| 142 | |||
/**
 * Builds the JSON pattern file contents for wp-Typography.
 *
 * @param array $patterns   An array of TeX hyphenation patterns. When several patterns
 *                          share a segment, only the first one is used.
 * @param array $exceptions An array of hyphenated exception words (e.g. 'some-thing').
 *                          Each becomes an entry keyed by the lowercased word without hyphens.
 * @param array $comments   An array of TeX comments. Trailing whitespace is trimmed from each.
 *
 * @return string The result of json_encode() with pretty-printing and unescaped Unicode.
 */
protected function format_results( array $patterns, array $exceptions, array $comments ) {
	// Map each segment to its digit sequence; the first pattern for a segment wins.
	$pattern_mapping = [];
	foreach ( $patterns as $pattern ) {
		$segment = $this->get_segment( $pattern );

		if ( isset( $pattern_mapping[ $segment ] ) ) {
			continue;
		}

		$pattern_mapping[ $segment ] = $this->get_sequence( $pattern );
	}

	// Map each unhyphenated, lowercased word to its lowercased hyphenated form.
	$json_exceptions = [];
	foreach ( $exceptions as $hyphenated ) {
		$key                     = mb_strtolower( str_replace( '-', '', $hyphenated ) );
		$json_exceptions[ $key ] = mb_strtolower( $hyphenated );
	}

	// A single source is stored as a plain string, several sources as an array.
	if ( count( $this->urls ) > 1 ) {
		$source_url = $this->urls;
	} else {
		$source_url = $this->urls[0];
	}

	$document = [
		'language'   => $this->language,
		'source_url' => $source_url,
		'copyright'  => array_map( 'rtrim', $comments ),
		'exceptions' => $json_exceptions,
		'patterns'   => $pattern_mapping,
	];

	return json_encode( $document, JSON_PRETTY_PRINT | JSON_UNESCAPED_UNICODE );
}
| 183 | |||
/**
 * Tries to match sequences of TeX hyphenation exceptions.
 *
 * The checks run in a fixed order: lines containing the closing brace
 * come first (they end the exceptions block and yield `false`), then
 * lines with one or several exception words, then blank/comment lines.
 *
 * @param string   $line       A line from the TeX pattern file.
 * @param string[] $exceptions The hyphenated exception words found so far (e.g. 'some-thing').
 *                             Newly matched words are appended. Passed by reference.
 * @param int      $line_no    Optional. Line number, only used in the error message. Default 0.
 *
 * @throws \RangeException Thrown when the exception line is malformed.
 *
 * @return bool False if the line contained the closing brace, true otherwise.
 */
protected function match_exceptions( $line, array &$exceptions, $line_no = 0 ) {
	// Regex building blocks: a single exception word, and one or more of them separated by whitespace.
	$word  = '[' . $this->word_characters . '-]+';
	$words = '(?:' . $word . '\s*)+';

	// A single exception followed by the closing brace.
	if ( preg_match( '/^\s*(' . $word . ')\s*}\s*(?:%.*)?$/u', $line, $found ) ) {
		$exceptions[] = $found[1];

		return false;
	}

	// Several exceptions followed by the closing brace: process the words without the brace.
	if ( preg_match( '/^\s*(' . $words . ')\s*}\s*(?:%.*)?$/u', $line, $found ) ) {
		$this->match_exceptions( $found[1], $exceptions, $line_no );

		return false;
	}

	// Just the closing brace.
	if ( preg_match( '/^\s*}\s*(?:%.*)?$/u', $line, $found ) ) {
		return false;
	}

	// A single exception.
	if ( preg_match( '/^\s*(' . $word . ')\s*(?:%.*)?$/u', $line, $found ) ) {
		$exceptions[] = $found[1];

		return true;
	}

	// Sometimes there are multiple exceptions on a single line.
	if ( preg_match( '/^\s*(' . $words . ')(?:%.*)?$/u', $line, $found ) ) {
		foreach ( self::split_at_whitespace( $found[1] ) as $exception ) {
			$exceptions[] = $exception;
		}

		return true;
	}

	// Comments and whitespace inside the exceptions block are ignored.
	if ( preg_match( '/^\s*(?:%.*)?$/u', $line, $found ) ) {
		return true;
	}

	throw new \RangeException( "Error: unknown exception $line on line $line_no\n" );
}
| 224 | |||
| 225 | /**  | 
            ||
| 226 | * Try to match a pattern.  | 
            ||
| 227 | *  | 
            ||
| 228 | * @param string $line A line from the TeX pattern file.  | 
            ||
| 229 | * @param array $patterns An array of patterns.  | 
            ||
| 230 | * @param int $line_no Optional. Line number. Default 0.  | 
            ||
| 231 | *  | 
            ||
| 232 | * @throws \RangeException Thrown when the pattern line is malformed.  | 
            ||
| 233 | *  | 
            ||
| 234 | * @return bool  | 
            ||
| 235 | */  | 
            ||
| 236 | 	protected function match_patterns( $line, array &$patterns, $line_no = 0 ) { | 
            ||
| 257 | }  | 
            ||
| 258 | |||
/**
 * Replaces macro calls in the given line.
 *
 * A macro call has the form `\name{arg}`. If `$macros` contains a non-empty
 * definition for `name`, every occurrence of that exact call in the line is
 * replaced by the definition, with each `#1` in the definition replaced by
 * `arg`. Calls to unknown macros are left untouched.
 *
 * Both substitutions are plain string replacements, so characters that are
 * special in regular expressions or in preg_replace() replacement strings
 * (e.g. `/`, `$1`, `\1`) are treated literally.
 *
 * @since 6.1.0
 *
 * @param string   $line   The input string.
 * @param string[] $macros The macro definitions, indexed by macro name.
 *
 * @return string
 */
protected function expand_macros( $line, array $macros ) {
	if ( 0 < preg_match_all( '/\\\(?<name>\w+)\{(?<arg>[^\}]+)\}/u', $line, $matches, PREG_SET_ORDER ) ) {
		foreach ( $matches as $m ) {
			if ( ! empty( $macros[ $m['name'] ] ) ) {
				// Insert the argument into the macro definition.
				$expanded = str_replace( '#1', $m['arg'], $macros[ $m['name'] ] );

				// Replace the complete macro call (`$m[0]`) with its expansion.
				$line = str_replace( $m[0], $expanded, $line );
			}
		}
	}

	return $line;
}
| 282 | |||
/**
 * Splits a line (fragment) at whitespace.
 *
 * Empty parts are dropped, so leading, trailing and repeated whitespace
 * does not produce empty strings in the result.
 *
 * @param string $line A line (fragment).
 *
 * @return string[] The non-empty parts. An empty array if there are none or
 *                  if the line could not be split (e.g. invalid UTF-8).
 */
private static function split_at_whitespace( $line ) {
	$parts = \preg_split( '/\s+/Su', $line, -1, PREG_SPLIT_NO_EMPTY );

	// preg_split() returns false on failure; casting that to an array would
	// yield `[ false ]` instead of an empty list.
	if ( false === $parts ) {
		return [];
	}

	return $parts;
}
| 294 | |||
/**
 * Converts all configured TeX files into a single result.
 *
 * Every URL is processed in turn; the patterns, exceptions and comments
 * of all files are accumulated and then formatted together.
 *
 * @throws \RangeException Thrown when a line cannot be parsed at all.
 * @throws \RuntimeException Thrown when file does not exist.
 *
 * @return string The formatted results for all files.
 */
public function convert() {
	// Accumulators shared by all files; filled by reference.
	$patterns   = [];
	$exceptions = [];
	$comments   = [];

	foreach ( $this->urls as $source ) {
		$this->convert_single_file( $source, $patterns, $exceptions, $comments );
	}

	return $this->format_results( $patterns, $exceptions, $comments );
}
| 315 | |||
| 316 | /**  | 
            ||
| 317 | * Convert the given TeX file.  | 
            ||
| 318 | *  | 
            ||
| 319 | * @since 6.1.0  | 
            ||
| 320 | *  | 
            ||
| 321 | * @param string $url Pattern file URL.  | 
            ||
| 322 | * @param string[] $patterns Extracted pattern lines. Passed by reference.  | 
            ||
| 323 | * @param string[] $exceptions Extracted hyphenation exception lines. Passed by reference.  | 
            ||
| 324 | * @param string[] $comments Extracted comments lines. Passed by reference.  | 
            ||
| 325 | *  | 
            ||
| 326 | * @throws \RangeException Thrown when a line cannot be parsed at all.  | 
            ||
| 327 | * @throws \RuntimeException Thrown when file does not exist.  | 
            ||
| 328 | */  | 
            ||
| 329 | 	protected function convert_single_file( $url, &$patterns, &$exceptions, &$comments ) { | 
            ||
| 375 | }  | 
            ||
| 376 | }  | 
            ||
| 377 | }  | 
            ||
| 378 | }  | 
            ||
| 380 |