We could not synchronize checks via GitHub's checks API since Scrutinizer's GitHub App is not installed for this repository.
Total Complexity | 50 |
Total Lines | 338 |
Duplicated Lines | 0 % |
Changes | 0 |
Complex classes like Pattern_Converter often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use Pattern_Converter, and based on these observations, apply Extract Interface, too.
1 | <?php |
||
37 | class Pattern_Converter { |
||
38 | |||
39 | /** |
||
40 | * Pattern file URL(s) to fetch. |
||
41 | * |
||
42 | * @since 6.1.0 |
||
43 | * |
||
44 | * @var string[] |
||
45 | */ |
||
46 | protected $urls; |
||
47 | |||
48 | /** |
||
49 | * Human-readable language name. |
||
50 | * |
||
51 | * @var string |
||
52 | */ |
||
53 | protected $language; |
||
54 | |||
55 | /** |
||
56 | * Allowed word characters in PCRE syntax. |
||
57 | * |
||
58 | * @var string |
||
59 | */ |
||
60 | protected $word_characters; |
||
61 | |||
/**
 * Sets up a converter for one or more TeX pattern files.
 *
 * @param string|string[] $urls     The TeX pattern file URL(s). A single value is cast to an array.
 * @param string          $language A human-readable language name.
 */
public function __construct( $urls, $language ) {
	// Fragments of a PCRE character class; concatenated below into the
	// set of characters that may appear inside a word.
	$character_classes = [
		"\w.'ʼ᾽ʼ᾿’",
		Strings::uchr( 8205, 8204, 768, 769, 771, 772, 775, 776, 784, 803, 805, 814, 817 ),
		'\p{Mn}',
		'\p{Bengali}',
		'\p{Cyrillic}' . Strings::uchr( 7296, 7297, 7298, 7299, 7300, 7301, 7302, 7303, 7304, 65070, 65071 ),
		'\p{Devanagari}' . Strings::uchr( 2385, 2386 ),
		'\p{Ethiopic}',
		'\p{Gujarati}',
		'\p{Gurmukhi}',
		'\p{Kannada}',
		'\p{Malayalam}',
		'\p{Oriya}',
		'\p{Tamil}',
		'\p{Telugu}',
		'\p{Thai}',
		'-',
	];

	$this->word_characters = implode( '', $character_classes );
	$this->language        = $language;
	$this->urls            = (array) $urls;
}
||
91 | |||
/**
 * Extracts the patgen segment (the letters only) from a TeX hyphenation pattern.
 *
 * Dots are turned into underscores and all ASCII digits are dropped.
 *
 * @param string $pattern TeX hyphenation pattern.
 *
 * @return string
 */
protected function get_segment( $pattern ) {
	$with_underscores = str_replace( '.', '_', $pattern );

	return preg_replace( '/[0-9]/', '', $with_underscores );
}
||
101 | |||
/**
 * Derives the patgen digit sequence from a TeX hyphenation pattern.
 *
 * Every digit of the pattern is copied; a '0' is inserted for each position
 * between (and around) the non-digit characters that carries no digit.
 *
 * @param string $pattern TeX hyphenation pattern.
 *
 * @throws \RangeException Thrown when the sequence is not exactly one element
 *                         longer than the pattern's segment.
 *
 * @return string
 */
protected function get_sequence( $pattern ) {
	$chars  = Strings::mb_str_split( str_replace( '.', '_', $pattern ) );
	$digits = [];

	foreach ( $chars as $pos => $current ) {
		if ( ctype_digit( $current ) ) {
			$digits[] = $current;
			continue;
		}

		// A non-digit needs a leading '0' unless a digit directly precedes it.
		$follows_digit = isset( $chars[ $pos - 1 ] ) && ctype_digit( $chars[ $pos - 1 ] );
		if ( ! $follows_digit ) {
			$digits[] = '0';
		}

		// A trailing non-digit also needs a closing '0'.
		if ( ! isset( $chars[ $pos + 1 ] ) ) {
			$digits[] = '0';
		}
	}

	// Sanity check: n segment characters must yield n + 1 sequence entries.
	$sequence = implode( '', $digits );
	$count    = count( $digits );
	$expected = mb_strlen( $this->get_segment( $pattern ) ) + 1;

	if ( $expected !== $count ) {
		throw new \RangeException( "Invalid segment length $count for pattern $pattern (result sequence $sequence)." );
	}

	return $sequence;
}
||
142 | |||
/**
 * Format hyphenation pattern file for wp-Typography.
 *
 * Builds a JSON document with the keys `language`, `source_url`, `copyright`,
 * `exceptions` and `patterns`.
 *
 * @param array $patterns   An array of TeX hyphenation patterns.
 * @param array $exceptions An array of hyphenated exception words (e.g. 'some-thing').
 *                          Each becomes a lowercased 'something' => 'some-thing' entry.
 * @param array $comments   An array of TeX comments. Trailing whitespace is trimmed.
 *
 * @throws \RangeException   Thrown when a pattern yields an invalid sequence.
 * @throws \RuntimeException Thrown when the result cannot be encoded as JSON
 *                           (e.g. because of invalid UTF-8 in the input).
 *
 * @return string
 */
protected function format_results( array $patterns, array $exceptions, array $comments ) {
	$pattern_mapping = [];

	foreach ( $patterns as $pattern ) {
		$segment = $this->get_segment( $pattern );

		// The first pattern seen for a given segment wins.
		if ( ! isset( $pattern_mapping[ $segment ] ) ) {
			$pattern_mapping[ $segment ] = $this->get_sequence( $pattern );
		}
	}

	// Produce a nice exceptions mapping.
	$json_exceptions = [];
	foreach ( $exceptions as $exception ) {
		$json_exceptions[ mb_strtolower( str_replace( '-', '', $exception ) ) ] = mb_strtolower( $exception );
	}

	// A single URL is stored as a plain string, several URLs as a list.
	// Guard the index access so that an empty URL list does not raise a warning.
	if ( count( $this->urls ) > 1 ) {
		$source_url = $this->urls;
	} else {
		$source_url = isset( $this->urls[0] ) ? $this->urls[0] : null;
	}

	$json_results = [
		'language'   => $this->language,
		'source_url' => $source_url,
		'copyright'  => array_map( 'rtrim', $comments ),
		'exceptions' => $json_exceptions,
		'patterns'   => $pattern_mapping,
	];

	$json = json_encode( $json_results, JSON_PRETTY_PRINT | JSON_UNESCAPED_UNICODE );

	// json_encode() signals failure by returning false; do not hand that to
	// callers that expect the file contents as a string.
	if ( false === $json ) {
		throw new \RuntimeException( 'Unable to encode hyphenation patterns as JSON: ' . json_last_error_msg() );
	}

	return $json;
}
||
183 | |||
/**
 * Try to match sequences of TeX hyphenation exceptions.
 *
 * Any exception words found on the line are appended to `$exceptions`.
 *
 * @param string $line       A line from the TeX pattern file.
 * @param array  $exceptions An array of hyphenated exception words (e.g. 'some-thing').
 *                           Passed by reference.
 * @param int    $line_no    Optional. Line number (used in the error message). Default 0.
 *
 * @throws \RangeException Thrown when the exception line is malformed.
 *
 * @return bool False if the line contains the closing brace `}`, true otherwise.
 */
protected function match_exceptions( $line, array &$exceptions, $line_no = 0 ) {
	$found = [];

	// One exception followed by the closing brace.
	if ( preg_match( '/^\s*([' . $this->word_characters . '-]+)\s*}\s*(?:%.*)?$/u', $line, $found ) ) {
		$exceptions[] = $found[1];

		return false;
	}

	// Several exceptions followed by the closing brace: parse the words
	// again without the brace, then report the end of the block.
	if ( preg_match( '/^\s*((?:[' . $this->word_characters . '-]+\s*)+)\s*}\s*(?:%.*)?$/u', $line, $found ) ) {
		$this->match_exceptions( $found[1], $exceptions, $line_no );

		return false;
	}

	// Only the closing brace (optionally followed by a comment).
	if ( preg_match( '/^\s*}\s*(?:%.*)?$/u', $line, $found ) ) {
		return false;
	}

	// A single exception on the line.
	if ( preg_match( '/^\s*([' . $this->word_characters . '-]+)\s*(?:%.*)?$/u', $line, $found ) ) {
		$exceptions[] = $found[1];

		return true;
	}

	// Sometimes there are multiple exceptions on a single line.
	if ( preg_match( '/^\s*((?:[' . $this->word_characters . '-]+\s*)+)(?:%.*)?$/u', $line, $found ) ) {
		foreach ( self::split_at_whitespace( $found[1] ) as $word ) {
			$exceptions[] = $word;
		}

		return true;
	}

	// Anything that is neither a comment nor whitespace at this point is an error.
	if ( ! preg_match( '/^\s*(?:%.*)?$/u', $line, $found ) ) {
		throw new \RangeException( "Error: unknown exception $line on line $line_no\n" );
	}

	return true;
}
||
224 | |||
225 | /** |
||
226 | * Try to match a pattern. |
||
227 | * |
||
228 | * @param string $line A line from the TeX pattern file. |
||
229 | * @param array $patterns An array of patterns. |
||
230 | * @param int $line_no Optional. Line number. Default 0. |
||
231 | * |
||
232 | * @throws \RangeException Thrown when the pattern line is malformed. |
||
233 | * |
||
234 | * @return bool |
||
235 | */ |
||
236 | protected function match_patterns( $line, array &$patterns, $line_no = 0 ) { |
||
257 | } |
||
258 | |||
/**
 * Replace macros in the given line.
 *
 * Every occurrence of `\name{arg}` whose name has a non-empty entry in
 * `$macros` is replaced by that entry, with `#1` substituted by `arg`.
 * Unknown macros are left untouched.
 *
 * @since 6.1.0
 *
 * @param string   $line   The input string.
 * @param string[] $macros The macros, indexed by macro name.
 *
 * @return string
 */
protected function expand_macros( $line, array $macros ) {
	if ( 0 < preg_match_all( '/\\\(?<name>\w+)\{(?<arg>[^\}]+)\}/u', $line, $matches, PREG_SET_ORDER ) ) {
		foreach ( $matches as $m ) {
			if ( ! empty( $macros[ $m['name'] ] ) ) {
				// Both substitutions are purely literal. Using str_replace() keeps
				// arguments containing `/`, `$1` or `\1` intact, which a regex
				// replacement would misread as delimiter or backreference.
				$expanded = str_replace( '#1', $m['arg'], $macros[ $m['name'] ] );
				$line     = str_replace( $m[0], $expanded, $line );
			}
		}
	}

	return $line;
}
||
282 | |||
/**
 * Split line (fragment) at whitespace.
 *
 * Empty pieces are discarded.
 *
 * @param string $line A line (fragment).
 *
 * @return array The non-empty pieces; an empty array if the line cannot be split.
 */
private static function split_at_whitespace( $line ) {
	$parts = \preg_split( '/\s+/Su', $line, -1, PREG_SPLIT_NO_EMPTY );

	// preg_split() returns false on a PCRE error (e.g. invalid UTF-8 with the
	// `u` modifier). Casting that to an array would yield [ false ], so
	// report "no pieces" instead.
	return false === $parts ? [] : $parts;
}
||
294 | |||
/**
 * Converts all configured TeX pattern files into a single result.
 *
 * Each URL is processed in turn; patterns, exceptions and comments are
 * accumulated across files and then formatted together.
 *
 * @throws \RangeException   Thrown when a line cannot be parsed at all.
 * @throws \RuntimeException Thrown when file does not exist.
 *
 * @return string
 */
public function convert() {
	// Accumulators shared by all files (filled by reference).
	$patterns   = [];
	$exceptions = [];
	$comments   = [];

	foreach ( $this->urls as $pattern_file_url ) {
		$this->convert_single_file( $pattern_file_url, $patterns, $exceptions, $comments );
	}

	$formatted = $this->format_results( $patterns, $exceptions, $comments );

	return $formatted;
}
||
315 | |||
316 | /** |
||
317 | * Convert the given TeX file. |
||
318 | * |
||
319 | * @since 6.1.0 |
||
320 | * |
||
321 | * @param string $url Pattern file URL. |
||
322 | * @param string[] $patterns Extracted pattern lines. Passed by reference. |
||
323 | * @param string[] $exceptions Extracted hyphenation exception lines. Passed by reference. |
||
324 | * @param string[] $comments Extracted comments lines. Passed by reference. |
||
325 | * |
||
326 | * @throws \RangeException Thrown when a line cannot be parsed at all. |
||
327 | * @throws \RuntimeException Thrown when file does not exist. |
||
328 | */ |
||
329 | protected function convert_single_file( $url, &$patterns, &$exceptions, &$comments ) { |
||
375 | } |
||
376 | } |
||
377 | } |
||
378 | } |
||
380 |