We could not synchronize checks via GitHub's checks API since Scrutinizer's GitHub App is not installed for this repository.
Total Complexity | 50 |
Total Lines | 352 |
Duplicated Lines | 0 % |
Changes | 10 | ||
Bugs | 0 | Features | 0 |
Complex classes like Pattern_Converter often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use Pattern_Converter, and based on these observations, apply Extract Interface, too.
1 | <?php |
||
37 | class Pattern_Converter { |
||
38 | |||
39 | /** |
||
40 | * Pattern file URL(s) to fetch. |
||
41 | * |
||
42 | * @since 6.1.0 |
||
43 | * |
||
44 | * @var string[] |
||
45 | */ |
||
46 | protected $urls; |
||
47 | |||
48 | /** |
||
49 | * Human-readable language name. |
||
50 | * |
||
51 | * @var string |
||
52 | */ |
||
53 | protected $language; |
||
54 | |||
55 | /** |
||
56 | * A word character class in PCRE2 syntax. |
||
57 | * |
||
58 | * @var string |
||
59 | */ |
||
60 | protected $word_class; |
||
61 | |||
/**
 * Sets up a converter for one or more TeX pattern files.
 *
 * Besides storing the arguments, this builds the regular expression fragment
 * (`$this->word_class`) that the line matchers use to recognize a single
 * "word" character.
 *
 * @param string|string[] $urls     The TeX pattern file URL(s). A single value is cast to an array.
 * @param string          $language A human-readable language name.
 */
public function __construct( $urls, $language ) {
	$this->language = $language;
	$this->urls     = (array) $urls;

	// Every alternative that counts as a "word" character.
	$alternatives = [
		'\p{Xan}',       // Alphanumeric characters.
		"[.'ʼ᾽ʼ᾿’\-]",  // Allowed punctuation.
		'\p{S}',         // Symbols.
		'\p{Mn}',        // Non-spacing marks (diacritics).

		// Additional code points used by Non-latin scripts.
		'\p{Bengali}',
		'\p{Cyrillic}',
		'\p{Devanagari}',
		'\p{Ethiopic}',
		'\p{Gujarati}',
		'\p{Gurmukhi}',
		'\p{Kannada}',
		'\p{Malayalam}',
		'\p{Oriya}',
		'\p{Tamil}',
		'\p{Telugu}',
		'\p{Thai}',
	];

	// Very special characters: ZERO WIDTH NON-JOINER (8204) and ZERO WIDTH JOINER (8205).
	$alternatives[] = '[' . Strings::uchr( 8204, 8205 ) . ']';

	// The alternatives are combined in a non-capturing group rather than a
	// bracket expression, because PCRE2 does not allow the "script" classes
	// to be used as part of a real character class.
	$this->word_class = '(?:' . \implode( '|', $alternatives ) . ')';
}
||
106 | |||
/**
 * Retrieves the patgen segment from a TeX hyphenation pattern.
 *
 * Every dot in the pattern is turned into an underscore and all ASCII
 * digits are dropped.
 *
 * @param string $pattern TeX hyphenation pattern.
 *
 * @return string The pattern without its digits.
 */
protected function get_segment( $pattern ) {
	$with_underscores = \str_replace( '.', '_', $pattern );

	return \preg_replace( '/[0-9]/', '', $with_underscores );
}
||
116 | |||
/**
 * Calculates the patgen sequence from a TeX hyphenation pattern.
 *
 * The sequence holds one digit for every gap before, between and after the
 * non-digit characters of the pattern: the digit written in the pattern at
 * that position, or '0' where the pattern has none.
 *
 * @param string $pattern TeX hyphenation pattern.
 *
 * @throws \RangeException Thrown when the number of digits collected is not
 *                         exactly one more than the length of the pattern's segment.
 *
 * @return string
 */
protected function get_sequence( $pattern ) {
	$chars  = Strings::mb_str_split( \str_replace( '.', '_', $pattern ) );
	$digits = [];

	foreach ( $chars as $pos => $char ) {
		// Digits from the pattern are taken over verbatim.
		if ( \ctype_digit( $char ) ) {
			$digits[] = $char;
			continue;
		}

		// A non-digit that starts the pattern or does not follow a digit gets an implicit '0' in front.
		$follows_digit = isset( $chars[ $pos - 1 ] ) && \ctype_digit( $chars[ $pos - 1 ] );
		if ( ! $follows_digit ) {
			$digits[] = '0';
		}

		// A non-digit that ends the pattern gets an implicit '0' behind it.
		if ( ! isset( $chars[ $pos + 1 ] ) ) {
			$digits[] = '0';
		}
	}

	// Sanity check: there has to be exactly one digit more than there are segment characters.
	$sequence = \implode( '', $digits );
	$count    = \count( $digits );
	$expected = 1 + \mb_strlen( $this->get_segment( $pattern ) );

	if ( $count !== $expected ) {
		throw new \RangeException( "Invalid segment length $count for pattern $pattern (result sequence $sequence)." );
	}

	return $sequence;
}
||
157 | |||
/**
 * Formats the collected data as a JSON hyphenation pattern file for wp-Typography.
 *
 * The result is a pretty-printed JSON object with the keys `language`,
 * `source_url`, `copyright`, `exceptions` and `patterns` (in that order).
 *
 * @param array $patterns   An array of TeX hyphenation patterns.
 * @param array $exceptions An array of hyphenated exception words (e.g. 'some-thing').
 * @param array $comments   An array of TeX comments. Trailing whitespace is trimmed from each.
 *
 * @return string
 */
protected function format_results( array $patterns, array $exceptions, array $comments ) {
	// Map each segment to its sequence; the first pattern seen for a segment wins.
	$sequences = [];
	foreach ( $patterns as $tex_pattern ) {
		$key = $this->get_segment( $tex_pattern );

		if ( isset( $sequences[ $key ] ) ) {
			continue;
		}

		$sequences[ $key ] = $this->get_sequence( $tex_pattern );
	}

	// Map each lowercased word without hyphens to its lowercased hyphenated form.
	$hyphenations = [];
	foreach ( $exceptions as $word ) {
		$unhyphenated                  = \mb_strtolower( \str_replace( '-', '', $word ) );
		$hyphenations[ $unhyphenated ] = \mb_strtolower( $word );
	}

	// A single source is stored as a plain string, several sources as a list.
	if ( \count( $this->urls ) > 1 ) {
		$source = $this->urls;
	} else {
		$source = $this->urls[0];
	}

	$document = [
		'language'   => $this->language,
		'source_url' => $source,
		'copyright'  => \array_map( 'rtrim', $comments ),
		'exceptions' => $hyphenations,
		'patterns'   => $sequences,
	];

	return \json_encode( $document, \JSON_PRETTY_PRINT | \JSON_UNESCAPED_UNICODE );
}
||
198 | |||
/**
 * Tries to match sequences of TeX hyphenation exceptions.
 *
 * The checks run in a fixed order; the first one that matches decides the
 * outcome. Exceptions found on the line are appended to `$exceptions`.
 *
 * @param string $line       A line from the TeX pattern file.
 * @param array  $exceptions An array of hyphenated exception words (e.g. 'some-thing'). Passed by reference.
 * @param int    $line_no    Optional. Line number (only used in the error message). Default 0.
 *
 * @throws \RangeException Thrown when the exception line is malformed.
 *
 * @return bool False if the line contains the closing brace, true otherwise.
 */
protected function match_exceptions( $line, array &$exceptions, $line_no = 0 ) {
	$word = $this->word_class;

	// A single exception followed by the closing brace.
	if ( \preg_match( '/^\s*(' . $word . '+)\s*}\s*(?:%.*)?$/u', $line, $found ) ) {
		$exceptions[] = $found[1];

		return false;
	}

	// Several exceptions followed by the closing brace: handle the words via recursion.
	if ( \preg_match( '/^\s*((?:' . $word . '+\s*)+)\s*}\s*(?:%.*)?$/u', $line, $found ) ) {
		$this->match_exceptions( $found[1], $exceptions, $line_no );

		return false;
	}

	// Nothing but the closing brace (and maybe a comment).
	if ( \preg_match( '/^\s*}\s*(?:%.*)?$/u', $line ) ) {
		return false;
	}

	// A single exception.
	if ( \preg_match( '/^\s*(' . $word . '+)\s*(?:%.*)?$/u', $line, $found ) ) {
		$exceptions[] = $found[1];

		return true;
	}

	// Sometimes there are multiple exceptions on a single line.
	if ( \preg_match( '/^\s*((?:' . $word . '+\s*)+)(?:%.*)?$/u', $line, $found ) ) {
		foreach ( self::split_at_whitespace( $found[1] ) as $exception ) {
			$exceptions[] = $exception;
		}

		return true;
	}

	// Ignore comments and whitespace in exceptions.
	if ( \preg_match( '/^\s*(?:%.*)?$/u', $line ) ) {
		return true;
	}

	throw new \RangeException( "Error: unknown exception $line on line $line_no\n" );
}
||
239 | |||
240 | /** |
||
241 | * Try to match a pattern. |
||
242 | * |
||
243 | * @param string $line A line from the TeX pattern file. |
||
244 | * @param array $patterns An array of patterns. |
||
245 | * @param int $line_no Optional. Line number. Default 0. |
||
246 | * |
||
247 | * @throws \RangeException Thrown when the pattern line is malformed. |
||
248 | * |
||
249 | * @return bool |
||
250 | */ |
||
251 | protected function match_patterns( $line, array &$patterns, $line_no = 0 ) { |
||
271 | } |
||
272 | |||
/**
 * Replaces macro invocations in the given line.
 *
 * Every `\name{arg}` invocation whose name has a non-empty entry in `$macros`
 * is replaced by that entry, with each `#1` placeholder substituted by the
 * argument. Invocations of unknown macros are left untouched.
 *
 * @since 6.1.0
 *
 * @param string   $line   The input string.
 * @param string[] $macros The macro bodies, indexed by macro name.
 *
 * @return string
 */
protected function expand_macros( $line, array $macros ) {
	if ( 0 < \preg_match_all( '/\\\(?<name>\w+)\{(?<arg>[^\}]+)\}/u', $line, $matches, \PREG_SET_ORDER ) ) {
		foreach ( $matches as $m ) {
			if ( ! empty( $macros[ $m['name'] ] ) ) {
				// Both substitutions are literal. With preg_replace(), a "$1" or "\1"
				// inside the argument or the macro body would be interpreted as a
				// back-reference and silently corrupt the result.
				$expanded = \str_replace( '#1', $m['arg'], $macros[ $m['name'] ] );
				$line     = \str_replace( $m[0], $expanded, $line );
			}
		}
	}

	return $line;
}
||
296 | |||
/**
 * Splits a line (fragment) at whitespace.
 *
 * Empty parts are dropped, so leading, trailing and repeated whitespace
 * never produce empty strings in the result.
 *
 * @param string $line A line (fragment).
 *
 * @return array The non-empty parts, or an empty array if the line could not be split.
 */
private static function split_at_whitespace( $line ) {
	$parts = \preg_split( '/\s+/Su', $line, -1, \PREG_SPLIT_NO_EMPTY );

	// preg_split() returns false on failure (e.g. invalid UTF-8 with the /u modifier).
	// Casting that to an array would yield [ false ] instead of an empty list.
	if ( false === $parts ) {
		return [];
	}

	return $parts;
}
||
308 | |||
/**
 * Converts all configured TeX files into a single formatted result.
 *
 * Each URL is processed in turn, accumulating into shared pattern, exception
 * and comment lists, which are then passed to `format_results()`.
 *
 * @throws \RangeException   Thrown when a line cannot be parsed at all.
 * @throws \RuntimeException Thrown when file does not exist.
 *
 * @return string
 */
public function convert() {
	// Accumulators filled (by reference) while the files are processed.
	$patterns   = [];
	$exceptions = [];
	$comments   = [];

	foreach ( $this->urls as $pattern_url ) {
		$this->convert_single_file( $pattern_url, $patterns, $exceptions, $comments );
	}

	$formatted = $this->format_results( $patterns, $exceptions, $comments );

	return $formatted;
}
||
329 | |||
330 | /** |
||
331 | * Convert the given TeX file. |
||
332 | * |
||
333 | * @since 6.1.0 |
||
334 | * |
||
335 | * @param string $url Pattern file URL. |
||
336 | * @param string[] $patterns Extracted pattern lines. Passed by reference. |
||
337 | * @param string[] $exceptions Extracted hyphenation exception lines. Passed by reference. |
||
338 | * @param string[] $comments Extracted comments lines. Passed by reference. |
||
339 | * |
||
340 | * @throws \RangeException Thrown when a line cannot be parsed at all. |
||
341 | * @throws \RuntimeException Thrown when file does not exist. |
||
342 | */ |
||
343 | protected function convert_single_file( $url, &$patterns, &$exceptions, &$comments ) { |
||
389 | } |
||
390 | } |
||
391 | } |
||
392 | } |
||
393 | } |
||
394 |