Scrutinizer GitHub App not installed

We could not synchronize checks via GitHub's checks API since Scrutinizer's GitHub App is not installed for this repository.

Install GitHub App

GitHub Access Token became invalid

It seems like the GitHub access token used for retrieving details about this repository from GitHub became invalid. This might prevent certain types of inspections from being run (in particular, everything related to pull requests).
Please ask an admin of your repository to renew the access token on this website.

Pattern_Converter::expand_macros()   A
last analyzed

Complexity

Conditions 4
Paths 2

Size

Total Lines 12
Code Lines 7

Duplication

Lines 0
Ratio 0 %

Importance

Changes 2
Bugs 0 Features 0
Metric Value
eloc 7
c 2
b 0
f 0
dl 0
loc 12
rs 10
cc 4
nc 2
nop 2
1
<?php
2
/**
3
 *  This file is part of PHP-Typography.
4
 *
5
 *  Copyright 2015-2020 Peter Putzer.
6
 *
7
 *  This program is free software; you can redistribute it and/or modify
8
 *  it under the terms of the GNU General Public License as published by
9
 *  the Free Software Foundation; either version 2 of the License, or
10
 *  (at your option) any later version.
11
 *
12
 *  This program is distributed in the hope that it will be useful,
13
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
14
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15
 *  GNU General Public License for more details.
16
 *
17
 *  You should have received a copy of the GNU General Public License along
18
 *  with this program; if not, write to the Free Software Foundation, Inc.,
19
 *  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
20
 *
21
 *  ***
22
 *
23
 *  @package mundschenk-at/php-typography
24
 *  @author Peter Putzer <[email protected]>
25
 *  @license http://www.gnu.org/licenses/gpl-2.0.html
26
 */
27
28
namespace PHP_Typography\Bin;
29
30
use PHP_Typography\Strings;
31
32
/**
33
 *  Convert LaTeX hyphenation pattern files to JSON.
34
 *
35
 *  @author Peter Putzer <[email protected]>
36
 */
37
class Pattern_Converter {

	/**
	 * Pattern file URL(s) to fetch.
	 *
	 * @since 6.1.0
	 *
	 * @var string[]
	 */
	protected $urls;

	/**
	 * Human-readable language name.
	 *
	 * @var string
	 */
	protected $language;

	/**
	 * A word character class in PCRE2 syntax.
	 *
	 * Built once in the constructor and spliced into the regular expressions
	 * used by match_patterns() and match_exceptions().
	 *
	 * @var string
	 */
	protected $word_class;

	/**
	 * Creates a new converter object.
	 *
	 * @param string|string[] $urls     The TeX pattern file URL(s). A single string is wrapped in an array.
	 * @param string          $language A human-readable language name.
	 */
	public function __construct( $urls, $language ) {
		$this->urls     = (array) $urls;
		$this->language = $language;

		// We need to use a non-matching group here because strangely PCRE2 does
		// not allow the "script" classes to be used as part of a real character class.
		$this->word_class = '(?:' .
			\join(
				'|',
				[
					'\p{Xan}',     // Alphanumeric characters.
					"[.'ʼ᾽ʼ᾿’\-]", // Allowed punctuation.
					'\p{S}',       // Symbols.
					'\p{Mn}',      // Non-spacing marks (diacritics).

					// Additional code points used by Non-latin scripts.
					'\p{Bengali}',
					'\p{Cyrillic}',
					'\p{Devanagari}',
					'\p{Ethiopic}',
					'\p{Gujarati}',
					'\p{Gurmukhi}',
					'\p{Kannada}',
					'\p{Malayalam}',
					'\p{Oriya}',
					'\p{Tamil}',
					'\p{Telugu}',
					'\p{Thai}',

					// Very special characters.
					'[' . Strings::uchr(
						8204, // ZERO WIDTH NON-JOINER.
						8205  // ZERO WIDTH JOINER.
					) . ']',
				]
			)
		. ')';
	}

	/**
	 * Retrieve patgen segment from TeX hyphenation pattern.
	 *
	 * Dots are replaced by underscores and all ASCII digits are removed,
	 * e.g. '.a1b2c' becomes '_abc'.
	 *
	 * @param string $pattern TeX hyphenation pattern.
	 * @return string
	 */
	protected function get_segment( $pattern ) {
		return \preg_replace( '/[0-9]/', '', \str_replace( '.', '_', $pattern ) );
	}

	/**
	 * Calculate patgen sequence from TeX hyphenation pattern.
	 *
	 * Digits in the pattern are copied to the sequence. For every other
	 * character a '0' is inserted when it is not preceded by a digit, and
	 * one more '0' is appended when it is the last character. The result
	 * must therefore be exactly one entry longer than the segment.
	 *
	 * @param string $pattern TeX hyphenation pattern.
	 *
	 * @throws \RangeException Thrown when the calculated pattern length is invalid.
	 *
	 * @return string
	 */
	protected function get_sequence( $pattern ) {
		$characters = Strings::mb_str_split( \str_replace( '.', '_', $pattern ) );
		$result     = [];

		foreach ( $characters as $index => $chr ) {
			if ( \ctype_digit( $chr ) ) {
				$result[] = $chr;
			} else {
				// Append '0' if this is the first character or the previous character was not a number.
				if ( ! isset( $characters[ $index - 1 ] ) || ! \ctype_digit( $characters[ $index - 1 ] ) ) {
					$result[] = '0';
				}

				// Append '0' if this is the last character.
				if ( ! isset( $characters[ $index + 1 ] ) ) {
					$result[] = '0';
				}
			}
		}

		// Do some error checking.
		$count     = \count( $result );
		$count_seg = \mb_strlen( $this->get_segment( $pattern ) );
		$sequence  = \implode( '', $result );

		if ( $count !== $count_seg + 1 ) {
			throw new \RangeException( "Invalid segment length $count for pattern $pattern (result sequence $sequence)." );
		}

		return $sequence;
	}

	/**
	 * Format hyphenation pattern file for wp-Typography.
	 *
	 * Produces a pretty-printed JSON document with the keys 'language',
	 * 'source_url', 'copyright', 'exceptions' and 'patterns'.
	 *
	 * @param string[] $patterns   An array of TeX hyphenation patterns. When several
	 *                             patterns share a segment, only the first one is used.
	 * @param string[] $exceptions An array of hyphenated exception words (e.g. 'some-thing').
	 *                             Each is stored lower-cased under its unhyphenated form
	 *                             (e.g. 'something' => 'some-thing').
	 * @param string[] $comments   An array of TeX comment lines (trailing whitespace is trimmed).
	 *
	 * @throws \RangeException   Thrown when a pattern yields an invalid sequence.
	 * @throws \RuntimeException Thrown when the results cannot be encoded as JSON.
	 *
	 * @return string
	 */
	protected function format_results( array $patterns, array $exceptions, array $comments ) {
		$pattern_mapping = [];

		foreach ( $patterns as $pattern ) {
			$segment = $this->get_segment( $pattern );

			if ( ! isset( $pattern_mapping[ $segment ] ) ) {
				$pattern_mapping[ $segment ] = $this->get_sequence( $pattern );
			}
		}

		// Produce a nice exceptions mapping.
		$json_exceptions = [];
		foreach ( $exceptions as $exception ) {
			$json_exceptions[ \mb_strtolower( \str_replace( '-', '', $exception ) ) ] = \mb_strtolower( $exception );
		}

		$json_results = [
			'language'    => $this->language,
			'source_url'  => \count( $this->urls ) > 1 ? $this->urls : $this->urls[0],
			'copyright'   => \array_map( 'rtrim', $comments ),
			'exceptions'  => $json_exceptions,
			'patterns'    => $pattern_mapping,
		];

		$json = \json_encode( $json_results, \JSON_PRETTY_PRINT | \JSON_UNESCAPED_UNICODE );

		// json_encode() signals failure by returning false; do not hand that
		// back to callers that expect a JSON string.
		if ( false === $json ) {
			throw new \RuntimeException( 'Error: could not encode results as JSON: ' . \json_last_error_msg() );
		}

		return $json;
	}

	/**
	 * Try to match sequences of TeX hyphenation exceptions.
	 *
	 * @param string   $line       A line from the TeX pattern file.
	 * @param string[] $exceptions An array of hyphenated exception words (e.g. 'some-thing').
	 *                             Matches are appended. Passed by reference.
	 * @param int      $line_no    Optional. Line number. Default 0.
	 *
	 * @throws \RangeException Thrown when the exception line is malformed.
	 *
	 * @return bool False when the line contains the closing brace of the
	 *              exceptions block, true when more lines should be read.
	 */
	protected function match_exceptions( $line, array &$exceptions, $line_no = 0 ) {
		if ( \preg_match( '/^\s*(' . $this->word_class . '+)\s*}\s*(?:%.*)?$/u', $line, $matches ) ) {
			// A single exception followed by the closing brace.
			$exceptions[] = $matches[1];
			return false;
		} elseif ( \preg_match( '/^\s*((?:' . $this->word_class . '+\s*)+)\s*}\s*(?:%.*)?$/u', $line, $matches ) ) {
			// Several exceptions followed by the closing brace: handle the
			// part before the brace like a regular line.
			$this->match_exceptions( $matches[1], $exceptions, $line_no );
			return false;
		} elseif ( \preg_match( '/^\s*}\s*(?:%.*)?$/u', $line, $matches ) ) {
			// Only the closing brace.
			return false;
		} elseif ( \preg_match( '/^\s*(' . $this->word_class . '+)\s*(?:%.*)?$/u', $line, $matches ) ) {
			$exceptions[] = $matches[1];
		} elseif ( \preg_match( '/^\s*((?:' . $this->word_class . '+\s*)+)(?:%.*)?$/u', $line, $matches ) ) {
			// Sometimes there are multiple exceptions on a single line.
			foreach ( self::split_at_whitespace( $matches[1] ) as $match ) {
				$exceptions[] = $match;
			}
		} elseif ( \preg_match( '/^\s*(?:%.*)?$/u', $line, $matches ) ) {
			// Ignore comments and whitespace in exceptions.
			return true;
		} else {
			throw new \RangeException( "Error: unknown exception $line on line $line_no\n" );
		}

		return true;
	}

	/**
	 * Try to match a pattern.
	 *
	 * @param string   $line     A line from the TeX pattern file.
	 * @param string[] $patterns An array of patterns. Matches are appended. Passed by reference.
	 * @param int      $line_no  Optional. Line number. Default 0.
	 *
	 * @throws \RangeException Thrown when the pattern line is malformed.
	 *
	 * @return bool False when the line contains the closing brace of the
	 *              patterns block, true when more lines should be read.
	 */
	protected function match_patterns( $line, array &$patterns, $line_no = 0 ) {
		if ( \preg_match( '/^\s*(' . $this->word_class . '+)\s*\}\s*(?:%.*)?$/u', $line, $matches ) ) {
			// A single pattern followed by the closing brace.
			$patterns[] = $matches[1];
			return false;
		} elseif ( \preg_match( '/^\s*\}\s*(?:%.*)?$/u', $line, $matches ) ) {
			// Only the closing brace.
			return false;
		} elseif ( \preg_match( '/^\s*(' . $this->word_class . '+)\s*(?:%.*)?$/u', $line, $matches ) ) {
			$patterns[] = $matches[1];
		} elseif ( \preg_match( '/^\s*((?:' . $this->word_class . '+\s*)+)(?:%.*)?$/u', $line, $matches ) ) {
			// Sometimes there are multiple patterns on a single line.
			foreach ( self::split_at_whitespace( $matches[1] ) as $match ) {
				$patterns[] = $match;
			}
		} elseif ( \preg_match( '/^\s*(?:%.*)?$/u', $line, $matches ) ) {
			// Ignore comments and whitespace in patterns.
			return true;
		} else {
			throw new \RangeException( "Error: unknown pattern $line on line $line_no\n" );
		}

		return true;
	}

	/**
	 * Replace macros in the given line.
	 *
	 * Every occurrence of `\name{arg}` whose name has a non-empty entry in
	 * `$macros` is replaced by that entry, with `#1` substituted by `arg`.
	 * Unknown macros are left untouched.
	 *
	 * @since 6.1.0
	 *
	 * @param  string   $line   The input string.
	 * @param  string[] $macros The macros (macro name => macro body).
	 *
	 * @return string
	 */
	protected function expand_macros( $line, array $macros ) {
		if ( 0 < \preg_match_all( '/\\\(?<name>\w+)\{(?<arg>[^\}]+)\}/u', $line, $matches, \PREG_SET_ORDER ) ) {
			foreach ( $matches as $m ) {
				if ( ! empty( $macros[ $m['name'] ] ) ) {
					// Use literal string replacement: the argument and the macro body
					// come straight from the TeX file and must not be interpreted as
					// PCRE replacement strings (where `$1` or `\1` are backreferences).
					$expanded = \str_replace( '#1', $m['arg'], $macros[ $m['name'] ] );
					$line     = \str_replace( $m[0], $expanded, $line );
				}
			}
		}

		return $line;
	}

	/**
	 * Split line (fragment) at whitespace.
	 *
	 * Empty fragments are dropped.
	 *
	 * @param  string $line A line (fragment).
	 *
	 * @return array
	 */
	private static function split_at_whitespace( $line ) {
		// We can safely cast to an array here, as long as $line is convertible to a string.
		return (array) \preg_split( '/\s+/Su', $line, -1, \PREG_SPLIT_NO_EMPTY );
	}

	/**
	 * Convert the given TeX files.
	 *
	 * All configured URLs are parsed in order and their patterns, exceptions
	 * and comments are merged into a single result.
	 *
	 * @throws \RangeException Thrown when a line cannot be parsed at all.
	 * @throws \RuntimeException Thrown when file does not exist.
	 *
	 * @return string
	 */
	public function convert() {
		// Results.
		$comments   = [];
		$patterns   = [];
		$exceptions = [];

		foreach ( $this->urls as $url ) {
			$this->convert_single_file( $url, $patterns, $exceptions, $comments );
		}

		return $this->format_results( $patterns, $exceptions, $comments );
	}

	/**
	 * Convert the given TeX file.
	 *
	 * Reads the file line by line and collects patterns, exceptions and
	 * comments. `\def` and `\edef` single-argument macros encountered outside
	 * of pattern/exception blocks are recorded and expanded in the content
	 * of those blocks.
	 *
	 * @since 6.1.0
	 *
	 * @param string   $url        Pattern file URL.
	 * @param string[] $patterns   Extracted pattern lines. Passed by reference.
	 * @param string[] $exceptions Extracted hyphenation exception lines. Passed by reference.
	 * @param string[] $comments   Extracted comments lines. Passed by reference.
	 *
	 * @throws \RangeException Thrown when a line cannot be parsed at all.
	 * @throws \RuntimeException Thrown when file does not exist.
	 */
	protected function convert_single_file( $url, &$patterns, &$exceptions, &$comments ) {
		if ( ! \file_exists( $url ) && 404 === File_Operations::get_http_response_code( $url ) ) {
			throw new \RuntimeException( "Error: unknown pattern file '{$url}'\n" );
		}

		// Status indicators.
		$reading_patterns   = false;
		$reading_exceptions = false;

		// Macro definitions.
		$macros = [];

		$file    = new \SplFileObject( $url );
		$line_no = 0;
		while ( ! $file->eof() ) {
			$line = $file->fgets();
			$line_no++;

			if ( $reading_patterns ) {
				$reading_patterns = $this->match_patterns( $this->expand_macros( $line, $macros ), $patterns, $line_no );
			} elseif ( $reading_exceptions ) {
				$reading_exceptions = $this->match_exceptions( $this->expand_macros( $line, $macros ), $exceptions, $line_no );
			} else {
				// Not a pattern & not an exception.
				if ( \preg_match( '/^\s*%.*$/u', $line, $matches ) ) {
					$comments[] = $line;
				} elseif ( \preg_match( '/^\s*\\\patterns\s*\{\s*(.*)$/u', $line, $matches ) ) {
					// Content following the opening brace is treated like any other
					// pattern line, including macro expansion.
					$reading_patterns = $this->match_patterns( $this->expand_macros( $matches[1], $macros ), $patterns, $line_no );
				} elseif ( \preg_match( '/^\s*\\\hyphenation\s*{\s*(.*)$/u', $line, $matches ) ) {
					// Content following the opening brace is treated like any other
					// exception line, including macro expansion.
					$reading_exceptions = $this->match_exceptions( $this->expand_macros( $matches[1], $macros ), $exceptions, $line_no );
				} elseif ( \preg_match( '/^\s*\\\endinput.*$/u', $line, $matches ) ) {
					// Ignore this line completely.
					continue;
				} elseif ( \preg_match( '/^\s*\\\def\\\(\w+)#1\s*\{([^\}]*)\}\s*$/u', $line, $matches ) ) {
					// Add a macro definition.
					$macros[ $matches[1] ] = $matches[2];
				} elseif ( \preg_match( '/^\s*\\\edef\\\(\w+)#1\s*\{(.*)\}\s*$/u', $line, $matches ) ) {
					// Add a macro definition and expand any contained macros.
					$macros[ $matches[1] ] = $this->expand_macros( $matches[2], $macros );
				} elseif ( \preg_match( '/^\s*\\\[\w]+.*$/u', $line, $matches ) ) {
					// Treat other commands as comments unless we are matching exceptions or patterns.
					$comments[] = $line;
				} elseif ( \preg_match( '/^\s*$/u', $line, $matches ) ) {
					continue; // Do nothing.
				} else {
					throw new \RangeException( "Error: unknown string $line at line $line_no\n" );
				}
			}
		}
	}
}
394