Scrutinizer GitHub App not installed

We could not synchronize checks via GitHub's checks API since Scrutinizer's GitHub App is not installed for this repository.

Install GitHub App

GitHub Access Token became invalid

It seems like the GitHub access token used for retrieving details about this repository from GitHub became invalid. This might prevent certain types of inspections from being run (in particular, everything related to pull requests).
Please ask an admin of your repository to renew the access token on this website.
Completed
Push — master ( de054b...31341b )
by Der Mundschenk
12s
created

Pattern_Converter::get_segment()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 2
Code Lines 1

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
dl 0
loc 2
rs 10
c 0
b 0
f 0
cc 1
eloc 1
nc 1
nop 1
1
<?php
2
/**
3
 *  This file is part of PHP-Typography.
4
 *
5
 *  Copyright 2015-2018 Peter Putzer.
6
 *
7
 *  This program is free software; you can redistribute it and/or modify
8
 *  it under the terms of the GNU General Public License as published by
9
 *  the Free Software Foundation; either version 2 of the License, or
10
 *  (at your option) any later version.
11
 *
12
 *  This program is distributed in the hope that it will be useful,
13
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
14
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15
 *  GNU General Public License for more details.
16
 *
17
 *  You should have received a copy of the GNU General Public License along
18
 *  with this program; if not, write to the Free Software Foundation, Inc.,
19
 *  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
20
 *
21
 *  ***
22
 *
23
 *  @package mundschenk-at/php-typography
24
 *  @author Peter Putzer <[email protected]>
25
 *  @license http://www.gnu.org/licenses/gpl-2.0.html
26
 */
27
28
namespace PHP_Typography\Bin;
29
30
use PHP_Typography\Strings;
31
32
/**
33
 *  Convert LaTeX hyphenation pattern files to JSON.
34
 *
35
 *  @author Peter Putzer <[email protected]>
36
 */
37
class Pattern_Converter {

	/**
	 * Pattern file URL(s) to fetch.
	 *
	 * @since 6.1.0
	 *
	 * @var string[]
	 */
	protected $urls;

	/**
	 * Human-readable language name.
	 *
	 * @var string
	 */
	protected $language;

	/**
	 * Allowed word characters in PCRE syntax (the contents of a character
	 * class, without the surrounding brackets).
	 *
	 * @var string
	 */
	protected $word_characters;

	/**
	 * Creates a new converter object.
	 *
	 * @param string|string[] $urls     The TeX pattern file URL(s).
	 * @param string          $language A human-readable language name.
	 */
	public function __construct( $urls, $language ) {
		$this->urls     = (array) $urls;
		$this->language = $language;

		// Assemble the character class body used by all pattern/exception regexes.
		$this->word_characters = implode( '', [
			"\w.'ʼ᾽ʼ᾿’",
			Strings::uchr( 8205, 8204, 768, 769, 771, 772, 775, 776, 784, 803, 805, 814, 817 ),
			'\p{Mn}',
			'\p{Bengali}',
			'\p{Cyrillic}' . Strings::uchr( 7296, 7297, 7298, 7299, 7300, 7301, 7302, 7303, 7304, 65070, 65071 ),
			'\p{Devanagari}' . Strings::uchr( 2385, 2386 ),
			'\p{Ethiopic}',
			'\p{Gujarati}',
			'\p{Gurmukhi}',
			'\p{Kannada}',
			'\p{Malayalam}',
			'\p{Oriya}',
			'\p{Tamil}',
			'\p{Telugu}',
			'\p{Thai}',
			'-',
		] );
	}

	/**
	 * Retrieve patgen segment from TeX hyphenation pattern.
	 *
	 * Dots are replaced by underscores and all ASCII digits are removed.
	 *
	 * @param string $pattern TeX hyphenation pattern.
	 * @return string
	 */
	protected function get_segment( $pattern ) {
		return preg_replace( '/[0-9]/', '', str_replace( '.', '_', $pattern ) );
	}

	/**
	 * Calculate patgen sequence from TeX hyphenation pattern.
	 *
	 * The sequence contains one digit for every position between (and around)
	 * the characters of the segment, i.e. it is one longer than the segment.
	 *
	 * @param string $pattern TeX hyphenation pattern.
	 *
	 * @throws \RangeException Thrown when the calculated pattern length is invalid.
	 *
	 * @return string
	 */
	protected function get_sequence( $pattern ) {
		$characters = Strings::mb_str_split( str_replace( '.', '_', $pattern ) );
		$result     = [];

		foreach ( $characters as $index => $chr ) {
			if ( ctype_digit( $chr ) ) {
				$result[] = $chr;
			} else {
				// Append '0' if this is the first character or the previous character was not a number.
				if ( ! isset( $characters[ $index - 1 ] ) || ! ctype_digit( $characters[ $index - 1 ] ) ) {
					$result[] = '0';
				}

				// Append '0' if this is the last character.
				if ( ! isset( $characters[ $index + 1 ] ) ) {
					$result[] = '0';
				}
			}
		}

		// Do some error checking.
		$count     = count( $result );
		$count_seg = mb_strlen( $this->get_segment( $pattern ) );
		$sequence  = implode( '', $result );

		if ( $count !== $count_seg + 1 ) {
			throw new \RangeException( "Invalid segment length $count for pattern $pattern (result sequence $sequence)." );
		}

		return $sequence;
	}

	/**
	 * Format hyphenation pattern file for wp-Typography.
	 *
	 * @param array $patterns   An array of TeX hyphenation patterns.
	 * @param array $exceptions A list of hyphenated exception words (e.g. 'some-thing').
	 *                          The unhyphenated, lower-cased word becomes the key in the
	 *                          resulting JSON object.
	 * @param array $comments   An array of TeX comments.
	 *
	 * @return string
	 */
	protected function format_results( array $patterns, array $exceptions, array $comments ) {
		$pattern_mapping = [];

		foreach ( $patterns as $pattern ) {
			$segment = $this->get_segment( $pattern );

			// The first pattern seen for a segment wins.
			if ( ! isset( $pattern_mapping[ $segment ] ) ) {
				$pattern_mapping[ $segment ] = $this->get_sequence( $pattern );
			}
		}

		// Produce a nice exceptions mapping.
		$json_exceptions = [];
		foreach ( $exceptions as $exception ) {
			$json_exceptions[ mb_strtolower( str_replace( '-', '', $exception ) ) ] = mb_strtolower( $exception );
		}

		$json_results = [
			'language'    => $this->language,
			'source_url'  => count( $this->urls ) > 1 ? $this->urls : $this->urls[0],
			'copyright'   => array_map( 'rtrim', $comments ),
			'exceptions'  => $json_exceptions,
			'patterns'    => $pattern_mapping,
		];

		return json_encode( $json_results, JSON_PRETTY_PRINT | JSON_UNESCAPED_UNICODE );
	}

	/**
	 * Try to match sequences of TeX hyphenation exceptions.
	 *
	 * @param string $line       A line from the TeX pattern file.
	 * @param array  $exceptions A list of hyphenated exception words (e.g. 'some-thing').
	 *                           Matches are appended. Passed by reference.
	 * @param int    $line_no    Optional. Line number. Default 0.
	 *
	 * @throws \RangeException Thrown when the exception line is malformed.
	 *
	 * @return bool False once the closing brace has been seen, true while the
	 *              exceptions block continues.
	 */
	protected function match_exceptions( $line, array &$exceptions, $line_no = 0 ) {
		if ( preg_match( '/^\s*([' . $this->word_characters . '-]+)\s*}\s*(?:%.*)?$/u', $line, $matches ) ) {
			// A single exception followed by the closing brace.
			$exceptions[] = $matches[1];
			return false;
		} elseif ( preg_match( '/^\s*((?:[' . $this->word_characters . '-]+\s*)+)\s*}\s*(?:%.*)?$/u', $line, $matches ) ) {
			// Several exceptions followed by the closing brace: re-parse the part before the brace.
			$this->match_exceptions( $matches[1], $exceptions, $line_no );
			return false;
		} elseif ( preg_match( '/^\s*}\s*(?:%.*)?$/u', $line ) ) {
			// Only the closing brace.
			return false;
		} elseif ( preg_match( '/^\s*([' . $this->word_characters . '-]+)\s*(?:%.*)?$/u', $line, $matches ) ) {
			$exceptions[] = $matches[1];
		} elseif ( preg_match( '/^\s*((?:[' . $this->word_characters . '-]+\s*)+)(?:%.*)?$/u', $line, $matches ) ) {
			// Sometimes there are multiple exceptions on a single line.
			foreach ( self::split_at_whitespace( $matches[1] ) as $match ) {
				$exceptions[] = $match;
			}
		} elseif ( preg_match( '/^\s*(?:%.*)?$/u', $line ) ) {
			// Ignore comments and whitespace in exceptions.
			return true;
		} else {
			throw new \RangeException( "Error: unknown exception $line on line $line_no\n" );
		}

		return true;
	}

	/**
	 * Try to match a pattern.
	 *
	 * @param string $line     A line from the TeX pattern file.
	 * @param array  $patterns An array of patterns. Matches are appended. Passed by reference.
	 * @param int    $line_no  Optional. Line number. Default 0.
	 *
	 * @throws \RangeException Thrown when the pattern line is malformed.
	 *
	 * @return bool False once the closing brace has been seen, true while the
	 *              patterns block continues.
	 */
	protected function match_patterns( $line, array &$patterns, $line_no = 0 ) {
		if ( preg_match( '/^\s*([' . $this->word_characters . ']+)\s*}\s*(?:%.*)?$/u', $line, $matches ) ) {
			// A single pattern followed by the closing brace.
			$patterns[] = $matches[1];
			return false;
		} elseif ( preg_match( '/^\s*}\s*(?:%.*)?$/u', $line ) ) {
			// Only the closing brace.
			return false;
		} elseif ( preg_match( '/^\s*([' . $this->word_characters . ']+)\s*(?:%.*)?$/u', $line, $matches ) ) {
			$patterns[] = $matches[1];
		} elseif ( preg_match( '/^\s*((?:[' . $this->word_characters . ']+\s*)+)(?:%.*)?$/u', $line, $matches ) ) {
			// Sometimes there are multiple patterns on a single line.
			foreach ( self::split_at_whitespace( $matches[1] ) as $match ) {
				$patterns[] = $match;
			}
		} elseif ( preg_match( '/^\s*(?:%.*)?$/u', $line ) ) {
			// Ignore comments and whitespace in patterns.
			return true;
		} else {
			throw new \RangeException( "Error: unknown pattern $line on line $line_no\n" );
		}

		return true;
	}

	/**
	 * Replace macros in the given line.
	 *
	 * Every `\name{arg}` invocation whose name has a non-empty definition in
	 * `$macros` is replaced by that definition, with `#1` substituted by `arg`.
	 * Unknown macros are left untouched.
	 *
	 * @since 6.1.0
	 *
	 * @param  string   $line   The input string.
	 * @param  string[] $macros The macros.
	 *
	 * @return string
	 */
	protected function expand_macros( $line, array $macros ) {
		if ( 0 < preg_match_all( '/\\\(?<name>\w+)\{(?<arg>[^\}]+)\}/u', $line, $matches, PREG_SET_ORDER ) ) {
			foreach ( $matches as $m ) {
				if ( ! empty( $macros[ $m['name'] ] ) ) {
					// Use literal string replacement: regex-based replacement would break on
					// arguments containing the delimiter ('/') and would interpret '$1' or
					// '\1' inside the argument/definition as back-references.
					$expanded = str_replace( '#1', $m['arg'], $macros[ $m['name'] ] );
					$line     = str_replace( $m[0], $expanded, $line );
				}
			}
		}

		return $line;
	}

	/**
	 * Split line (fragment) at whitespace.
	 *
	 * @param  string $line A line (fragment).
	 *
	 * @return array The non-empty fragments. Empty if the line could not be split.
	 */
	private static function split_at_whitespace( $line ) {
		$parts = \preg_split( '/\s+/Su', $line, -1, PREG_SPLIT_NO_EMPTY );

		// preg_split() returns false on failure; never leak that as a list element.
		return false === $parts ? [] : $parts;
	}

	/**
	 * Convert the given TeX files.
	 *
	 * @throws \RangeException Thrown when a line cannot be parsed at all.
	 * @throws \RuntimeException Thrown when file does not exist.
	 *
	 * @return string
	 */
	public function convert() {
		// Results.
		$comments   = [];
		$patterns   = [];
		$exceptions = [];

		foreach ( $this->urls as $url ) {
			$this->convert_single_file( $url, $patterns, $exceptions, $comments );
		}

		return $this->format_results( $patterns, $exceptions, $comments );
	}

	/**
	 * Convert the given TeX file.
	 *
	 * @since 6.1.0
	 *
	 * @param string   $url        Pattern file URL.
	 * @param string[] $patterns   Extracted pattern lines. Passed by reference.
	 * @param string[] $exceptions Extracted hyphenation exception lines. Passed by reference.
	 * @param string[] $comments   Extracted comments lines. Passed by reference.
	 *
	 * @throws \RangeException Thrown when a line cannot be parsed at all.
	 * @throws \RuntimeException Thrown when file does not exist.
	 */
	protected function convert_single_file( $url, &$patterns, &$exceptions, &$comments ) {
		if ( ! file_exists( $url ) && 404 === File_Operations::get_http_response_code( $url ) ) {
			throw new \RuntimeException( "Error: unknown pattern file '{$url}'\n" );
		}

		// Status indicators.
		$reading_patterns   = false;
		$reading_exceptions = false;

		// Macro definitions.
		$macros = [];

		$file    = new \SplFileObject( $url );
		$line_no = 0;
		while ( ! $file->eof() ) {
			$line = $file->fgets();
			$line_no++;

			if ( $reading_patterns ) {
				$reading_patterns = $this->match_patterns( $this->expand_macros( $line, $macros ), $patterns, $line_no );
			} elseif ( $reading_exceptions ) {
				$reading_exceptions = $this->match_exceptions( $this->expand_macros( $line, $macros ), $exceptions, $line_no );
			} else {
				// Not a pattern & not an exception.
				if ( preg_match( '/^\s*%.*$/u', $line, $matches ) ) {
					$comments[] = $line;
				} elseif ( preg_match( '/^\s*\\\patterns\s*\{\s*(.*)$/u', $line, $matches ) ) {
					// Start of a patterns block; the rest of the line may already contain patterns.
					$reading_patterns = $this->match_patterns( $matches[1], $patterns, $line_no );
				} elseif ( preg_match( '/^\s*\\\hyphenation\s*{\s*(.*)$/u', $line, $matches ) ) {
					// Start of an exceptions block; the rest of the line may already contain exceptions.
					$reading_exceptions = $this->match_exceptions( $matches[1], $exceptions, $line_no );
				} elseif ( preg_match( '/^\s*\\\endinput.*$/u', $line, $matches ) ) {
					// Ignore this line completely.
					continue;
				} elseif ( preg_match( '/^\s*\\\def\\\(\w+)#1\s*\{([^\}]*)\}\s*$/u', $line, $matches ) ) {
					// Add a macro definition.
					$macros[ $matches[1] ] = $matches[2];
				} elseif ( preg_match( '/^\s*\\\edef\\\(\w+)#1\s*\{(.*)\}\s*$/u', $line, $matches ) ) {
					// Add a macro definition and expand any contained macros.
					$macros[ $matches[1] ] = $this->expand_macros( $matches[2], $macros );
				} elseif ( preg_match( '/^\s*\\\[\w]+.*$/u', $line, $matches ) ) {
					// Treat other commands as comments unless we are matching exceptions or patterns.
					$comments[] = $line;
				} elseif ( preg_match( '/^\s*$/u', $line, $matches ) ) {
					continue; // Do nothing.
				} else {
					throw new \RangeException( "Error: unknown string $line at line $line_no\n" );
				}
			}
		}
	}
}
380