Scrutinizer GitHub App not installed

We could not synchronize checks via GitHub's checks API since Scrutinizer's GitHub App is not installed for this repository.

Install GitHub App

GitHub Access Token became invalid

It seems like the GitHub access token used for retrieving details about this repository from GitHub became invalid. This might prevent certain types of inspections from being run (in particular, everything related to pull requests).
Please ask an admin of your repository to renew the access token on this website.
Completed
Pull Request — master (#59)
by Der Mundschenk
03:47
created

Pattern_Converter::convert()   A

Complexity

Conditions 2
Paths 2

Size

Total Lines 11
Code Lines 6

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
dl 0
loc 11
rs 9.4285
c 0
b 0
f 0
cc 2
eloc 6
nc 2
nop 0
1
<?php
2
/**
3
 *  This file is part of PHP-Typography.
4
 *
5
 *  Copyright 2015-2017 Peter Putzer.
6
 *
7
 *  This program is free software; you can redistribute it and/or modify
8
 *  it under the terms of the GNU General Public License as published by
9
 *  the Free Software Foundation; either version 2 of the License, or
10
 *  (at your option) any later version.
11
 *
12
 *  This program is distributed in the hope that it will be useful,
13
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
14
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15
 *  GNU General Public License for more details.
16
 *
17
 *  You should have received a copy of the GNU General Public License along
18
 *  with this program; if not, write to the Free Software Foundation, Inc.,
19
 *  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
20
 *
21
 *  ***
22
 *
23
 *  @package mundschenk-at/php-typography
24
 *  @author Peter Putzer <[email protected]>
25
 *  @license http://www.gnu.org/licenses/gpl-2.0.html
26
 */
27
28
namespace PHP_Typography\Bin;
29
30
use PHP_Typography\Strings;
31
32
/**
33
 *  Convert LaTeX hyphenation pattern files to JSON.
34
 *
35
 *  @author Peter Putzer <[email protected]>
36
 */
37
class Pattern_Converter {
38
39
	/**
40
	 * Pattern file URL(s) to fetch.
41
	 *
42
	 * @since 6.1.0
43
	 *
44
	 * @var string[]
45
	 */
46
	protected $urls;
47
48
	/**
49
	 * Human-readable language name.
50
	 *
51
	 * @var string
52
	 */
53
	protected $language;
54
55
	/**
56
	 * Allowed word characters in PCRE syntax.
57
	 *
58
	 * @var string
59
	 */
60
	protected $word_characters;
61
62
	/**
	 * Sets up a converter for the given pattern file(s) and language.
	 *
	 * In addition to storing the arguments, this assembles the PCRE character
	 * class fragment that the line matchers use to recognize the characters
	 * allowed in patterns and exceptions.
	 *
	 * @param string|string[] $urls     The TeX pattern file URL(s). A single string is cast to a one-element array.
	 * @param string          $language A human-readable language name.
	 */
	public function __construct( $urls, $language ) {
		$this->urls     = (array) $urls;
		$this->language = $language;

		// The fragments are concatenated in this order and later embedded in a `[...]` class.
		$character_groups = [
			"\w.'ʼ᾽ʼ᾿’",
			Strings::uchr( 8205, 8204, 768, 769, 771, 772, 775, 776, 784, 803, 805, 814, 817 ),
			'\p{Mn}',
			'\p{Cyrillic}' . Strings::uchr( 7296, 7297, 7298, 7299, 7300, 7301, 7302, 7303, 7304, 65070, 65071 ),
			'\p{Devanagari}' . Strings::uchr( 2385, 2386 ),
			'\p{Bengali}',
			'\p{Gujarati}',
			'\p{Gurmukhi}',
			'\p{Kannada}',
			'\p{Oriya}',
			'\p{Tamil}',
			'\p{Telugu}',
			'\p{Malayalam}',
			'\p{Thai}',
			'-',
		];

		$this->word_characters = implode( '', $character_groups );
	}
90
91
	/**
	 * Extracts the patgen segment from a TeX hyphenation pattern.
	 *
	 * Every '.' is replaced by '_' and all ASCII digits are removed.
	 *
	 * @param string $pattern TeX hyphenation pattern.
	 * @return string The pattern without digits, with '.' turned into '_'.
	 */
	protected function get_segment( $pattern ) {
		$with_underscores = str_replace( '.', '_', $pattern );
		$without_digits   = preg_replace( '/[0-9]/', '', $with_underscores );

		return $without_digits;
	}
100
101
	/**
	 * Derives the patgen digit sequence from a TeX hyphenation pattern.
	 *
	 * Digits found in the pattern are copied as-is. For every other character,
	 * a '0' is added when no digit directly precedes it, and one more '0' is
	 * added when it is the final character of the pattern. The number of
	 * collected entries is then checked against the segment length plus one.
	 *
	 * @param string $pattern TeX hyphenation pattern.
	 *
	 * @throws \RangeException Thrown when the number of sequence entries is not the segment length plus one.
	 *
	 * @return string
	 */
	protected function get_sequence( $pattern ) {
		$chars  = Strings::mb_str_split( str_replace( '.', '_', $pattern ) );
		$digits = [];

		foreach ( $chars as $pos => $current ) {
			if ( ctype_digit( $current ) ) {
				$digits[] = $current;
				continue;
			}

			// A non-digit without a digit directly in front of it gets an implicit '0'.
			$follows_digit = isset( $chars[ $pos - 1 ] ) && ctype_digit( $chars[ $pos - 1 ] );
			if ( ! $follows_digit ) {
				$digits[] = '0';
			}

			// A trailing non-digit gets an implicit '0' after it as well.
			$is_last = ! isset( $chars[ $pos + 1 ] );
			if ( $is_last ) {
				$digits[] = '0';
			}
		}

		// Sanity check: there has to be one entry more than there are segment characters.
		$sequence = implode( '', $digits );
		$count    = count( $digits );
		$expected = mb_strlen( $this->get_segment( $pattern ) ) + 1;

		if ( $count !== $expected ) {
			throw new \RangeException( "Invalid segment length $count for pattern $pattern (result sequence $sequence)." );
		}

		return $sequence;
	}
141
142
	/**
	 * Builds the JSON document for a converted pattern file.
	 *
	 * The result contains the language name, the source URL (or the list of
	 * URLs if there is more than one), the right-trimmed comment lines, a map
	 * from unhyphenated to hyphenated exception words (both lower-cased) and a
	 * map from pattern segments to their digit sequences.
	 *
	 * @param array $patterns An array of TeX hyphenation patterns.
	 * @param array $exceptions {
	 *      An array of hyphenation exceptions.
	 *
	 *      @type string $key Hyphenated key (e.g. 'something' => 'some-thing').
	 * }
	 * @param array $comments An array of TeX comments.
	 *
	 * @return string
	 */
	protected function format_results( array $patterns, array $exceptions, array $comments ) {
		// Only the first pattern seen for a given segment is kept.
		$sequences_by_segment = [];
		foreach ( $patterns as $tex_pattern ) {
			$segment_key = $this->get_segment( $tex_pattern );

			if ( isset( $sequences_by_segment[ $segment_key ] ) ) {
				continue;
			}

			$sequences_by_segment[ $segment_key ] = $this->get_sequence( $tex_pattern );
		}

		// Map each lower-cased word without hyphens to its lower-cased hyphenated form.
		$hyphenations = [];
		foreach ( $exceptions as $hyphenated ) {
			$plain_word                  = mb_strtolower( str_replace( '-', '', $hyphenated ) );
			$hyphenations[ $plain_word ] = mb_strtolower( $hyphenated );
		}

		// A single source is emitted as a plain string, several sources as a list.
		if ( count( $this->urls ) > 1 ) {
			$source_url = $this->urls;
		} else {
			$source_url = $this->urls[0];
		}

		$document = [
			'language'    => $this->language,
			'source_url'  => $source_url,
			'copyright'   => array_map( 'rtrim', $comments ),
			'exceptions'  => $hyphenations,
			'patterns'    => $sequences_by_segment,
		];

		return json_encode( $document, JSON_PRETTY_PRINT | JSON_UNESCAPED_UNICODE );
	}
182
183
	/**
	 * Tries to match sequences of TeX hyphenation exceptions.
	 *
	 * The line is tested against a fixed series of shapes, in this order:
	 * one exception plus closing brace, several exceptions plus closing brace,
	 * a lone closing brace, one exception, several exceptions, and finally a
	 * blank or comment-only line. Matched exceptions are appended to
	 * `$exceptions`.
	 *
	 * @param string $line A line from the TeX pattern file.
	 * @param array  $exceptions {
	 *      An array of hyphenation exceptions.
	 *
	 *      @type string $key Hyphenated key (e.g. 'something' => 'some-thing').
	 * }
	 * @param int    $line_no  Optional. Line number. Default 0.
	 *
	 * @throws \RangeException Thrown when the exception line is malformed.
	 *
	 * @return bool False once the closing brace has been seen, true otherwise.
	 */
	protected function match_exceptions( $line, array &$exceptions, $line_no = 0 ) {
		// A single exception followed by the closing brace ends the block.
		if ( preg_match( '/^\s*([' . $this->word_characters . '-]+)\s*}\s*(?:%.*)?$/u', $line, $found ) ) {
			$exceptions[] = $found[1];

			return false;
		}

		// Several exceptions followed by the closing brace: process the words without the brace, then end the block.
		if ( preg_match( '/^\s*((?:[' . $this->word_characters . '-]+\s*)+)\s*}\s*(?:%.*)?$/u', $line, $found ) ) {
			$this->match_exceptions( $found[1], $exceptions, $line_no );

			return false;
		}

		// A closing brace on its own ends the block.
		if ( preg_match( '/^\s*}\s*(?:%.*)?$/u', $line, $found ) ) {
			return false;
		}

		// Exactly one exception on the line.
		if ( preg_match( '/^\s*([' . $this->word_characters . '-]+)\s*(?:%.*)?$/u', $line, $found ) ) {
			$exceptions[] = $found[1];

			return true;
		}

		// Sometimes there are multiple exceptions on a single line.
		if ( preg_match( '/^\s*((?:[' . $this->word_characters . '-]+\s*)+)(?:%.*)?$/u', $line, $found ) ) {
			foreach ( self::split_at_whitespace( $found[1] ) as $word ) {
				$exceptions[] = $word;
			}

			return true;
		}

		// Comments and whitespace inside the exceptions block are ignored.
		if ( preg_match( '/^\s*(?:%.*)?$/u', $line, $found ) ) {
			return true;
		}

		throw new \RangeException( "Error: unknown exception $line on line $line_no\n" );
	}
223
224
	/**
	 * Tries to match hyphenation patterns on a line.
	 *
	 * The line is tested against a fixed series of shapes, in this order:
	 * one pattern plus closing brace, a lone closing brace, one pattern,
	 * several patterns, and finally a blank or comment-only line. Matched
	 * patterns are appended to `$patterns`.
	 *
	 * @param string $line     A line from the TeX pattern file.
	 * @param array  $patterns An array of patterns.
	 * @param int    $line_no  Optional. Line number. Default 0.
	 *
	 * @throws \RangeException Thrown when the pattern line is malformed.
	 *
	 * @return bool False once the closing brace has been seen, true otherwise.
	 */
	protected function match_patterns( $line, array &$patterns, $line_no = 0 ) {
		// A single pattern followed by the closing brace ends the block.
		if ( preg_match( '/^\s*([' . $this->word_characters . ']+)\s*}\s*(?:%.*)?$/u', $line, $found ) ) {
			$patterns[] = $found[1];

			return false;
		}

		// A closing brace on its own ends the block.
		if ( preg_match( '/^\s*}\s*(?:%.*)?$/u', $line, $found ) ) {
			return false;
		}

		// Exactly one pattern on the line.
		if ( preg_match( '/^\s*([' . $this->word_characters . ']+)\s*(?:%.*)?$/u', $line, $found ) ) {
			$patterns[] = $found[1];

			return true;
		}

		// Sometimes there are multiple patterns on a single line.
		if ( preg_match( '/^\s*((?:[' . $this->word_characters . ']+\s*)+)(?:%.*)?$/u', $line, $found ) ) {
			foreach ( self::split_at_whitespace( $found[1] ) as $fragment ) {
				$patterns[] = $fragment;
			}

			return true;
		}

		// Comments and whitespace inside the patterns block are ignored.
		if ( preg_match( '/^\s*(?:%.*)?$/u', $line, $found ) ) {
			return true;
		}

		throw new \RangeException( 'Error: unknown pattern ' . htmlentities( $line, ENT_NOQUOTES | ENT_HTML5 ) . " on line $line_no\n" );
	}
257
258
	/**
	 * Splits a line (fragment) at runs of whitespace.
	 *
	 * Empty fragments are dropped, so leading and trailing whitespace do not
	 * produce empty entries.
	 *
	 * @param  string $line A line (fragment).
	 *
	 * @throws \RangeException Thrown when the line cannot be split (e.g. it is not valid UTF-8).
	 *
	 * @return array The non-empty fragments, in order of appearance.
	 */
	private static function split_at_whitespace( $line ) {
		$fragments = \preg_split( '/\s+/Su', $line, -1, PREG_SPLIT_NO_EMPTY );

		// preg_split() returns false on failure. Casting that to an array would yield
		// [ false ], which callers would then store as a bogus entry, so fail loudly instead.
		if ( false === $fragments ) {
			throw new \RangeException( 'Error: unable to split line at whitespace (PCRE error code ' . \preg_last_error() . ").\n" );
		}

		return $fragments;
	}
269
270
	/**
	 * Converts all configured TeX files into a single JSON document.
	 *
	 * Each URL is processed in order; the patterns, exceptions and comments
	 * of all files are accumulated and then formatted together.
	 *
	 * @throws \RangeException Thrown when a line cannot be parsed at all.
	 * @throws \RuntimeException Thrown when file does not exist.
	 *
	 * @return string
	 */
	public function convert() {
		// Accumulators shared by all input files.
		$collected_patterns   = [];
		$collected_exceptions = [];
		$collected_comments   = [];

		foreach ( $this->urls as $source ) {
			$this->convert_single_file( $source, $collected_patterns, $collected_exceptions, $collected_comments );
		}

		return $this->format_results( $collected_patterns, $collected_exceptions, $collected_comments );
	}
290
291
	/**
	 * Converts a single TeX pattern file.
	 *
	 * The file is read line by line. Inside a `\patterns{` or `\hyphenation{`
	 * block, lines are handed to match_patterns() or match_exceptions() until
	 * these report the end of the block. Outside of such blocks, comment lines
	 * and other TeX commands are collected as comments, `\endinput` and blank
	 * lines are skipped, and anything else is rejected.
	 *
	 * @since 6.1.0
	 *
	 * @param string   $url        Pattern file URL.
	 * @param string[] $patterns   Extracted pattern lines. Passed by reference.
	 * @param string[] $exceptions Extracted hyphenation exception lines. Passed by reference.
	 * @param string[] $comments   Extracted comments lines. Passed by reference.
	 *
	 * @throws \RangeException Thrown when a line cannot be parsed at all.
	 * @throws \RuntimeException Thrown when file does not exist.
	 */
	protected function convert_single_file( $url, &$patterns, &$exceptions, &$comments ) {
		// The HTTP lookup is only attempted when there is no such local file.
		$is_missing = ! file_exists( $url ) && 404 === File_Operations::get_http_response_code( $url );
		if ( $is_missing ) {
			throw new \RuntimeException( "Error: unknown pattern file '{$url}'\n" );
		}

		// Parser state: which kind of block (if any) the current line belongs to.
		$in_patterns   = false;
		$in_exceptions = false;

		$reader = new \SplFileObject( $url );
		for ( $line_no = 1; ! $reader->eof(); $line_no++ ) {
			$line = $reader->fgets();

			if ( $in_patterns ) {
				$in_patterns = $this->match_patterns( $line, $patterns, $line_no );
				continue;
			}

			if ( $in_exceptions ) {
				$in_exceptions = $this->match_exceptions( $line, $exceptions, $line_no );
				continue;
			}

			// Not a pattern & not an exception. The order of the checks below matters,
			// since the generic command check would also match the specific commands.
			if ( preg_match( '/^\s*%.*$/u', $line, $found ) ) {
				$comments[] = $line;
				continue;
			}

			if ( preg_match( '/^\s*\\\patterns\s*\{\s*(.*)$/u', $line, $found ) ) {
				// The remainder of the line may already contain patterns.
				$in_patterns = $this->match_patterns( $found[1], $patterns, $line_no );
				continue;
			}

			if ( preg_match( '/^\s*\\\hyphenation\s*{\s*(.*)$/u', $line, $found ) ) {
				// The remainder of the line may already contain exceptions.
				$in_exceptions = $this->match_exceptions( $found[1], $exceptions, $line_no );
				continue;
			}

			if ( preg_match( '/^\s*\\\endinput.*$/u', $line, $found ) ) {
				// Ignore this line completely.
				continue;
			}

			if ( preg_match( '/^\s*\\\[\w]+.*$/u', $line, $found ) ) {
				// Treat other commands as comments unless we are matching exceptions or patterns.
				$comments[] = $line;
				continue;
			}

			if ( preg_match( '/^\s*$/u', $line, $found ) ) {
				continue; // Do nothing.
			}

			throw new \RangeException( "Error: unknown string $line at line $line_no\n" );
		}
	}
345
}
346