Scrutinizer GitHub App not installed

We could not synchronize checks via GitHub's checks API since Scrutinizer's GitHub App is not installed for this repository.

Install GitHub App

GitHub Access Token became invalid

It seems like the GitHub access token used for retrieving details about this repository from GitHub became invalid. This might prevent certain types of inspections from being run (in particular, everything related to pull requests).
Please ask an admin of your repository to renew the access token on this website.
Completed
Pull Request — master (#56)
by Der Mundschenk
04:05
created

Pattern_Converter::get_segment()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 2
Code Lines 1

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
dl 0
loc 2
rs 10
c 0
b 0
f 0
cc 1
eloc 1
nc 1
nop 1
1
<?php
2
/**
 *  This file is part of PHP-Typography.
 *
 *  Copyright 2015-2017 Peter Putzer.
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License along
 *  with this program; if not, write to the Free Software Foundation, Inc.,
 *  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 *
 *  ***
 *
 *  @package mundschenk-at/php-typography
 *  @author Peter Putzer <[email protected]>
 *  @license http://www.gnu.org/licenses/gpl-2.0.html
 */
27
28
namespace PHP_Typography\Bin;
29
30
use PHP_Typography\Strings;
31
32
/**
 *  Convert LaTeX hyphenation pattern files to JSON.
 *
 *  The converter reads a TeX pattern file line by line, collects the contents
 *  of its `\patterns{...}` and `\hyphenation{...}` groups as well as any
 *  comment lines, and serializes everything into a single JSON document.
 *
 *  @author Peter Putzer <[email protected]>
 */
class Pattern_Converter {

	/**
	 * Pattern file URL to fetch.
	 *
	 * @var string
	 */
	protected $url;

	/**
	 * Human-readable language name.
	 *
	 * @var string
	 */
	protected $language;

	/**
	 * Allowed word characters in PCRE syntax.
	 *
	 * The string is meant to be embedded inside a character class (`[...]`),
	 * which is why the literal hyphen is appended last.
	 *
	 * @var string
	 */
	protected $word_characters;

	/**
	 * Creates a new converter object.
	 *
	 * @param string $url      The TeX pattern file URL.
	 * @param string $language A human-readable language name.
	 */
	public function __construct( $url, $language ) {
		$this->url      = $url;
		$this->language = $language;

		$this->word_characters = join( '', [
			"\w.'ʼ᾽ʼ᾿’",
			Strings::uchr( 8205, 8204, 768, 769, 771, 772, 775, 776, 784, 803, 805, 814, 817 ),
			'\p{Devanagari}' . Strings::uchr( 2385, 2386 ),
			'\p{Bengali}',
			'\p{Gujarati}',
			'\p{Gurmukhi}',
			'\p{Kannada}',
			'\p{Oriya}',
			'\p{Tamil}',
			'\p{Telugu}',
			'\p{Malayalam}',
			'\p{Thai}',
			'-', // Has to stay last to be taken literally inside a character class.
		] );
	}

	/**
	 * Retrieve patgen segment from TeX hyphenation pattern.
	 *
	 * Dots are replaced by underscores and all ASCII digits are removed.
	 *
	 * @param string $pattern TeX hyphenation pattern.
	 * @return string
	 */
	protected function get_segment( $pattern ) {
		return preg_replace( '/[0-9]/', '', str_replace( '.', '_', $pattern ) );
	}

	/**
	 * Calculate patgen sequence from TeX hyphenation pattern.
	 *
	 * The sequence contains one digit for every position before, between and
	 * after the characters of the segment; positions without an explicit digit
	 * in the pattern are filled with '0'.
	 *
	 * @param string $pattern TeX hyphenation pattern.
	 *
	 * @throws \RangeException Thrown when the calculated pattern length is invalid.
	 *
	 * @return string
	 */
	protected function get_sequence( $pattern ) {
		$characters = Strings::mb_str_split( str_replace( '.', '_', $pattern ) );
		$result     = [];

		foreach ( $characters as $index => $chr ) {
			if ( ctype_digit( $chr ) ) {
				$result[] = $chr;
			} else {
				// Append '0' if this is the first character or the previous character was not a number.
				if ( ! isset( $characters[ $index - 1 ] ) || ! ctype_digit( $characters[ $index - 1 ] ) ) {
					$result[] = '0';
				}

				// Append '0' if this is the last character.
				if ( ! isset( $characters[ $index + 1 ] ) ) {
					$result[] = '0';
				}
			}
		}

		// Do some error checking: there has to be exactly one more digit than segment characters.
		$count     = count( $result );
		$count_seg = mb_strlen( $this->get_segment( $pattern ) );
		$sequence  = implode( '', $result );

		if ( $count !== $count_seg + 1 ) {
			throw new \RangeException( "Invalid segment length $count for pattern $pattern (result sequence $sequence)." );
		}

		return $sequence;
	}

	/**
	 * Format hyphenation pattern file for wp-Typography.
	 *
	 * @param array $patterns An array of TeX hyphenation patterns.
	 * @param array $exceptions {
	 *      An array of hyphenation exceptions.
	 *
	 *      @type string $key Hyphenated key (e.g. 'something' => 'some-thing').
	 * }
	 * @param array $comments An array of TeX comments.
	 *
	 * @throws \RangeException   Thrown when a pattern yields an invalid sequence.
	 * @throws \RuntimeException Thrown when the results cannot be encoded as JSON.
	 *
	 * @return string
	 */
	protected function format_results( array $patterns, array $exceptions, array $comments ) {
		$pattern_mapping = [];

		foreach ( $patterns as $pattern ) {
			$segment = $this->get_segment( $pattern );

			// The first pattern seen for a segment wins.
			if ( ! isset( $pattern_mapping[ $segment ] ) ) {
				$pattern_mapping[ $segment ] = $this->get_sequence( $pattern );
			}
		}

		// Produce a nice exceptions mapping (lower-cased unhyphenated word => lower-cased hyphenated word).
		$json_exceptions = [];
		foreach ( $exceptions as $exception ) {
			$json_exceptions[ mb_strtolower( str_replace( '-', '', $exception ) ) ] = mb_strtolower( $exception );
		}

		$json_results = [
			'language'   => $this->language,
			'source_url' => $this->url,
			'copyright'  => array_map( 'rtrim', $comments ),
			'exceptions' => $json_exceptions,
			'patterns'   => $pattern_mapping,
		];

		$json = json_encode( $json_results, JSON_PRETTY_PRINT | JSON_UNESCAPED_UNICODE );

		// json_encode() signals failure (e.g. malformed UTF-8 input) by returning false.
		if ( false === $json ) {
			throw new \RuntimeException( 'Error: could not encode results as JSON: ' . json_last_error_msg() . "\n" );
		}

		return $json;
	}

	/**
	 * Try to match sequences of TeX hyphenation exceptions.
	 *
	 * @param string $line A line from the TeX pattern file.
	 * @param array  $exceptions {
	 *      An array of hyphenation exceptions. Matches are appended to it.
	 *
	 *      @type string $key Hyphenated key (e.g. 'something' => 'some-thing').
	 * }
	 *
	 * @throws \RangeException Thrown when the exception line is malformed.
	 *
	 * @return bool False if the line contained the closing brace of the
	 *              exceptions group, true if more exception lines are expected.
	 */
	protected function match_exceptions( $line, array &$exceptions ) {
		if ( preg_match( '/^\s*([\w-]+)\s*}\s*(?:%.*)?$/u', $line, $matches ) ) {
			// A single exception followed by the closing brace.
			$exceptions[] = $matches[1];
			return false;
		} elseif ( preg_match( '/^\s*((?:[\w-]+\s*)+)\s*}\s*(?:%.*)?$/u', $line, $matches ) ) {
			// Several exceptions followed by the closing brace: parse the part before the brace on its own.
			$this->match_exceptions( $matches[1], $exceptions );
			return false;
		} elseif ( preg_match( '/^\s*}\s*(?:%.*)?$/u', $line, $matches ) ) {
			// Only the closing brace.
			return false;
		} elseif ( preg_match( '/^\s*([\w-]+)\s*(?:%.*)?$/u',  $line, $matches ) ) {
			$exceptions[] = $matches[1];
		} elseif ( preg_match( '/^\s*((?:[\w-]+\s*)+)(?:%.*)?$/u',  $line, $matches ) ) {
			// Sometimes there are multiple exceptions on a single line.
			foreach ( self::split_at_whitespace( $matches[1] ) as $match ) {
				$exceptions[] = $match;
			}
		} elseif ( preg_match( '/^\s*(?:%.*)?$/u', $line, $matches ) ) {
			// Ignore comments and whitespace in exceptions.
			return true;
		} else {
			throw new \RangeException( "Error: unknown exception line $line\n" );
		}

		return true;
	}

	/**
	 * Try to match a pattern.
	 *
	 * @param string $line     A line from the TeX pattern file.
	 * @param array  $patterns An array of patterns. Matches are appended to it.
	 *
	 * @throws \RangeException Thrown when the pattern line is malformed.
	 *
	 * @return bool False if the line contained the closing brace of the
	 *              patterns group, true if more pattern lines are expected.
	 */
	protected function match_patterns( $line, array &$patterns ) {
		if ( preg_match( '/^\s*([' . $this->word_characters . ']+)\s*}\s*(?:%.*)?$/u', $line, $matches ) ) {
			// A single pattern followed by the closing brace.
			$patterns[] = $matches[1];
			return false;
		} elseif ( preg_match( '/^\s*}\s*(?:%.*)?$/u', $line, $matches ) ) {
			// Only the closing brace.
			return false;
		} elseif ( preg_match( '/^\s*([' . $this->word_characters . ']+)\s*(?:%.*)?$/u',  $line, $matches ) ) {
			$patterns[] = $matches[1];
		} elseif ( preg_match( '/^\s*((?:[' . $this->word_characters . ']+\s*)+)(?:%.*)?$/u',  $line, $matches ) ) {
			// Sometimes there are multiple patterns on a single line.
			foreach ( self::split_at_whitespace( $matches[1] ) as $match ) {
				$patterns[] = $match;
			}
		} elseif ( preg_match( '/^\s*(?:%.*)?$/u', $line, $matches ) ) {
			// Ignore comments and whitespace in patterns.
			return true;
		} else {
			throw new \RangeException( 'Error: unknown pattern line ' . htmlentities( $line, ENT_NOQUOTES | ENT_HTML5 ) . "\n" );
		}

		return true;
	}

	/**
	 * Split line (fragment) at whitespace.
	 *
	 * Empty fragments are dropped.
	 *
	 * @param  string $line A line (fragment).
	 *
	 * @return array
	 */
	private static function split_at_whitespace( $line ) {
		// We can safely cast to an array here, as long as $line is convertible to a string.
		return (array) \preg_split( '/\s+/Su', $line, -1, PREG_SPLIT_NO_EMPTY );
	}

	/**
	 * Convert the given TeX file.
	 *
	 * @throws \RangeException   Thrown when a line cannot be parsed at all.
	 * @throws \RuntimeException Thrown when file does not exist or the results
	 *                           cannot be encoded as JSON.
	 *
	 * @return string
	 */
	public function convert() {
		if ( ! file_exists( $this->url ) && 404 === File_Operations::get_http_response_code( $this->url ) ) {
			throw new \RuntimeException( "Error: unknown pattern file '{$this->url}'\n" );
		}

		// Results.
		$comments   = [];
		$patterns   = [];
		$exceptions = [];

		// Status indicators.
		$reading_patterns   = false;
		$reading_exceptions = false;

		$file = new \SplFileObject( $this->url );
		while ( ! $file->eof() ) {
			$line = $file->fgets();

			if ( $reading_patterns ) {
				$reading_patterns = $this->match_patterns( $line, $patterns );
			} elseif ( $reading_exceptions ) {
				$reading_exceptions = $this->match_exceptions( $line, $exceptions );
			} else {
				// Not a pattern & not an exception.
				if ( preg_match( '/^\s*%.*$/u', $line, $matches ) ) {
					$comments[] = $line;
				} elseif ( preg_match( '/^\s*\\\patterns\s*\{\s*(.*)$/u', $line, $matches ) ) {
					// The remainder of the line may already contain patterns (or the closing brace).
					$reading_patterns = $this->match_patterns( $matches[1], $patterns );
				} elseif ( preg_match( '/^\s*\\\hyphenation\s*{\s*(.*)$/u', $line, $matches ) ) {
					// The remainder of the line may already contain exceptions (or the closing brace).
					$reading_exceptions = $this->match_exceptions( $matches[1], $exceptions );
				} elseif ( preg_match( '/^\s*\\\endinput.*$/u', $line, $matches ) ) {
					// Ignore this line completely.
					continue;
				} elseif ( preg_match( '/^\s*\\\[\w]+.*$/u', $line, $matches ) ) {
					// Treat other commands as comments unless we are matching exceptions or patterns.
					$comments[] = $line;
				} elseif ( preg_match( '/^\s*$/u', $line, $matches ) ) {
					continue; // Do nothing.
				} else {
					throw new \RangeException( "Error: unknown line $line\n" );
				}
			}
		}

		return $this->format_results( $patterns, $exceptions, $comments );
	}
}
319