@@ -9,14 +9,14 @@ |
||
9 | 9 | |
10 | 10 | class PHP extends PrintableAscii |
11 | 11 | { |
12 | - /** {@inheritdoc} */ |
|
13 | - protected $maxValue = 0x10FFFF; |
|
12 | + /** {@inheritdoc} */ |
|
13 | + protected $maxValue = 0x10FFFF; |
|
14 | 14 | |
15 | - /** |
|
16 | - * {@inheritdoc} |
|
17 | - */ |
|
18 | - protected function escapeUnicode($cp) |
|
19 | - { |
|
20 | - return sprintf('\\x{%04X}', $cp); |
|
21 | - } |
|
15 | + /** |
|
16 | + * {@inheritdoc} |
|
17 | + */ |
|
18 | + protected function escapeUnicode($cp) |
|
19 | + { |
|
20 | + return sprintf('\\x{%04X}', $cp); |
|
21 | + } |
|
22 | 22 | } |
23 | 23 | \ No newline at end of file |
@@ -11,34 +11,34 @@ |
||
11 | 11 | |
12 | 12 | abstract class BaseImplementation implements OutputInterface |
13 | 13 | { |
14 | - /** |
|
15 | - * @var integer |
|
16 | - */ |
|
17 | - protected $maxValue = 0; |
|
14 | + /** |
|
15 | + * @var integer |
|
16 | + */ |
|
17 | + protected $maxValue = 0; |
|
18 | 18 | |
19 | - /** |
|
20 | - * @var integer |
|
21 | - */ |
|
22 | - protected $minValue = 0; |
|
19 | + /** |
|
20 | + * @var integer |
|
21 | + */ |
|
22 | + protected $minValue = 0; |
|
23 | 23 | |
24 | - /** |
|
25 | - * {@inheritdoc} |
|
26 | - */ |
|
27 | - public function output($value) |
|
28 | - { |
|
29 | - if ($value < $this->minValue || $value > $this->maxValue) |
|
30 | - { |
|
31 | - throw new InvalidArgumentException('Value ' . $value . ' is out of bounds (' . $this->minValue . '..' . $this->maxValue . ')'); |
|
32 | - } |
|
24 | + /** |
|
25 | + * {@inheritdoc} |
|
26 | + */ |
|
27 | + public function output($value) |
|
28 | + { |
|
29 | + if ($value < $this->minValue || $value > $this->maxValue) |
|
30 | + { |
|
31 | + throw new InvalidArgumentException('Value ' . $value . ' is out of bounds (' . $this->minValue . '..' . $this->maxValue . ')'); |
|
32 | + } |
|
33 | 33 | |
34 | - return $this->outputValidValue($value); |
|
35 | - } |
|
34 | + return $this->outputValidValue($value); |
|
35 | + } |
|
36 | 36 | |
37 | - /** |
|
38 | - * Serialize a valid value into a character |
|
39 | - * |
|
40 | - * @param integer $value |
|
41 | - * @return string |
|
42 | - */ |
|
43 | - abstract protected function outputValidValue($value); |
|
37 | + /** |
|
38 | + * Serialize a valid value into a character |
|
39 | + * |
|
40 | + * @param integer $value |
|
41 | + * @return string |
|
42 | + */ |
|
43 | + abstract protected function outputValidValue($value); |
|
44 | 44 | } |
45 | 45 | \ No newline at end of file |
@@ -11,52 +11,52 @@ |
||
11 | 11 | |
12 | 12 | class Utf8 implements InputInterface |
13 | 13 | { |
14 | - /** |
|
15 | - * {@inheritdoc} |
|
16 | - */ |
|
17 | - public function split($string) |
|
18 | - { |
|
19 | - if (preg_match_all('(.)us', $string, $matches) === false) |
|
20 | - { |
|
21 | - throw new InvalidArgumentException('Invalid UTF-8 string'); |
|
22 | - } |
|
14 | + /** |
|
15 | + * {@inheritdoc} |
|
16 | + */ |
|
17 | + public function split($string) |
|
18 | + { |
|
19 | + if (preg_match_all('(.)us', $string, $matches) === false) |
|
20 | + { |
|
21 | + throw new InvalidArgumentException('Invalid UTF-8 string'); |
|
22 | + } |
|
23 | 23 | |
24 | - return $this->charsToCodepoints($matches[0]); |
|
25 | - } |
|
24 | + return $this->charsToCodepoints($matches[0]); |
|
25 | + } |
|
26 | 26 | |
27 | - /** |
|
28 | - * Convert a list of UTF-8 characters to a list of Unicode codepoint |
|
29 | - * |
|
30 | - * @param string[] $chars |
|
31 | - * @return integer[] |
|
32 | - */ |
|
33 | - protected function charsToCodepoints(array $chars) |
|
34 | - { |
|
35 | - return array_map([$this, 'cp'], $chars); |
|
36 | - } |
|
27 | + /** |
|
28 | + * Convert a list of UTF-8 characters to a list of Unicode codepoints |
|
29 | + * |
|
30 | + * @param string[] $chars |
|
31 | + * @return integer[] |
|
32 | + */ |
|
33 | + protected function charsToCodepoints(array $chars) |
|
34 | + { |
|
35 | + return array_map([$this, 'cp'], $chars); |
|
36 | + } |
|
37 | 37 | |
38 | - /** |
|
39 | - * Compute and return the Unicode codepoint for given UTF-8 char |
|
40 | - * |
|
41 | - * @param string $char UTF-8 char |
|
42 | - * @return integer |
|
43 | - */ |
|
44 | - protected function cp($char) |
|
45 | - { |
|
46 | - $cp = ord($char[0]); |
|
47 | - if ($cp >= 0xF0) |
|
48 | - { |
|
49 | - $cp = ($cp << 18) + (ord($char[1]) << 12) + (ord($char[2]) << 6) + ord($char[3]) - 0x3C82080; |
|
50 | - } |
|
51 | - elseif ($cp >= 0xE0) |
|
52 | - { |
|
53 | - $cp = ($cp << 12) + (ord($char[1]) << 6) + ord($char[2]) - 0xE2080; |
|
54 | - } |
|
55 | - elseif ($cp >= 0xC0) |
|
56 | - { |
|
57 | - $cp = ($cp << 6) + ord($char[1]) - 0x3080; |
|
58 | - } |
|
38 | + /** |
|
39 | + * Compute and return the Unicode codepoint for given UTF-8 char |
|
40 | + * |
|
41 | + * @param string $char UTF-8 char |
|
42 | + * @return integer |
|
43 | + */ |
|
44 | + protected function cp($char) |
|
45 | + { |
|
46 | + $cp = ord($char[0]); |
|
47 | + if ($cp >= 0xF0) |
|
48 | + { |
|
49 | + $cp = ($cp << 18) + (ord($char[1]) << 12) + (ord($char[2]) << 6) + ord($char[3]) - 0x3C82080; |
|
50 | + } |
|
51 | + elseif ($cp >= 0xE0) |
|
52 | + { |
|
53 | + $cp = ($cp << 12) + (ord($char[1]) << 6) + ord($char[2]) - 0xE2080; |
|
54 | + } |
|
55 | + elseif ($cp >= 0xC0) |
|
56 | + { |
|
57 | + $cp = ($cp << 6) + ord($char[1]) - 0x3080; |
|
58 | + } |
|
59 | 59 | |
60 | - return $cp; |
|
61 | - } |
|
60 | + return $cp; |
|
61 | + } |
|
62 | 62 | } |
63 | 63 | \ No newline at end of file |
@@ -18,145 +18,145 @@ |
||
18 | 18 | |
19 | 19 | class Builder |
20 | 20 | { |
21 | - /** |
|
22 | - * @var InputInterface |
|
23 | - */ |
|
24 | - protected $input; |
|
25 | - |
|
26 | - /** |
|
27 | - * @var Runner |
|
28 | - */ |
|
29 | - protected $runner; |
|
30 | - |
|
31 | - /** |
|
32 | - * @var Serializer |
|
33 | - */ |
|
34 | - protected $serializer; |
|
35 | - |
|
36 | - /** |
|
37 | - * @param array $config |
|
38 | - */ |
|
39 | - public function __construct(array $config = []) |
|
40 | - { |
|
41 | - $config += [ |
|
42 | - 'delimiter' => '/', |
|
43 | - 'input' => 'Bytes', |
|
44 | - 'output' => 'Bytes' |
|
45 | - ]; |
|
46 | - |
|
47 | - $this->setInput($config['input']); |
|
48 | - $this->setSerializer($config['output'], $config['delimiter']); |
|
49 | - $this->setRunner(); |
|
50 | - } |
|
51 | - |
|
52 | - /** |
|
53 | - * Build and return a regular expression that matches all of the given strings |
|
54 | - * |
|
55 | - * @param string[] $strings Literal strings to be matched |
|
56 | - * @return string Regular expression (without delimiters) |
|
57 | - */ |
|
58 | - public function build(array $strings) |
|
59 | - { |
|
60 | - $strings = array_unique($strings); |
|
61 | - if ($this->isEmpty($strings)) |
|
62 | - { |
|
63 | - return ''; |
|
64 | - } |
|
65 | - |
|
66 | - $strings = $this->splitStrings($strings); |
|
67 | - usort($strings, __CLASS__ . '::compareStrings'); |
|
68 | - $strings = $this->runner->run($strings); |
|
69 | - |
|
70 | - return $this->serializer->serializeStrings($strings); |
|
71 | - } |
|
72 | - |
|
73 | - /** |
|
74 | - * Compare two split strings |
|
75 | - * |
|
76 | - * Will sort strings in ascending order |
|
77 | - * |
|
78 | - * @param integer[] $a |
|
79 | - * @param integer[] $b |
|
80 | - * @return integer |
|
81 | - */ |
|
82 | - protected function compareStrings(array $a, array $b) |
|
83 | - { |
|
84 | - $i = -1; |
|
85 | - $cnt = min(count($a), count($b)); |
|
86 | - while (++$i < $cnt) |
|
87 | - { |
|
88 | - if ($a[$i] !== $b[$i]) |
|
89 | - { |
|
90 | - return $a[$i] - $b[$i]; |
|
91 | - } |
|
92 | - } |
|
93 | - |
|
94 | - return count($a) - count($b); |
|
95 | - } |
|
96 | - |
|
97 | - /** |
|
98 | - * Test whether the list of strings is empty |
|
99 | - * |
|
100 | - * @param string[] $strings |
|
101 | - * @return bool |
|
102 | - */ |
|
103 | - protected function isEmpty(array $strings) |
|
104 | - { |
|
105 | - return (empty($strings) || $strings === ['']); |
|
106 | - } |
|
107 | - |
|
108 | - /** |
|
109 | - * Set the InputInterface instance in $this->input |
|
110 | - * |
|
111 | - * @param string $inputType |
|
112 | - * @return void |
|
113 | - */ |
|
114 | - protected function setInput($inputType) |
|
115 | - { |
|
116 | - $className = __NAMESPACE__ . '\\Input\\' . $inputType; |
|
117 | - $this->input = new $className; |
|
118 | - } |
|
119 | - |
|
120 | - /** |
|
121 | - * Set the Runner instance $in this->runner |
|
122 | - * |
|
123 | - * @return void |
|
124 | - */ |
|
125 | - protected function setRunner() |
|
126 | - { |
|
127 | - $this->runner = new Runner; |
|
128 | - $this->runner->addPass(new MergePrefix); |
|
129 | - $this->runner->addPass(new GroupSingleCharacters); |
|
130 | - $this->runner->addPass(new Recurse($this->runner)); |
|
131 | - $this->runner->addPass(new PromoteSingleStrings); |
|
132 | - $this->runner->addPass(new MergeSuffix); |
|
133 | - $this->runner->addPass(new CoalesceSingleCharacterPrefix); |
|
134 | - } |
|
135 | - |
|
136 | - /** |
|
137 | - * Set the Serializer instance in $this->serializer |
|
138 | - * |
|
139 | - * @param string $outputType |
|
140 | - * @param string $delimiter |
|
141 | - * @return void |
|
142 | - */ |
|
143 | - protected function setSerializer($outputType, $delimiter) |
|
144 | - { |
|
145 | - $className = __NAMESPACE__ . '\\Output\\' . $outputType; |
|
146 | - $output = new $className; |
|
147 | - $escaper = new Escaper($delimiter); |
|
148 | - |
|
149 | - $this->serializer = new Serializer($output, $escaper); |
|
150 | - } |
|
151 | - |
|
152 | - /** |
|
153 | - * Split all given strings by character |
|
154 | - * |
|
155 | - * @param string[] $strings List of strings |
|
156 | - * @return array[] List of arrays |
|
157 | - */ |
|
158 | - protected function splitStrings(array $strings) |
|
159 | - { |
|
160 | - return array_map([$this->input, 'split'], $strings); |
|
161 | - } |
|
21 | + /** |
|
22 | + * @var InputInterface |
|
23 | + */ |
|
24 | + protected $input; |
|
25 | + |
|
26 | + /** |
|
27 | + * @var Runner |
|
28 | + */ |
|
29 | + protected $runner; |
|
30 | + |
|
31 | + /** |
|
32 | + * @var Serializer |
|
33 | + */ |
|
34 | + protected $serializer; |
|
35 | + |
|
36 | + /** |
|
37 | + * @param array $config |
|
38 | + */ |
|
39 | + public function __construct(array $config = []) |
|
40 | + { |
|
41 | + $config += [ |
|
42 | + 'delimiter' => '/', |
|
43 | + 'input' => 'Bytes', |
|
44 | + 'output' => 'Bytes' |
|
45 | + ]; |
|
46 | + |
|
47 | + $this->setInput($config['input']); |
|
48 | + $this->setSerializer($config['output'], $config['delimiter']); |
|
49 | + $this->setRunner(); |
|
50 | + } |
|
51 | + |
|
52 | + /** |
|
53 | + * Build and return a regular expression that matches all of the given strings |
|
54 | + * |
|
55 | + * @param string[] $strings Literal strings to be matched |
|
56 | + * @return string Regular expression (without delimiters) |
|
57 | + */ |
|
58 | + public function build(array $strings) |
|
59 | + { |
|
60 | + $strings = array_unique($strings); |
|
61 | + if ($this->isEmpty($strings)) |
|
62 | + { |
|
63 | + return ''; |
|
64 | + } |
|
65 | + |
|
66 | + $strings = $this->splitStrings($strings); |
|
67 | + usort($strings, __CLASS__ . '::compareStrings'); |
|
68 | + $strings = $this->runner->run($strings); |
|
69 | + |
|
70 | + return $this->serializer->serializeStrings($strings); |
|
71 | + } |
|
72 | + |
|
73 | + /** |
|
74 | + * Compare two split strings |
|
75 | + * |
|
76 | + * Will sort strings in ascending order |
|
77 | + * |
|
78 | + * @param integer[] $a |
|
79 | + * @param integer[] $b |
|
80 | + * @return integer |
|
81 | + */ |
|
82 | + protected function compareStrings(array $a, array $b) |
|
83 | + { |
|
84 | + $i = -1; |
|
85 | + $cnt = min(count($a), count($b)); |
|
86 | + while (++$i < $cnt) |
|
87 | + { |
|
88 | + if ($a[$i] !== $b[$i]) |
|
89 | + { |
|
90 | + return $a[$i] - $b[$i]; |
|
91 | + } |
|
92 | + } |
|
93 | + |
|
94 | + return count($a) - count($b); |
|
95 | + } |
|
96 | + |
|
97 | + /** |
|
98 | + * Test whether the list of strings is empty |
|
99 | + * |
|
100 | + * @param string[] $strings |
|
101 | + * @return bool |
|
102 | + */ |
|
103 | + protected function isEmpty(array $strings) |
|
104 | + { |
|
105 | + return (empty($strings) || $strings === ['']); |
|
106 | + } |
|
107 | + |
|
108 | + /** |
|
109 | + * Set the InputInterface instance in $this->input |
|
110 | + * |
|
111 | + * @param string $inputType |
|
112 | + * @return void |
|
113 | + */ |
|
114 | + protected function setInput($inputType) |
|
115 | + { |
|
116 | + $className = __NAMESPACE__ . '\\Input\\' . $inputType; |
|
117 | + $this->input = new $className; |
|
118 | + } |
|
119 | + |
|
120 | + /** |
|
121 | + * Set the Runner instance in $this->runner |
|
122 | + * |
|
123 | + * @return void |
|
124 | + */ |
|
125 | + protected function setRunner() |
|
126 | + { |
|
127 | + $this->runner = new Runner; |
|
128 | + $this->runner->addPass(new MergePrefix); |
|
129 | + $this->runner->addPass(new GroupSingleCharacters); |
|
130 | + $this->runner->addPass(new Recurse($this->runner)); |
|
131 | + $this->runner->addPass(new PromoteSingleStrings); |
|
132 | + $this->runner->addPass(new MergeSuffix); |
|
133 | + $this->runner->addPass(new CoalesceSingleCharacterPrefix); |
|
134 | + } |
|
135 | + |
|
136 | + /** |
|
137 | + * Set the Serializer instance in $this->serializer |
|
138 | + * |
|
139 | + * @param string $outputType |
|
140 | + * @param string $delimiter |
|
141 | + * @return void |
|
142 | + */ |
|
143 | + protected function setSerializer($outputType, $delimiter) |
|
144 | + { |
|
145 | + $className = __NAMESPACE__ . '\\Output\\' . $outputType; |
|
146 | + $output = new $className; |
|
147 | + $escaper = new Escaper($delimiter); |
|
148 | + |
|
149 | + $this->serializer = new Serializer($output, $escaper); |
|
150 | + } |
|
151 | + |
|
152 | + /** |
|
153 | + * Split all given strings by character |
|
154 | + * |
|
155 | + * @param string[] $strings List of strings |
|
156 | + * @return array[] List of arrays |
|
157 | + */ |
|
158 | + protected function splitStrings(array $strings) |
|
159 | + { |
|
160 | + return array_map([$this->input, 'split'], $strings); |
|
161 | + } |
|
162 | 162 | } |
163 | 163 | \ No newline at end of file |
@@ -11,202 +11,202 @@ |
||
11 | 11 | |
12 | 12 | class Serializer |
13 | 13 | { |
14 | - /** |
|
15 | - * @var Escaper |
|
16 | - */ |
|
17 | - protected $escaper; |
|
18 | - |
|
19 | - /** |
|
20 | - * @var OutputInterface |
|
21 | - */ |
|
22 | - protected $output; |
|
23 | - |
|
24 | - /** |
|
25 | - * @param OutputInterface $output |
|
26 | - * @param Escaper $escaper |
|
27 | - */ |
|
28 | - public function __construct(OutputInterface $output, Escaper $escaper) |
|
29 | - { |
|
30 | - $this->escaper = $escaper; |
|
31 | - $this->output = $output; |
|
32 | - } |
|
33 | - |
|
34 | - /** |
|
35 | - * Serialize given strings into a regular expression |
|
36 | - * |
|
37 | - * @param array[] $strings |
|
38 | - * @return string |
|
39 | - */ |
|
40 | - public function serializeStrings(array $strings) |
|
41 | - { |
|
42 | - $info = $this->analyzeStrings($strings); |
|
43 | - $alternations = $this->buildAlternations($info); |
|
44 | - $expr = implode('|', $alternations); |
|
45 | - |
|
46 | - if (count($alternations) > 1 || $this->isOneOptionalString($info)) |
|
47 | - { |
|
48 | - $expr = '(?:' . $expr . ')'; |
|
49 | - } |
|
50 | - |
|
51 | - return $expr . $info['quantifier']; |
|
52 | - } |
|
53 | - |
|
54 | - /** |
|
55 | - * Analyze given strings to determine how to serialize them |
|
56 | - * |
|
57 | - * The returned array may contains any of the following elements: |
|
58 | - * |
|
59 | - * - (string) quantifier Either '' or '?' |
|
60 | - * - (array) chars List of values from single-char strings |
|
61 | - * - (array) strings List of multi-char strings |
|
62 | - * |
|
63 | - * @param array[] $strings |
|
64 | - * @return array |
|
65 | - */ |
|
66 | - protected function analyzeStrings(array $strings) |
|
67 | - { |
|
68 | - $info = ['quantifier' => '']; |
|
69 | - if ($strings[0] === []) |
|
70 | - { |
|
71 | - $info['quantifier'] = '?'; |
|
72 | - unset($strings[0]); |
|
73 | - } |
|
74 | - |
|
75 | - $chars = $this->getChars($strings); |
|
76 | - if (count($chars) > 1) |
|
77 | - { |
|
78 | - $info['chars'] = array_values($chars); |
|
79 | - $strings = array_diff_key($strings, $chars); |
|
80 | - } |
|
81 | - |
|
82 | - $info['strings'] = array_values($strings); |
|
83 | - |
|
84 | - return $info; |
|
85 | - } |
|
86 | - |
|
87 | - /** |
|
88 | - * Build the list of alternations based on given info |
|
89 | - * |
|
90 | - * @param array $info |
|
91 | - * @return string[] |
|
92 | - */ |
|
93 | - protected function buildAlternations(array $info) |
|
94 | - { |
|
95 | - $alternations = []; |
|
96 | - if (!empty($info['chars'])) |
|
97 | - { |
|
98 | - $alternations[] = $this->serializeCharacterClass($info['chars']); |
|
99 | - } |
|
100 | - foreach ($info['strings'] as $string) |
|
101 | - { |
|
102 | - $alternations[] = $this->serializeString($string); |
|
103 | - } |
|
104 | - |
|
105 | - return $alternations; |
|
106 | - } |
|
107 | - |
|
108 | - /** |
|
109 | - * Return the portion of strings that are composed of a single character |
|
110 | - * |
|
111 | - * @param array[] |
|
112 | - * @return array String key => codepoint |
|
113 | - */ |
|
114 | - protected function getChars(array $strings) |
|
115 | - { |
|
116 | - $chars = []; |
|
117 | - foreach ($strings as $k => $string) |
|
118 | - { |
|
119 | - if (count($string) === 1 && !is_array($string[0])) |
|
120 | - { |
|
121 | - $chars[$k] = $string[0]; |
|
122 | - } |
|
123 | - } |
|
124 | - |
|
125 | - return $chars; |
|
126 | - } |
|
127 | - |
|
128 | - /** |
|
129 | - * Get the list of ranges that cover all given values |
|
130 | - * |
|
131 | - * @param integer[] $values Ordered list of values |
|
132 | - * @return array[] List of ranges in the form [start, end] |
|
133 | - */ |
|
134 | - protected function getRanges(array $values) |
|
135 | - { |
|
136 | - $i = 0; |
|
137 | - $cnt = count($values); |
|
138 | - $start = $values[0]; |
|
139 | - $end = $start; |
|
140 | - $ranges = []; |
|
141 | - while (++$i < $cnt) |
|
142 | - { |
|
143 | - if ($values[$i] === $end + 1) |
|
144 | - { |
|
145 | - ++$end; |
|
146 | - } |
|
147 | - else |
|
148 | - { |
|
149 | - $ranges[] = [$start, $end]; |
|
150 | - $start = $end = $values[$i]; |
|
151 | - } |
|
152 | - } |
|
153 | - $ranges[] = [$start, $end]; |
|
154 | - |
|
155 | - return $ranges; |
|
156 | - } |
|
157 | - |
|
158 | - /** |
|
159 | - * Test whether a string is optional and has more than one character |
|
160 | - * |
|
161 | - * @param array $info |
|
162 | - * @return bool |
|
163 | - */ |
|
164 | - protected function isOneOptionalString(array $info) |
|
165 | - { |
|
166 | - // Test whether the first string has a quantifier and more than one element |
|
167 | - return (!empty($info['quantifier']) && isset($info['strings'][0][1])); |
|
168 | - } |
|
169 | - |
|
170 | - /** |
|
171 | - * Serialize a given list of values into a character class |
|
172 | - * |
|
173 | - * @param integer[] $values |
|
174 | - * @return string |
|
175 | - */ |
|
176 | - protected function serializeCharacterClass(array $values) |
|
177 | - { |
|
178 | - $expr = '['; |
|
179 | - foreach ($this->getRanges($values) as list($start, $end)) |
|
180 | - { |
|
181 | - $expr .= $this->escaper->escapeCharacterClass($this->output->output($start)); |
|
182 | - if ($end > $start) |
|
183 | - { |
|
184 | - if ($end > $start + 1) |
|
185 | - { |
|
186 | - $expr .= '-'; |
|
187 | - } |
|
188 | - $expr .= $this->escaper->escapeCharacterClass($this->output->output($end)); |
|
189 | - } |
|
190 | - } |
|
191 | - $expr .= ']'; |
|
192 | - |
|
193 | - return $expr; |
|
194 | - } |
|
195 | - |
|
196 | - /** |
|
197 | - * Serialize a given string into a regular expression |
|
198 | - * |
|
199 | - * @param array $string |
|
200 | - * @return string |
|
201 | - */ |
|
202 | - protected function serializeString(array $string) |
|
203 | - { |
|
204 | - $expr = ''; |
|
205 | - foreach ($string as $element) |
|
206 | - { |
|
207 | - $expr .= (is_array($element)) ? $this->serializeStrings($element) : $this->escaper->escapeLiteral($this->output->output($element)); |
|
208 | - } |
|
209 | - |
|
210 | - return $expr; |
|
211 | - } |
|
14 | + /** |
|
15 | + * @var Escaper |
|
16 | + */ |
|
17 | + protected $escaper; |
|
18 | + |
|
19 | + /** |
|
20 | + * @var OutputInterface |
|
21 | + */ |
|
22 | + protected $output; |
|
23 | + |
|
24 | + /** |
|
25 | + * @param OutputInterface $output |
|
26 | + * @param Escaper $escaper |
|
27 | + */ |
|
28 | + public function __construct(OutputInterface $output, Escaper $escaper) |
|
29 | + { |
|
30 | + $this->escaper = $escaper; |
|
31 | + $this->output = $output; |
|
32 | + } |
|
33 | + |
|
34 | + /** |
|
35 | + * Serialize given strings into a regular expression |
|
36 | + * |
|
37 | + * @param array[] $strings |
|
38 | + * @return string |
|
39 | + */ |
|
40 | + public function serializeStrings(array $strings) |
|
41 | + { |
|
42 | + $info = $this->analyzeStrings($strings); |
|
43 | + $alternations = $this->buildAlternations($info); |
|
44 | + $expr = implode('|', $alternations); |
|
45 | + |
|
46 | + if (count($alternations) > 1 || $this->isOneOptionalString($info)) |
|
47 | + { |
|
48 | + $expr = '(?:' . $expr . ')'; |
|
49 | + } |
|
50 | + |
|
51 | + return $expr . $info['quantifier']; |
|
52 | + } |
|
53 | + |
|
54 | + /** |
|
55 | + * Analyze given strings to determine how to serialize them |
|
56 | + * |
|
57 | + * The returned array may contain any of the following elements: |
|
58 | + * |
|
59 | + * - (string) quantifier Either '' or '?' |
|
60 | + * - (array) chars List of values from single-char strings |
|
61 | + * - (array) strings List of multi-char strings |
|
62 | + * |
|
63 | + * @param array[] $strings |
|
64 | + * @return array |
|
65 | + */ |
|
66 | + protected function analyzeStrings(array $strings) |
|
67 | + { |
|
68 | + $info = ['quantifier' => '']; |
|
69 | + if ($strings[0] === []) |
|
70 | + { |
|
71 | + $info['quantifier'] = '?'; |
|
72 | + unset($strings[0]); |
|
73 | + } |
|
74 | + |
|
75 | + $chars = $this->getChars($strings); |
|
76 | + if (count($chars) > 1) |
|
77 | + { |
|
78 | + $info['chars'] = array_values($chars); |
|
79 | + $strings = array_diff_key($strings, $chars); |
|
80 | + } |
|
81 | + |
|
82 | + $info['strings'] = array_values($strings); |
|
83 | + |
|
84 | + return $info; |
|
85 | + } |
|
86 | + |
|
87 | + /** |
|
88 | + * Build the list of alternations based on given info |
|
89 | + * |
|
90 | + * @param array $info |
|
91 | + * @return string[] |
|
92 | + */ |
|
93 | + protected function buildAlternations(array $info) |
|
94 | + { |
|
95 | + $alternations = []; |
|
96 | + if (!empty($info['chars'])) |
|
97 | + { |
|
98 | + $alternations[] = $this->serializeCharacterClass($info['chars']); |
|
99 | + } |
|
100 | + foreach ($info['strings'] as $string) |
|
101 | + { |
|
102 | + $alternations[] = $this->serializeString($string); |
|
103 | + } |
|
104 | + |
|
105 | + return $alternations; |
|
106 | + } |
|
107 | + |
|
108 | + /** |
|
109 | + * Return the portion of strings that are composed of a single character |
|
110 | + * |
|
111 | + * @param array[] $strings |
|
112 | + * @return array String key => codepoint |
|
113 | + */ |
|
114 | + protected function getChars(array $strings) |
|
115 | + { |
|
116 | + $chars = []; |
|
117 | + foreach ($strings as $k => $string) |
|
118 | + { |
|
119 | + if (count($string) === 1 && !is_array($string[0])) |
|
120 | + { |
|
121 | + $chars[$k] = $string[0]; |
|
122 | + } |
|
123 | + } |
|
124 | + |
|
125 | + return $chars; |
|
126 | + } |
|
127 | + |
|
128 | + /** |
|
129 | + * Get the list of ranges that cover all given values |
|
130 | + * |
|
131 | + * @param integer[] $values Ordered list of values |
|
132 | + * @return array[] List of ranges in the form [start, end] |
|
133 | + */ |
|
134 | + protected function getRanges(array $values) |
|
135 | + { |
|
136 | + $i = 0; |
|
137 | + $cnt = count($values); |
|
138 | + $start = $values[0]; |
|
139 | + $end = $start; |
|
140 | + $ranges = []; |
|
141 | + while (++$i < $cnt) |
|
142 | + { |
|
143 | + if ($values[$i] === $end + 1) |
|
144 | + { |
|
145 | + ++$end; |
|
146 | + } |
|
147 | + else |
|
148 | + { |
|
149 | + $ranges[] = [$start, $end]; |
|
150 | + $start = $end = $values[$i]; |
|
151 | + } |
|
152 | + } |
|
153 | + $ranges[] = [$start, $end]; |
|
154 | + |
|
155 | + return $ranges; |
|
156 | + } |
|
157 | + |
|
158 | + /** |
|
159 | + * Test whether a string is optional and has more than one character |
|
160 | + * |
|
161 | + * @param array $info |
|
162 | + * @return bool |
|
163 | + */ |
|
164 | + protected function isOneOptionalString(array $info) |
|
165 | + { |
|
166 | + // Test whether the first string has a quantifier and more than one element |
|
167 | + return (!empty($info['quantifier']) && isset($info['strings'][0][1])); |
|
168 | + } |
|
169 | + |
|
170 | + /** |
|
171 | + * Serialize a given list of values into a character class |
|
172 | + * |
|
173 | + * @param integer[] $values |
|
174 | + * @return string |
|
175 | + */ |
|
176 | + protected function serializeCharacterClass(array $values) |
|
177 | + { |
|
178 | + $expr = '['; |
|
179 | + foreach ($this->getRanges($values) as list($start, $end)) |
|
180 | + { |
|
181 | + $expr .= $this->escaper->escapeCharacterClass($this->output->output($start)); |
|
182 | + if ($end > $start) |
|
183 | + { |
|
184 | + if ($end > $start + 1) |
|
185 | + { |
|
186 | + $expr .= '-'; |
|
187 | + } |
|
188 | + $expr .= $this->escaper->escapeCharacterClass($this->output->output($end)); |
|
189 | + } |
|
190 | + } |
|
191 | + $expr .= ']'; |
|
192 | + |
|
193 | + return $expr; |
|
194 | + } |
|
195 | + |
|
196 | + /** |
|
197 | + * Serialize a given string into a regular expression |
|
198 | + * |
|
199 | + * @param array $string |
|
200 | + * @return string |
|
201 | + */ |
|
202 | + protected function serializeString(array $string) |
|
203 | + { |
|
204 | + $expr = ''; |
|
205 | + foreach ($string as $element) |
|
206 | + { |
|
207 | + $expr .= (is_array($element)) ? $this->serializeStrings($element) : $this->escaper->escapeLiteral($this->output->output($element)); |
|
208 | + } |
|
209 | + |
|
210 | + return $expr; |
|
211 | + } |
|
212 | 212 | } |
213 | 213 | \ No newline at end of file |
@@ -9,14 +9,14 @@ |
||
9 | 9 | |
10 | 10 | class JavaScript extends PrintableAscii |
11 | 11 | { |
12 | - /** {@inheritdoc} */ |
|
13 | - protected $maxValue = 0x10FFFF; |
|
12 | + /** {@inheritdoc} */ |
|
13 | + protected $maxValue = 0x10FFFF; |
|
14 | 14 | |
15 | - /** |
|
16 | - * {@inheritdoc} |
|
17 | - */ |
|
18 | - protected function escapeUnicode($cp) |
|
19 | - { |
|
20 | - return sprintf(($cp > 0xFFFF) ? '\\u{%X}' : '\\u%04X', $cp); |
|
21 | - } |
|
15 | + /** |
|
16 | + * {@inheritdoc} |
|
17 | + */ |
|
18 | + protected function escapeUnicode($cp) |
|
19 | + { |
|
20 | + return sprintf(($cp > 0xFFFF) ? '\\u{%X}' : '\\u%04X', $cp); |
|
21 | + } |
|
22 | 22 | } |
23 | 23 | \ No newline at end of file |
@@ -13,35 +13,35 @@ |
||
13 | 13 | */ |
14 | 14 | class PromoteSingleStrings extends AbstractPass |
15 | 15 | { |
16 | - /** |
|
17 | - * {@inheritdoc} |
|
18 | - */ |
|
19 | - protected function runPass(array $strings) |
|
20 | - { |
|
21 | - return array_map([$this, 'promoteSingleStrings'], $strings); |
|
22 | - } |
|
16 | + /** |
|
17 | + * {@inheritdoc} |
|
18 | + */ |
|
19 | + protected function runPass(array $strings) |
|
20 | + { |
|
21 | + return array_map([$this, 'promoteSingleStrings'], $strings); |
|
22 | + } |
|
23 | 23 | |
24 | - /** |
|
25 | - * Promote single strings found inside given string |
|
26 | - * |
|
27 | - * @param array $string Original string |
|
28 | - * @return array Modified string |
|
29 | - */ |
|
30 | - protected function promoteSingleStrings(array $string) |
|
31 | - { |
|
32 | - $newString = []; |
|
33 | - foreach ($string as $element) |
|
34 | - { |
|
35 | - if (is_array($element) && count($element) === 1) |
|
36 | - { |
|
37 | - $newString = array_merge($newString, $element[0]); |
|
38 | - } |
|
39 | - else |
|
40 | - { |
|
41 | - $newString[] = $element; |
|
42 | - } |
|
43 | - } |
|
24 | + /** |
|
25 | + * Promote single strings found inside given string |
|
26 | + * |
|
27 | + * @param array $string Original string |
|
28 | + * @return array Modified string |
|
29 | + */ |
|
30 | + protected function promoteSingleStrings(array $string) |
|
31 | + { |
|
32 | + $newString = []; |
|
33 | + foreach ($string as $element) |
|
34 | + { |
|
35 | + if (is_array($element) && count($element) === 1) |
|
36 | + { |
|
37 | + $newString = array_merge($newString, $element[0]); |
|
38 | + } |
|
39 | + else |
|
40 | + { |
|
41 | + $newString[] = $element; |
|
42 | + } |
|
43 | + } |
|
44 | 44 | |
45 | - return $newString; |
|
46 | - } |
|
45 | + return $newString; |
|
46 | + } |
|
47 | 47 | } |
48 | 48 | \ No newline at end of file |
@@ -9,75 +9,75 @@ |
||
9 | 9 | |
10 | 10 | abstract class AbstractPass implements PassInterface |
11 | 11 | { |
12 | - /** |
|
13 | - * @var bool Whether the current set of strings is optional |
|
14 | - */ |
|
15 | - protected $isOptional; |
|
12 | + /** |
|
13 | + * @var bool Whether the current set of strings is optional |
|
14 | + */ |
|
15 | + protected $isOptional; |
|
16 | 16 | |
17 | - /** |
|
18 | - * {@inheritdoc} |
|
19 | - */ |
|
20 | - public function run(array $strings) |
|
21 | - { |
|
22 | - $strings = $this->beforeRun($strings); |
|
23 | - if ($this->canRun($strings)) |
|
24 | - { |
|
25 | - $strings = $this->runPass($strings); |
|
26 | - } |
|
27 | - $strings = $this->afterRun($strings); |
|
17 | + /** |
|
18 | + * {@inheritdoc} |
|
19 | + */ |
|
20 | + public function run(array $strings) |
|
21 | + { |
|
22 | + $strings = $this->beforeRun($strings); |
|
23 | + if ($this->canRun($strings)) |
|
24 | + { |
|
25 | + $strings = $this->runPass($strings); |
|
26 | + } |
|
27 | + $strings = $this->afterRun($strings); |
|
28 | 28 | |
29 | - return $strings; |
|
30 | - } |
|
29 | + return $strings; |
|
30 | + } |
|
31 | 31 | |
32 | - /** |
|
33 | - * Process the list of strings after the pass is run |
|
34 | - * |
|
35 | - * @param array[] $strings |
|
36 | - * @return array[] |
|
37 | - */ |
|
38 | - protected function afterRun(array $strings) |
|
39 | - { |
|
40 | - if ($this->isOptional && $strings[0] !== []) |
|
41 | - { |
|
42 | - array_unshift($strings, []); |
|
43 | - } |
|
32 | + /** |
|
33 | + * Process the list of strings after the pass is run |
|
34 | + * |
|
35 | + * @param array[] $strings |
|
36 | + * @return array[] |
|
37 | + */ |
|
38 | + protected function afterRun(array $strings) |
|
39 | + { |
|
40 | + if ($this->isOptional && $strings[0] !== []) |
|
41 | + { |
|
42 | + array_unshift($strings, []); |
|
43 | + } |
|
44 | 44 | |
45 | - return $strings; |
|
46 | - } |
|
45 | + return $strings; |
|
46 | + } |
|
47 | 47 | |
48 | - /** |
|
49 | - * Prepare the list of strings before the pass is run |
|
50 | - * |
|
51 | - * @param array[] $strings |
|
52 | - * @return array[] |
|
53 | - */ |
|
54 | - protected function beforeRun(array $strings) |
|
55 | - { |
|
56 | - $this->isOptional = (isset($strings[0]) && $strings[0] === []); |
|
57 | - if ($this->isOptional) |
|
58 | - { |
|
59 | - array_shift($strings); |
|
60 | - } |
|
48 | + /** |
|
49 | + * Prepare the list of strings before the pass is run |
|
50 | + * |
|
51 | + * @param array[] $strings |
|
52 | + * @return array[] |
|
53 | + */ |
|
54 | + protected function beforeRun(array $strings) |
|
55 | + { |
|
56 | + $this->isOptional = (isset($strings[0]) && $strings[0] === []); |
|
57 | + if ($this->isOptional) |
|
58 | + { |
|
59 | + array_shift($strings); |
|
60 | + } |
|
61 | 61 | |
62 | - return $strings; |
|
63 | - } |
|
62 | + return $strings; |
|
63 | + } |
|
64 | 64 | |
65 | - /** |
|
66 | - * Test whether this pass can be run on a given list of strings |
|
67 | - * |
|
68 | - * @param array[] $strings |
|
69 | - * @return bool |
|
70 | - */ |
|
71 | - protected function canRun(array $strings) |
|
72 | - { |
|
73 | - return true; |
|
74 | - } |
|
65 | + /** |
|
66 | + * Test whether this pass can be run on a given list of strings |
|
67 | + * |
|
68 | + * @param array[] $strings |
|
69 | + * @return bool |
|
70 | + */ |
|
71 | + protected function canRun(array $strings) |
|
72 | + { |
|
73 | + return true; |
|
74 | + } |
|
75 | 75 | |
76 | - /** |
|
77 | - * Run this pass on a list of strings |
|
78 | - * |
|
79 | - * @param array[] $strings |
|
80 | - * @return array[] |
|
81 | - */ |
|
82 | - abstract protected function runPass(array $strings); |
|
76 | + /** |
|
77 | + * Run this pass on a list of strings |
|
78 | + * |
|
79 | + * @param array[] $strings |
|
80 | + * @return array[] |
|
81 | + */ |
|
82 | + abstract protected function runPass(array $strings); |
|
83 | 83 | } |
84 | 84 | \ No newline at end of file |
@@ -12,67 +12,67 @@ |
||
12 | 12 | */ |
13 | 13 | class CoalesceSingleCharacterPrefix extends AbstractPass |
14 | 14 | { |
15 | - /** |
|
16 | - * {@inheritdoc} |
|
17 | - */ |
|
18 | - protected function runPass(array $strings) |
|
19 | - { |
|
20 | - $newStrings = []; |
|
21 | - foreach ($this->getEligibleKeys($strings) as $keys) |
|
22 | - { |
|
23 | - // Create a new string to hold the merged strings and replace the first element with |
|
24 | - // an empty character class |
|
25 | - $newString = $strings[$keys[0]]; |
|
26 | - $newString[0] = []; |
|
27 | - foreach ($keys as $key) |
|
28 | - { |
|
29 | - $newString[0][] = [$strings[$key][0]]; |
|
30 | - unset($strings[$key]); |
|
31 | - } |
|
32 | - $newStrings[] = $newString; |
|
33 | - } |
|
15 | + /** |
|
16 | + * {@inheritdoc} |
|
17 | + */ |
|
18 | + protected function runPass(array $strings) |
|
19 | + { |
|
20 | + $newStrings = []; |
|
21 | + foreach ($this->getEligibleKeys($strings) as $keys) |
|
22 | + { |
|
23 | + // Create a new string to hold the merged strings and replace the first element with |
|
24 | + // an empty character class |
|
25 | + $newString = $strings[$keys[0]]; |
|
26 | + $newString[0] = []; |
|
27 | + foreach ($keys as $key) |
|
28 | + { |
|
29 | + $newString[0][] = [$strings[$key][0]]; |
|
30 | + unset($strings[$key]); |
|
31 | + } |
|
32 | + $newStrings[] = $newString; |
|
33 | + } |
|
34 | 34 | |
35 | - return array_merge($newStrings, $strings); |
|
36 | - } |
|
35 | + return array_merge($newStrings, $strings); |
|
36 | + } |
|
37 | 37 | |
38 | - /** |
|
39 | - * Filter the list of eligible keys and keep those that have at least two matches |
|
40 | - * |
|
41 | - * @param array[] $eligibleKeys List of lists of keys |
|
42 | - * @return array[] |
|
43 | - */ |
|
44 | - protected function filterEligibleKeys(array $eligibleKeys) |
|
45 | - { |
|
46 | - $filteredKeys = []; |
|
47 | - foreach ($eligibleKeys as $k => $keys) |
|
48 | - { |
|
49 | - if (count($keys) > 1) |
|
50 | - { |
|
51 | - $filteredKeys[] = $keys; |
|
52 | - } |
|
53 | - } |
|
38 | + /** |
|
39 | + * Filter the list of eligible keys and keep those that have at least two matches |
|
40 | + * |
|
41 | + * @param array[] $eligibleKeys List of lists of keys |
|
42 | + * @return array[] |
|
43 | + */ |
|
44 | + protected function filterEligibleKeys(array $eligibleKeys) |
|
45 | + { |
|
46 | + $filteredKeys = []; |
|
47 | + foreach ($eligibleKeys as $k => $keys) |
|
48 | + { |
|
49 | + if (count($keys) > 1) |
|
50 | + { |
|
51 | + $filteredKeys[] = $keys; |
|
52 | + } |
|
53 | + } |
|
54 | 54 | |
55 | - return $filteredKeys; |
|
56 | - } |
|
55 | + return $filteredKeys; |
|
56 | + } |
|
57 | 57 | |
58 | - /** |
|
59 | - * Get a list of keys of strings eligible to be merged together, grouped by suffix |
|
60 | - * |
|
61 | - * @param array[] $strings |
|
62 | - * @return array[] |
|
63 | - */ |
|
64 | - protected function getEligibleKeys(array $strings) |
|
65 | - { |
|
66 | - $eligibleKeys = []; |
|
67 | - foreach ($strings as $k => $string) |
|
68 | - { |
|
69 | - if (!is_array($string[0]) && isset($string[1])) |
|
70 | - { |
|
71 | - $suffix = serialize(array_slice($string, 1)); |
|
72 | - $eligibleKeys[$suffix][] = $k; |
|
73 | - } |
|
74 | - } |
|
58 | + /** |
|
59 | + * Get a list of keys of strings eligible to be merged together, grouped by suffix |
|
60 | + * |
|
61 | + * @param array[] $strings |
|
62 | + * @return array[] |
|
63 | + */ |
|
64 | + protected function getEligibleKeys(array $strings) |
|
65 | + { |
|
66 | + $eligibleKeys = []; |
|
67 | + foreach ($strings as $k => $string) |
|
68 | + { |
|
69 | + if (!is_array($string[0]) && isset($string[1])) |
|
70 | + { |
|
71 | + $suffix = serialize(array_slice($string, 1)); |
|
72 | + $eligibleKeys[$suffix][] = $k; |
|
73 | + } |
|
74 | + } |
|
75 | 75 | |
76 | - return $this->filterEligibleKeys($eligibleKeys); |
|
77 | - } |
|
76 | + return $this->filterEligibleKeys($eligibleKeys); |
|
77 | + } |
|
78 | 78 | } |
79 | 79 | \ No newline at end of file |