@@ -9,67 +9,67 @@ |
||
9 | 9 | |
10 | 10 | class CoalesceSingleCharacterPrefix extends AbstractPass |
11 | 11 | { |
12 | - /** |
|
13 | - * {@inheritdoc} |
|
14 | - */ |
|
15 | - protected function processStrings(array $strings) |
|
16 | - { |
|
17 | - $newStrings = []; |
|
18 | - foreach ($this->getEligibleKeys($strings) as $keys) |
|
19 | - { |
|
20 | - // Create a new string to hold the merged strings and replace the first element with |
|
21 | - // an empty character class |
|
22 | - $newString = $strings[$keys[0]]; |
|
23 | - $newString[0] = []; |
|
24 | - foreach ($keys as $key) |
|
25 | - { |
|
26 | - $newString[0][] = [$strings[$key][0]]; |
|
27 | - unset($strings[$key]); |
|
28 | - } |
|
29 | - $newStrings[] = $newString; |
|
30 | - } |
|
12 | + /** |
|
13 | + * {@inheritdoc} |
|
14 | + */ |
|
15 | + protected function processStrings(array $strings) |
|
16 | + { |
|
17 | + $newStrings = []; |
|
18 | + foreach ($this->getEligibleKeys($strings) as $keys) |
|
19 | + { |
|
20 | + // Create a new string to hold the merged strings and replace the first element with |
|
21 | + // an empty character class |
|
22 | + $newString = $strings[$keys[0]]; |
|
23 | + $newString[0] = []; |
|
24 | + foreach ($keys as $key) |
|
25 | + { |
|
26 | + $newString[0][] = [$strings[$key][0]]; |
|
27 | + unset($strings[$key]); |
|
28 | + } |
|
29 | + $newStrings[] = $newString; |
|
30 | + } |
|
31 | 31 | |
32 | - return array_merge($newStrings, $strings); |
|
33 | - } |
|
32 | + return array_merge($newStrings, $strings); |
|
33 | + } |
|
34 | 34 | |
35 | - /** |
|
36 | - * Filter the list of eligible keys and keep those that have at least two matches |
|
37 | - * |
|
38 | - * @param array[] $eligibleKeys List of lists of keys |
|
39 | - * @return array[] |
|
40 | - */ |
|
41 | - protected function filterEligibleKeys(array $eligibleKeys) |
|
42 | - { |
|
43 | - $filteredKeys = []; |
|
44 | - foreach ($eligibleKeys as $k => $keys) |
|
45 | - { |
|
46 | - if (count($keys) > 1) |
|
47 | - { |
|
48 | - $filteredKeys[] = $keys; |
|
49 | - } |
|
50 | - } |
|
35 | + /** |
|
36 | + * Filter the list of eligible keys and keep those that have at least two matches |
|
37 | + * |
|
38 | + * @param array[] $eligibleKeys List of lists of keys |
|
39 | + * @return array[] |
|
40 | + */ |
|
41 | + protected function filterEligibleKeys(array $eligibleKeys) |
|
42 | + { |
|
43 | + $filteredKeys = []; |
|
44 | + foreach ($eligibleKeys as $k => $keys) |
|
45 | + { |
|
46 | + if (count($keys) > 1) |
|
47 | + { |
|
48 | + $filteredKeys[] = $keys; |
|
49 | + } |
|
50 | + } |
|
51 | 51 | |
52 | - return $filteredKeys; |
|
53 | - } |
|
52 | + return $filteredKeys; |
|
53 | + } |
|
54 | 54 | |
55 | - /** |
|
56 | - * Get a list of keys of strings eligible to be merged together, grouped by suffix |
|
57 | - * |
|
58 | - * @param array[] $strings |
|
59 | - * @return array[] |
|
60 | - */ |
|
61 | - protected function getEligibleKeys(array $strings) |
|
62 | - { |
|
63 | - $eligibleKeys = []; |
|
64 | - foreach ($strings as $k => $string) |
|
65 | - { |
|
66 | - if (!is_array($string[0]) && isset($string[1])) |
|
67 | - { |
|
68 | - $suffix = serialize(array_slice($string, 1)); |
|
69 | - $eligibleKeys[$suffix][] = $k; |
|
70 | - } |
|
71 | - } |
|
55 | + /** |
|
56 | + * Get a list of keys of strings eligible to be merged together, grouped by suffix |
|
57 | + * |
|
58 | + * @param array[] $strings |
|
59 | + * @return array[] |
|
60 | + */ |
|
61 | + protected function getEligibleKeys(array $strings) |
|
62 | + { |
|
63 | + $eligibleKeys = []; |
|
64 | + foreach ($strings as $k => $string) |
|
65 | + { |
|
66 | + if (!is_array($string[0]) && isset($string[1])) |
|
67 | + { |
|
68 | + $suffix = serialize(array_slice($string, 1)); |
|
69 | + $eligibleKeys[$suffix][] = $k; |
|
70 | + } |
|
71 | + } |
|
72 | 72 | |
73 | - return $this->filterEligibleKeys($eligibleKeys); |
|
74 | - } |
|
73 | + return $this->filterEligibleKeys($eligibleKeys); |
|
74 | + } |
|
75 | 75 | } |
76 | 76 | \ No newline at end of file |
@@ -18,134 +18,134 @@ |
||
18 | 18 | |
19 | 19 | class Builder |
20 | 20 | { |
21 | - /** |
|
22 | - * @var InputInterface |
|
23 | - */ |
|
24 | - protected $input; |
|
25 | - |
|
26 | - /** |
|
27 | - * @var Runner |
|
28 | - */ |
|
29 | - protected $runner; |
|
30 | - |
|
31 | - /** |
|
32 | - * @var Serializer |
|
33 | - */ |
|
34 | - protected $serializer; |
|
35 | - |
|
36 | - /** |
|
37 | - * @param array $config |
|
38 | - */ |
|
39 | - public function __construct(array $config = []) |
|
40 | - { |
|
41 | - $config += [ |
|
42 | - 'delimiter' => '/', |
|
43 | - 'input' => 'Bytes', |
|
44 | - 'output' => 'Bytes' |
|
45 | - ]; |
|
46 | - |
|
47 | - $this->setInput($config['input']); |
|
48 | - $this->setSerializer($config['output'], $config['delimiter']); |
|
49 | - $this->setRunner(); |
|
50 | - } |
|
51 | - |
|
52 | - /** |
|
53 | - * Build and return a regular expression that matches all of the given strings |
|
54 | - * |
|
55 | - * @param string[] $strings Literal strings to be matched |
|
56 | - * @return string Regular expression (without delimiters) |
|
57 | - */ |
|
58 | - public function build(array $strings) |
|
59 | - { |
|
60 | - $strings = array_unique($strings); |
|
61 | - if ($strings === ['']) |
|
62 | - { |
|
63 | - return ''; |
|
64 | - } |
|
65 | - |
|
66 | - $strings = $this->splitStrings($strings); |
|
67 | - usort($strings, __CLASS__ . '::compareStrings'); |
|
68 | - $strings = $this->runner->run($strings); |
|
69 | - |
|
70 | - return $this->serializer->serializeStrings($strings); |
|
71 | - } |
|
72 | - |
|
73 | - /** |
|
74 | - * Compare two split strings |
|
75 | - * |
|
76 | - * Will sort strings in ascending order |
|
77 | - * |
|
78 | - * @param integer[] $a |
|
79 | - * @param integer[] $b |
|
80 | - * @return integer |
|
81 | - */ |
|
82 | - protected function compareStrings(array $a, array $b) |
|
83 | - { |
|
84 | - $i = -1; |
|
85 | - $cnt = min(count($a), count($b)); |
|
86 | - while (++$i < $cnt) |
|
87 | - { |
|
88 | - if ($a[$i] !== $b[$i]) |
|
89 | - { |
|
90 | - return $a[$i] - $b[$i]; |
|
91 | - } |
|
92 | - } |
|
93 | - |
|
94 | - return count($a) - count($b); |
|
95 | - } |
|
96 | - |
|
97 | - /** |
|
98 | - * Set the InputInterface instance in $this->input |
|
99 | - * |
|
100 | - * @param string $inputType |
|
101 | - * @return void |
|
102 | - */ |
|
103 | - protected function setInput($inputType) |
|
104 | - { |
|
105 | - $className = __NAMESPACE__ . '\\Input\\' . $inputType; |
|
106 | - $this->input = new $className; |
|
107 | - } |
|
108 | - |
|
109 | - /** |
|
110 | - * Set the Runner instance $in this->runner |
|
111 | - * |
|
112 | - * @return void |
|
113 | - */ |
|
114 | - protected function setRunner() |
|
115 | - { |
|
116 | - $this->runner = new Runner; |
|
117 | - $this->runner->addPass(new MergePrefix); |
|
118 | - $this->runner->addPass(new GroupSingleCharacters); |
|
119 | - $this->runner->addPass(new Recurse($this->runner)); |
|
120 | - $this->runner->addPass(new PromoteSingleStrings); |
|
121 | - $this->runner->addPass(new MergeSuffix); |
|
122 | - $this->runner->addPass(new CoalesceSingleCharacterPrefix); |
|
123 | - } |
|
124 | - |
|
125 | - /** |
|
126 | - * Set the Serializer instance in $this->serializer |
|
127 | - * |
|
128 | - * @param string $outputType |
|
129 | - * @param string $delimiter |
|
130 | - * @return void |
|
131 | - */ |
|
132 | - protected function setSerializer($outputType, $delimiter) |
|
133 | - { |
|
134 | - $className = __NAMESPACE__ . '\\Output\\' . $outputType; |
|
135 | - $output = new $className; |
|
136 | - $escaper = new Escaper($delimiter); |
|
137 | - |
|
138 | - $this->serializer = new Serializer($output, $escaper); |
|
139 | - } |
|
140 | - |
|
141 | - /** |
|
142 | - * Split all given strings by character |
|
143 | - * |
|
144 | - * @param string[] $strings List of strings |
|
145 | - * @return array[] List of arrays |
|
146 | - */ |
|
147 | - protected function splitStrings(array $strings) |
|
148 | - { |
|
149 | - return array_map([$this->input, 'split'], $strings); |
|
150 | - } |
|
21 | + /** |
|
22 | + * @var InputInterface |
|
23 | + */ |
|
24 | + protected $input; |
|
25 | + |
|
26 | + /** |
|
27 | + * @var Runner |
|
28 | + */ |
|
29 | + protected $runner; |
|
30 | + |
|
31 | + /** |
|
32 | + * @var Serializer |
|
33 | + */ |
|
34 | + protected $serializer; |
|
35 | + |
|
36 | + /** |
|
37 | + * @param array $config |
|
38 | + */ |
|
39 | + public function __construct(array $config = []) |
|
40 | + { |
|
41 | + $config += [ |
|
42 | + 'delimiter' => '/', |
|
43 | + 'input' => 'Bytes', |
|
44 | + 'output' => 'Bytes' |
|
45 | + ]; |
|
46 | + |
|
47 | + $this->setInput($config['input']); |
|
48 | + $this->setSerializer($config['output'], $config['delimiter']); |
|
49 | + $this->setRunner(); |
|
50 | + } |
|
51 | + |
|
52 | + /** |
|
53 | + * Build and return a regular expression that matches all of the given strings |
|
54 | + * |
|
55 | + * @param string[] $strings Literal strings to be matched |
|
56 | + * @return string Regular expression (without delimiters) |
|
57 | + */ |
|
58 | + public function build(array $strings) |
|
59 | + { |
|
60 | + $strings = array_unique($strings); |
|
61 | + if ($strings === ['']) |
|
62 | + { |
|
63 | + return ''; |
|
64 | + } |
|
65 | + |
|
66 | + $strings = $this->splitStrings($strings); |
|
67 | + usort($strings, __CLASS__ . '::compareStrings'); |
|
68 | + $strings = $this->runner->run($strings); |
|
69 | + |
|
70 | + return $this->serializer->serializeStrings($strings); |
|
71 | + } |
|
72 | + |
|
73 | + /** |
|
74 | + * Compare two split strings |
|
75 | + * |
|
76 | + * Will sort strings in ascending order |
|
77 | + * |
|
78 | + * @param integer[] $a |
|
79 | + * @param integer[] $b |
|
80 | + * @return integer |
|
81 | + */ |
|
82 | + protected function compareStrings(array $a, array $b) |
|
83 | + { |
|
84 | + $i = -1; |
|
85 | + $cnt = min(count($a), count($b)); |
|
86 | + while (++$i < $cnt) |
|
87 | + { |
|
88 | + if ($a[$i] !== $b[$i]) |
|
89 | + { |
|
90 | + return $a[$i] - $b[$i]; |
|
91 | + } |
|
92 | + } |
|
93 | + |
|
94 | + return count($a) - count($b); |
|
95 | + } |
|
96 | + |
|
97 | + /** |
|
98 | + * Set the InputInterface instance in $this->input |
|
99 | + * |
|
100 | + * @param string $inputType |
|
101 | + * @return void |
|
102 | + */ |
|
103 | + protected function setInput($inputType) |
|
104 | + { |
|
105 | + $className = __NAMESPACE__ . '\\Input\\' . $inputType; |
|
106 | + $this->input = new $className; |
|
107 | + } |
|
108 | + |
|
109 | + /** |
|
110 | + * Set the Runner instance in $this->runner |
|
111 | + * |
|
112 | + * @return void |
|
113 | + */ |
|
114 | + protected function setRunner() |
|
115 | + { |
|
116 | + $this->runner = new Runner; |
|
117 | + $this->runner->addPass(new MergePrefix); |
|
118 | + $this->runner->addPass(new GroupSingleCharacters); |
|
119 | + $this->runner->addPass(new Recurse($this->runner)); |
|
120 | + $this->runner->addPass(new PromoteSingleStrings); |
|
121 | + $this->runner->addPass(new MergeSuffix); |
|
122 | + $this->runner->addPass(new CoalesceSingleCharacterPrefix); |
|
123 | + } |
|
124 | + |
|
125 | + /** |
|
126 | + * Set the Serializer instance in $this->serializer |
|
127 | + * |
|
128 | + * @param string $outputType |
|
129 | + * @param string $delimiter |
|
130 | + * @return void |
|
131 | + */ |
|
132 | + protected function setSerializer($outputType, $delimiter) |
|
133 | + { |
|
134 | + $className = __NAMESPACE__ . '\\Output\\' . $outputType; |
|
135 | + $output = new $className; |
|
136 | + $escaper = new Escaper($delimiter); |
|
137 | + |
|
138 | + $this->serializer = new Serializer($output, $escaper); |
|
139 | + } |
|
140 | + |
|
141 | + /** |
|
142 | + * Split all given strings by character |
|
143 | + * |
|
144 | + * @param string[] $strings List of strings |
|
145 | + * @return array[] List of arrays |
|
146 | + */ |
|
147 | + protected function splitStrings(array $strings) |
|
148 | + { |
|
149 | + return array_map([$this->input, 'split'], $strings); |
|
150 | + } |
|
151 | 151 | } |
152 | 152 | \ No newline at end of file |
@@ -11,52 +11,52 @@ |
||
11 | 11 | |
12 | 12 | class Utf8 implements InputInterface |
13 | 13 | { |
14 | - /** |
|
15 | - * {@inheritdoc} |
|
16 | - */ |
|
17 | - public function split($string) |
|
18 | - { |
|
19 | - if (preg_match_all('(.)us', $string, $matches) === false) |
|
20 | - { |
|
21 | - throw new InvalidArgumentException('Invalid UTF-8 string'); |
|
22 | - } |
|
14 | + /** |
|
15 | + * {@inheritdoc} |
|
16 | + */ |
|
17 | + public function split($string) |
|
18 | + { |
|
19 | + if (preg_match_all('(.)us', $string, $matches) === false) |
|
20 | + { |
|
21 | + throw new InvalidArgumentException('Invalid UTF-8 string'); |
|
22 | + } |
|
23 | 23 | |
24 | - return $this->charsToCodepoints($matches[0]); |
|
25 | - } |
|
24 | + return $this->charsToCodepoints($matches[0]); |
|
25 | + } |
|
26 | 26 | |
27 | - /** |
|
28 | - * Convert a list of UTF-8 characters to a list of Unicode codepoint |
|
29 | - * |
|
30 | - * @param string[] $chars |
|
31 | - * @return integer[] |
|
32 | - */ |
|
33 | - protected function charsToCodepoints(array $chars) |
|
34 | - { |
|
35 | - return array_map([$this, 'cp'], $chars); |
|
36 | - } |
|
27 | + /** |
|
28 | + * Convert a list of UTF-8 characters to a list of Unicode codepoints |
|
29 | + * |
|
30 | + * @param string[] $chars |
|
31 | + * @return integer[] |
|
32 | + */ |
|
33 | + protected function charsToCodepoints(array $chars) |
|
34 | + { |
|
35 | + return array_map([$this, 'cp'], $chars); |
|
36 | + } |
|
37 | 37 | |
38 | - /** |
|
39 | - * Compute and return the Unicode codepoint for given UTF-8 char |
|
40 | - * |
|
41 | - * @param string $char UTF-8 char |
|
42 | - * @return integer |
|
43 | - */ |
|
44 | - protected function cp($char) |
|
45 | - { |
|
46 | - $cp = ord($char[0]); |
|
47 | - if ($cp >= 0xF0) |
|
48 | - { |
|
49 | - $cp = ($cp << 18) + (ord($char[1]) << 12) + (ord($char[2]) << 6) + ord($char[3]) - 0x3C82080; |
|
50 | - } |
|
51 | - elseif ($cp >= 0xE0) |
|
52 | - { |
|
53 | - $cp = ($cp << 12) + (ord($char[1]) << 6) + ord($char[2]) - 0xE2080; |
|
54 | - } |
|
55 | - elseif ($cp >= 0xC0) |
|
56 | - { |
|
57 | - $cp = ($cp << 6) + ord($char[1]) - 0x3080; |
|
58 | - } |
|
38 | + /** |
|
39 | + * Compute and return the Unicode codepoint for given UTF-8 char |
|
40 | + * |
|
41 | + * @param string $char UTF-8 char |
|
42 | + * @return integer |
|
43 | + */ |
|
44 | + protected function cp($char) |
|
45 | + { |
|
46 | + $cp = ord($char[0]); |
|
47 | + if ($cp >= 0xF0) |
|
48 | + { |
|
49 | + $cp = ($cp << 18) + (ord($char[1]) << 12) + (ord($char[2]) << 6) + ord($char[3]) - 0x3C82080; |
|
50 | + } |
|
51 | + elseif ($cp >= 0xE0) |
|
52 | + { |
|
53 | + $cp = ($cp << 12) + (ord($char[1]) << 6) + ord($char[2]) - 0xE2080; |
|
54 | + } |
|
55 | + elseif ($cp >= 0xC0) |
|
56 | + { |
|
57 | + $cp = ($cp << 6) + ord($char[1]) - 0x3080; |
|
58 | + } |
|
59 | 59 | |
60 | - return $cp; |
|
61 | - } |
|
60 | + return $cp; |
|
61 | + } |
|
62 | 62 | } |
63 | 63 | \ No newline at end of file |