@@ -11,62 +11,62 @@ |
||
| 11 | 11 | |
| 12 | 12 | class Utf8 implements InputInterface |
| 13 | 13 | { |
| 14 | - /** |
|
| 15 | - * {@inheritdoc} |
|
| 16 | - */ |
|
| 17 | - public function split($string) |
|
| 18 | - { |
|
| 19 | - if (preg_match_all('(.)us', $string, $matches) === false) |
|
| 20 | - { |
|
| 21 | - throw new InvalidArgumentException('Invalid UTF-8 string'); |
|
| 22 | - } |
|
| 14 | + /** |
|
| 15 | + * {@inheritdoc} |
|
| 16 | + */ |
|
| 17 | + public function split($string) |
|
| 18 | + { |
|
| 19 | + if (preg_match_all('(.)us', $string, $matches) === false) |
|
| 20 | + { |
|
| 21 | + throw new InvalidArgumentException('Invalid UTF-8 string'); |
|
| 22 | + } |
|
| 23 | 23 | |
| 24 | - return $this->charsToCodepoints($matches[0]); |
|
| 25 | - } |
|
| 24 | + return $this->charsToCodepoints($matches[0]); |
|
| 25 | + } |
|
| 26 | 26 | |
| 27 | - /** |
|
| 28 | - * Convert a list of UTF-8 characters to a list of Unicode codepoint |
|
| 29 | - * |
|
| 30 | - * @param string[] $chars |
|
| 31 | - * @return integer[] |
|
| 32 | - */ |
|
| 33 | - protected function charsToCodepoints(array $chars) |
|
| 34 | - { |
|
| 35 | - return array_map([$this, 'cp'], $chars); |
|
| 36 | - } |
|
| 27 | + /** |
|
| 28 | + * Convert a list of UTF-8 characters to a list of Unicode codepoints |
|
| 29 | + * |
|
| 30 | + * @param string[] $chars |
|
| 31 | + * @return integer[] |
|
| 32 | + */ |
|
| 33 | + protected function charsToCodepoints(array $chars) |
|
| 34 | + { |
|
| 35 | + return array_map([$this, 'cp'], $chars); |
|
| 36 | + } |
|
| 37 | 37 | |
| 38 | - /** |
|
| 39 | - * Compute and return the Unicode codepoint for given UTF-8 char |
|
| 40 | - * |
|
| 41 | - * @param string $char UTF-8 char |
|
| 42 | - * @return integer |
|
| 43 | - */ |
|
| 44 | - protected function cp($char) |
|
| 45 | - { |
|
| 46 | - $size = strlen($char); |
|
| 47 | - if ($size === 1) |
|
| 48 | - { |
|
| 49 | - $cp = ord($char); |
|
| 50 | - } |
|
| 51 | - elseif ($size === 2) |
|
| 52 | - { |
|
| 53 | - $cp = ((ord($char[0]) & 0b00011111) << 6) |
|
| 54 | - | (ord($char[1]) & 0b00111111); |
|
| 55 | - } |
|
| 56 | - elseif ($size === 3) |
|
| 57 | - { |
|
| 58 | - $cp = ((ord($char[0]) & 0b00001111) << 12) |
|
| 59 | - | ((ord($char[1]) & 0b00111111) << 6) |
|
| 60 | - | (ord($char[2]) & 0b00111111); |
|
| 61 | - } |
|
| 62 | - else |
|
| 63 | - { |
|
| 64 | - $cp = ((ord($char[0]) & 0b00000111) << 18) |
|
| 65 | - | ((ord($char[1]) & 0b00111111) << 12) |
|
| 66 | - | ((ord($char[2]) & 0b00111111) << 6) |
|
| 67 | - | (ord($char[3]) & 0b00111111); |
|
| 68 | - } |
|
| 38 | + /** |
|
| 39 | + * Compute and return the Unicode codepoint for given UTF-8 char |
|
| 40 | + * |
|
| 41 | + * @param string $char UTF-8 char |
|
| 42 | + * @return integer |
|
| 43 | + */ |
|
| 44 | + protected function cp($char) |
|
| 45 | + { |
|
| 46 | + $size = strlen($char); |
|
| 47 | + if ($size === 1) |
|
| 48 | + { |
|
| 49 | + $cp = ord($char); |
|
| 50 | + } |
|
| 51 | + elseif ($size === 2) |
|
| 52 | + { |
|
| 53 | + $cp = ((ord($char[0]) & 0b00011111) << 6) |
|
| 54 | + | (ord($char[1]) & 0b00111111); |
|
| 55 | + } |
|
| 56 | + elseif ($size === 3) |
|
| 57 | + { |
|
| 58 | + $cp = ((ord($char[0]) & 0b00001111) << 12) |
|
| 59 | + | ((ord($char[1]) & 0b00111111) << 6) |
|
| 60 | + | (ord($char[2]) & 0b00111111); |
|
| 61 | + } |
|
| 62 | + else |
|
| 63 | + { |
|
| 64 | + $cp = ((ord($char[0]) & 0b00000111) << 18) |
|
| 65 | + | ((ord($char[1]) & 0b00111111) << 12) |
|
| 66 | + | ((ord($char[2]) & 0b00111111) << 6) |
|
| 67 | + | (ord($char[3]) & 0b00111111); |
|
| 68 | + } |
|
| 69 | 69 | |
| 70 | - return $cp; |
|
| 71 | - } |
|
| 70 | + return $cp; |
|
| 71 | + } |
|
| 72 | 72 | } |
| 73 | 73 | \ No newline at end of file |
@@ -18,94 +18,94 @@ |
||
| 18 | 18 | |
| 19 | 19 | class Builder |
| 20 | 20 | { |
| 21 | - /** |
|
| 22 | - * @var InputInterface |
|
| 23 | - */ |
|
| 24 | - protected $input; |
|
| 25 | - |
|
| 26 | - /** |
|
| 27 | - * @var Runner |
|
| 28 | - */ |
|
| 29 | - protected $runner; |
|
| 30 | - |
|
| 31 | - /** |
|
| 32 | - * @var Serializer |
|
| 33 | - */ |
|
| 34 | - protected $serializer; |
|
| 35 | - |
|
| 36 | - /** |
|
| 37 | - * @param array $config |
|
| 38 | - */ |
|
| 39 | - public function __construct(array $config = []) |
|
| 40 | - { |
|
| 41 | - $config = $this->getConfig($config); |
|
| 42 | - |
|
| 43 | - $this->input = $config['input']; |
|
| 44 | - $this->runner = $config['runner']; |
|
| 45 | - $this->serializer = new Serializer($config['output'], $config['escaper']); |
|
| 46 | - } |
|
| 47 | - |
|
| 48 | - /** |
|
| 49 | - * Build and return a regular expression that matches all of the given strings |
|
| 50 | - * |
|
| 51 | - * @param string[] $strings Literal strings to be matched |
|
| 52 | - * @return string Regular expression (without delimiters) |
|
| 53 | - */ |
|
| 54 | - public function build(array $strings) |
|
| 55 | - { |
|
| 56 | - $strings = array_unique($strings); |
|
| 57 | - if ($strings === ['']) |
|
| 58 | - { |
|
| 59 | - return ''; |
|
| 60 | - } |
|
| 61 | - sort($strings); |
|
| 62 | - |
|
| 63 | - $strings = $this->splitStrings($strings); |
|
| 64 | - $strings = $this->runner->run($strings); |
|
| 65 | - |
|
| 66 | - return $this->serializer->serializeStrings($strings); |
|
| 67 | - } |
|
| 68 | - |
|
| 69 | - /** |
|
| 70 | - * Build the full config array based on given input |
|
| 71 | - * |
|
| 72 | - * @param array $config Sparse config |
|
| 73 | - * @return array Full config |
|
| 74 | - */ |
|
| 75 | - protected function getConfig(array $config) |
|
| 76 | - { |
|
| 77 | - $config += [ |
|
| 78 | - 'delimiter' => '/', |
|
| 79 | - 'input' => 'Bytes', |
|
| 80 | - 'output' => 'Bytes' |
|
| 81 | - ]; |
|
| 82 | - $config['escaper'] = new Escaper($config['delimiter']); |
|
| 83 | - |
|
| 84 | - $className = __NAMESPACE__ . '\\Input\\' . $config['input']; |
|
| 85 | - $config['input'] = new $className; |
|
| 86 | - |
|
| 87 | - $className = __NAMESPACE__ . '\\Output\\' . $config['output']; |
|
| 88 | - $config['output'] = new $className; |
|
| 89 | - |
|
| 90 | - $config['runner'] = new Runner; |
|
| 91 | - $config['runner']->addPass(new MergePrefix); |
|
| 92 | - $config['runner']->addPass(new GroupSingleCharacters); |
|
| 93 | - $config['runner']->addPass(new Recurse($config['runner'])); |
|
| 94 | - $config['runner']->addPass(new PromoteSingleStrings); |
|
| 95 | - $config['runner']->addPass(new MergeSuffix); |
|
| 96 | - $config['runner']->addPass(new CoalesceSingleCharacterPrefix); |
|
| 97 | - |
|
| 98 | - return $config; |
|
| 99 | - } |
|
| 100 | - |
|
| 101 | - /** |
|
| 102 | - * Split all given strings by character |
|
| 103 | - * |
|
| 104 | - * @param string[] $strings List of strings |
|
| 105 | - * @return array[] List of arrays |
|
| 106 | - */ |
|
| 107 | - protected function splitStrings(array $strings) |
|
| 108 | - { |
|
| 109 | - return array_map([$this->input, 'split'], $strings); |
|
| 110 | - } |
|
| 21 | + /** |
|
| 22 | + * @var InputInterface |
|
| 23 | + */ |
|
| 24 | + protected $input; |
|
| 25 | + |
|
| 26 | + /** |
|
| 27 | + * @var Runner |
|
| 28 | + */ |
|
| 29 | + protected $runner; |
|
| 30 | + |
|
| 31 | + /** |
|
| 32 | + * @var Serializer |
|
| 33 | + */ |
|
| 34 | + protected $serializer; |
|
| 35 | + |
|
| 36 | + /** |
|
| 37 | + * @param array $config |
|
| 38 | + */ |
|
| 39 | + public function __construct(array $config = []) |
|
| 40 | + { |
|
| 41 | + $config = $this->getConfig($config); |
|
| 42 | + |
|
| 43 | + $this->input = $config['input']; |
|
| 44 | + $this->runner = $config['runner']; |
|
| 45 | + $this->serializer = new Serializer($config['output'], $config['escaper']); |
|
| 46 | + } |
|
| 47 | + |
|
| 48 | + /** |
|
| 49 | + * Build and return a regular expression that matches all of the given strings |
|
| 50 | + * |
|
| 51 | + * @param string[] $strings Literal strings to be matched |
|
| 52 | + * @return string Regular expression (without delimiters) |
|
| 53 | + */ |
|
| 54 | + public function build(array $strings) |
|
| 55 | + { |
|
| 56 | + $strings = array_unique($strings); |
|
| 57 | + if ($strings === ['']) |
|
| 58 | + { |
|
| 59 | + return ''; |
|
| 60 | + } |
|
| 61 | + sort($strings); |
|
| 62 | + |
|
| 63 | + $strings = $this->splitStrings($strings); |
|
| 64 | + $strings = $this->runner->run($strings); |
|
| 65 | + |
|
| 66 | + return $this->serializer->serializeStrings($strings); |
|
| 67 | + } |
|
| 68 | + |
|
| 69 | + /** |
|
| 70 | + * Build the full config array based on given input |
|
| 71 | + * |
|
| 72 | + * @param array $config Sparse config |
|
| 73 | + * @return array Full config |
|
| 74 | + */ |
|
| 75 | + protected function getConfig(array $config) |
|
| 76 | + { |
|
| 77 | + $config += [ |
|
| 78 | + 'delimiter' => '/', |
|
| 79 | + 'input' => 'Bytes', |
|
| 80 | + 'output' => 'Bytes' |
|
| 81 | + ]; |
|
| 82 | + $config['escaper'] = new Escaper($config['delimiter']); |
|
| 83 | + |
|
| 84 | + $className = __NAMESPACE__ . '\\Input\\' . $config['input']; |
|
| 85 | + $config['input'] = new $className; |
|
| 86 | + |
|
| 87 | + $className = __NAMESPACE__ . '\\Output\\' . $config['output']; |
|
| 88 | + $config['output'] = new $className; |
|
| 89 | + |
|
| 90 | + $config['runner'] = new Runner; |
|
| 91 | + $config['runner']->addPass(new MergePrefix); |
|
| 92 | + $config['runner']->addPass(new GroupSingleCharacters); |
|
| 93 | + $config['runner']->addPass(new Recurse($config['runner'])); |
|
| 94 | + $config['runner']->addPass(new PromoteSingleStrings); |
|
| 95 | + $config['runner']->addPass(new MergeSuffix); |
|
| 96 | + $config['runner']->addPass(new CoalesceSingleCharacterPrefix); |
|
| 97 | + |
|
| 98 | + return $config; |
|
| 99 | + } |
|
| 100 | + |
|
| 101 | + /** |
|
| 102 | + * Split all given strings by character |
|
| 103 | + * |
|
| 104 | + * @param string[] $strings List of strings |
|
| 105 | + * @return array[] List of arrays |
|
| 106 | + */ |
|
| 107 | + protected function splitStrings(array $strings) |
|
| 108 | + { |
|
| 109 | + return array_map([$this->input, 'split'], $strings); |
|
| 110 | + } |
|
| 111 | 111 | } |
| 112 | 112 | \ No newline at end of file |
@@ -11,35 +11,35 @@ |
||
| 11 | 11 | |
| 12 | 12 | class Runner |
| 13 | 13 | { |
| 14 | - /** |
|
| 15 | - * @var PassInterface[] |
|
| 16 | - */ |
|
| 17 | - protected $passes = []; |
|
| 14 | + /** |
|
| 15 | + * @var PassInterface[] |
|
| 16 | + */ |
|
| 17 | + protected $passes = []; |
|
| 18 | 18 | |
| 19 | - /** |
|
| 20 | - * Add a pass to the list |
|
| 21 | - * |
|
| 22 | - * @param PassInterface $pass |
|
| 23 | - * @return void |
|
| 24 | - */ |
|
| 25 | - public function addPass(PassInterface $pass) |
|
| 26 | - { |
|
| 27 | - $this->passes[] = $pass; |
|
| 28 | - } |
|
| 19 | + /** |
|
| 20 | + * Add a pass to the list |
|
| 21 | + * |
|
| 22 | + * @param PassInterface $pass |
|
| 23 | + * @return void |
|
| 24 | + */ |
|
| 25 | + public function addPass(PassInterface $pass) |
|
| 26 | + { |
|
| 27 | + $this->passes[] = $pass; |
|
| 28 | + } |
|
| 29 | 29 | |
| 30 | - /** |
|
| 31 | - * Run all passes on the list of strings |
|
| 32 | - * |
|
| 33 | - * @param array[] $strings |
|
| 34 | - * @return array[] |
|
| 35 | - */ |
|
| 36 | - public function run(array $strings) |
|
| 37 | - { |
|
| 38 | - foreach ($this->passes as $pass) |
|
| 39 | - { |
|
| 40 | - $strings = $pass->run($strings); |
|
| 41 | - } |
|
| 30 | + /** |
|
| 31 | + * Run all passes on the list of strings |
|
| 32 | + * |
|
| 33 | + * @param array[] $strings |
|
| 34 | + * @return array[] |
|
| 35 | + */ |
|
| 36 | + public function run(array $strings) |
|
| 37 | + { |
|
| 38 | + foreach ($this->passes as $pass) |
|
| 39 | + { |
|
| 40 | + $strings = $pass->run($strings); |
|
| 41 | + } |
|
| 42 | 42 | |
| 43 | - return $strings; |
|
| 44 | - } |
|
| 43 | + return $strings; |
|
| 44 | + } |
|
| 45 | 45 | } |
| 46 | 46 | \ No newline at end of file |
@@ -9,11 +9,11 @@ |
||
| 9 | 9 | |
| 10 | 10 | interface OutputInterface |
| 11 | 11 | { |
| 12 | - /** |
|
| 13 | - * Serialize a value into a character |
|
| 14 | - * |
|
| 15 | - * @param integer $value |
|
| 16 | - * @return string |
|
| 17 | - */ |
|
| 18 | - public function output($value); |
|
| 12 | + /** |
|
| 13 | + * Serialize a value into a character |
|
| 14 | + * |
|
| 15 | + * @param integer $value |
|
| 16 | + * @return string |
|
| 17 | + */ |
|
| 18 | + public function output($value); |
|
| 19 | 19 | } |
| 20 | 20 | \ No newline at end of file |
@@ -11,189 +11,189 @@ |
||
| 11 | 11 | |
| 12 | 12 | class Serializer |
| 13 | 13 | { |
| 14 | - /** |
|
| 15 | - * @var Escaper |
|
| 16 | - */ |
|
| 17 | - protected $escaper; |
|
| 18 | - |
|
| 19 | - /** |
|
| 20 | - * @var OutputInterface |
|
| 21 | - */ |
|
| 22 | - protected $output; |
|
| 23 | - |
|
| 24 | - /** |
|
| 25 | - * @param OutputInterface $output |
|
| 26 | - * @param Escaper $escaper |
|
| 27 | - */ |
|
| 28 | - public function __construct(OutputInterface $output, Escaper $escaper) |
|
| 29 | - { |
|
| 30 | - $this->escaper = $escaper; |
|
| 31 | - $this->output = $output; |
|
| 32 | - } |
|
| 33 | - |
|
| 34 | - /** |
|
| 35 | - * Serialize given strings into a regular expression |
|
| 36 | - * |
|
| 37 | - * @param array[] $strings |
|
| 38 | - * @return string |
|
| 39 | - */ |
|
| 40 | - public function serializeStrings(array $strings) |
|
| 41 | - { |
|
| 42 | - $info = $this->analyzeStrings($strings); |
|
| 43 | - $alternations = $this->buildAlternations($info); |
|
| 44 | - $expr = implode('|', $alternations); |
|
| 45 | - |
|
| 46 | - if (count($alternations) > 1 || $this->isOneOptionalString($info)) |
|
| 47 | - { |
|
| 48 | - $expr = '(?:' . $expr . ')'; |
|
| 49 | - } |
|
| 50 | - |
|
| 51 | - return $expr . $info['quantifier']; |
|
| 52 | - } |
|
| 53 | - |
|
| 54 | - /** |
|
| 55 | - * Analyze given strings to determine how to serialize them |
|
| 56 | - * |
|
| 57 | - * The returned array may contains any of the following elements: |
|
| 58 | - * |
|
| 59 | - * - (string) quantifier Either '' or '?' |
|
| 60 | - * - (array) chars List of values from single-char strings |
|
| 61 | - * - (array) strings List of multi-char strings |
|
| 62 | - * |
|
| 63 | - * @param array[] $strings |
|
| 64 | - * @return array |
|
| 65 | - */ |
|
| 66 | - protected function analyzeStrings(array $strings) |
|
| 67 | - { |
|
| 68 | - $info = ['quantifier' => '']; |
|
| 69 | - $chars = []; |
|
| 70 | - foreach ($strings as $k => $string) |
|
| 71 | - { |
|
| 72 | - if (empty($string)) |
|
| 73 | - { |
|
| 74 | - $info['quantifier'] = '?'; |
|
| 75 | - unset($strings[$k]); |
|
| 76 | - } |
|
| 77 | - elseif (!isset($string[1])) |
|
| 78 | - { |
|
| 79 | - $chars[$k] = $string[0]; |
|
| 80 | - } |
|
| 81 | - } |
|
| 82 | - |
|
| 83 | - if (count($chars) > 1) |
|
| 84 | - { |
|
| 85 | - $info['chars'] = array_values($chars); |
|
| 86 | - $strings = array_diff_key($strings, $chars); |
|
| 87 | - } |
|
| 88 | - |
|
| 89 | - $info['strings'] = array_values($strings); |
|
| 90 | - |
|
| 91 | - return $info; |
|
| 92 | - } |
|
| 93 | - |
|
| 94 | - /** |
|
| 95 | - * Build the list of alternations based on given info |
|
| 96 | - * |
|
| 97 | - * @param array $info |
|
| 98 | - * @return string[] |
|
| 99 | - */ |
|
| 100 | - protected function buildAlternations(array $info) |
|
| 101 | - { |
|
| 102 | - $alternations = []; |
|
| 103 | - if (!empty($info['chars'])) |
|
| 104 | - { |
|
| 105 | - $alternations[] = $this->serializeCharacterClass($info['chars']); |
|
| 106 | - } |
|
| 107 | - foreach ($info['strings'] as $string) |
|
| 108 | - { |
|
| 109 | - $alternations[] = $this->serializeString($string); |
|
| 110 | - } |
|
| 111 | - |
|
| 112 | - return $alternations; |
|
| 113 | - } |
|
| 114 | - |
|
| 115 | - /** |
|
| 116 | - * Get the list of ranges that cover all given values |
|
| 117 | - * |
|
| 118 | - * @param integer[] $values Ordered list of values |
|
| 119 | - * @return array[] List of ranges in the form [start, end] |
|
| 120 | - */ |
|
| 121 | - protected function getRanges(array $values) |
|
| 122 | - { |
|
| 123 | - $i = 0; |
|
| 124 | - $cnt = count($values); |
|
| 125 | - $start = $values[0]; |
|
| 126 | - $end = $start; |
|
| 127 | - $ranges = []; |
|
| 128 | - while (++$i < $cnt) |
|
| 129 | - { |
|
| 130 | - if ($values[$i] === $end + 1) |
|
| 131 | - { |
|
| 132 | - ++$end; |
|
| 133 | - } |
|
| 134 | - else |
|
| 135 | - { |
|
| 136 | - $ranges[] = [$start, $end]; |
|
| 137 | - $start = $end = $values[$i]; |
|
| 138 | - } |
|
| 139 | - } |
|
| 140 | - $ranges[] = [$start, $end]; |
|
| 141 | - |
|
| 142 | - return $ranges; |
|
| 143 | - } |
|
| 144 | - |
|
| 145 | - /** |
|
| 146 | - * Test whether a string is optional and has more than one character |
|
| 147 | - * |
|
| 148 | - * @param array $info |
|
| 149 | - * @return bool |
|
| 150 | - */ |
|
| 151 | - protected function isOneOptionalString(array $info) |
|
| 152 | - { |
|
| 153 | - // Test whether the first string has a quantifier and more than one element |
|
| 154 | - return (!empty($info['quantifier']) && isset($info['strings'][0][1])); |
|
| 155 | - } |
|
| 156 | - |
|
| 157 | - /** |
|
| 158 | - * Serialize a given list of values into a character class |
|
| 159 | - * |
|
| 160 | - * @param integer[] $values |
|
| 161 | - * @return string |
|
| 162 | - */ |
|
| 163 | - protected function serializeCharacterClass(array $values) |
|
| 164 | - { |
|
| 165 | - $expr = '['; |
|
| 166 | - foreach ($this->getRanges($values) as list($start, $end)) |
|
| 167 | - { |
|
| 168 | - $expr .= $this->escaper->escapeCharacterClass($this->output->output($start)); |
|
| 169 | - if ($end > $start) |
|
| 170 | - { |
|
| 171 | - if ($end > $start + 1) |
|
| 172 | - { |
|
| 173 | - $expr .= '-'; |
|
| 174 | - } |
|
| 175 | - $expr .= $this->escaper->escapeCharacterClass($this->output->output($end)); |
|
| 176 | - } |
|
| 177 | - } |
|
| 178 | - $expr .= ']'; |
|
| 179 | - |
|
| 180 | - return $expr; |
|
| 181 | - } |
|
| 182 | - |
|
| 183 | - /** |
|
| 184 | - * Serialize a given string into a regular expression |
|
| 185 | - * |
|
| 186 | - * @param array $string |
|
| 187 | - * @return string |
|
| 188 | - */ |
|
| 189 | - protected function serializeString(array $string) |
|
| 190 | - { |
|
| 191 | - $expr = ''; |
|
| 192 | - foreach ($string as $element) |
|
| 193 | - { |
|
| 194 | - $expr .= (is_array($element)) ? $this->serializeStrings($element) : $this->escaper->escapeLiteral($this->output->output($element)); |
|
| 195 | - } |
|
| 196 | - |
|
| 197 | - return $expr; |
|
| 198 | - } |
|
| 14 | + /** |
|
| 15 | + * @var Escaper |
|
| 16 | + */ |
|
| 17 | + protected $escaper; |
|
| 18 | + |
|
| 19 | + /** |
|
| 20 | + * @var OutputInterface |
|
| 21 | + */ |
|
| 22 | + protected $output; |
|
| 23 | + |
|
| 24 | + /** |
|
| 25 | + * @param OutputInterface $output |
|
| 26 | + * @param Escaper $escaper |
|
| 27 | + */ |
|
| 28 | + public function __construct(OutputInterface $output, Escaper $escaper) |
|
| 29 | + { |
|
| 30 | + $this->escaper = $escaper; |
|
| 31 | + $this->output = $output; |
|
| 32 | + } |
|
| 33 | + |
|
| 34 | + /** |
|
| 35 | + * Serialize given strings into a regular expression |
|
| 36 | + * |
|
| 37 | + * @param array[] $strings |
|
| 38 | + * @return string |
|
| 39 | + */ |
|
| 40 | + public function serializeStrings(array $strings) |
|
| 41 | + { |
|
| 42 | + $info = $this->analyzeStrings($strings); |
|
| 43 | + $alternations = $this->buildAlternations($info); |
|
| 44 | + $expr = implode('|', $alternations); |
|
| 45 | + |
|
| 46 | + if (count($alternations) > 1 || $this->isOneOptionalString($info)) |
|
| 47 | + { |
|
| 48 | + $expr = '(?:' . $expr . ')'; |
|
| 49 | + } |
|
| 50 | + |
|
| 51 | + return $expr . $info['quantifier']; |
|
| 52 | + } |
|
| 53 | + |
|
| 54 | + /** |
|
| 55 | + * Analyze given strings to determine how to serialize them |
|
| 56 | + * |
|
| 57 | + * The returned array may contain any of the following elements: |
|
| 58 | + * |
|
| 59 | + * - (string) quantifier Either '' or '?' |
|
| 60 | + * - (array) chars List of values from single-char strings |
|
| 61 | + * - (array) strings List of multi-char strings |
|
| 62 | + * |
|
| 63 | + * @param array[] $strings |
|
| 64 | + * @return array |
|
| 65 | + */ |
|
| 66 | + protected function analyzeStrings(array $strings) |
|
| 67 | + { |
|
| 68 | + $info = ['quantifier' => '']; |
|
| 69 | + $chars = []; |
|
| 70 | + foreach ($strings as $k => $string) |
|
| 71 | + { |
|
| 72 | + if (empty($string)) |
|
| 73 | + { |
|
| 74 | + $info['quantifier'] = '?'; |
|
| 75 | + unset($strings[$k]); |
|
| 76 | + } |
|
| 77 | + elseif (!isset($string[1])) |
|
| 78 | + { |
|
| 79 | + $chars[$k] = $string[0]; |
|
| 80 | + } |
|
| 81 | + } |
|
| 82 | + |
|
| 83 | + if (count($chars) > 1) |
|
| 84 | + { |
|
| 85 | + $info['chars'] = array_values($chars); |
|
| 86 | + $strings = array_diff_key($strings, $chars); |
|
| 87 | + } |
|
| 88 | + |
|
| 89 | + $info['strings'] = array_values($strings); |
|
| 90 | + |
|
| 91 | + return $info; |
|
| 92 | + } |
|
| 93 | + |
|
| 94 | + /** |
|
| 95 | + * Build the list of alternations based on given info |
|
| 96 | + * |
|
| 97 | + * @param array $info |
|
| 98 | + * @return string[] |
|
| 99 | + */ |
|
| 100 | + protected function buildAlternations(array $info) |
|
| 101 | + { |
|
| 102 | + $alternations = []; |
|
| 103 | + if (!empty($info['chars'])) |
|
| 104 | + { |
|
| 105 | + $alternations[] = $this->serializeCharacterClass($info['chars']); |
|
| 106 | + } |
|
| 107 | + foreach ($info['strings'] as $string) |
|
| 108 | + { |
|
| 109 | + $alternations[] = $this->serializeString($string); |
|
| 110 | + } |
|
| 111 | + |
|
| 112 | + return $alternations; |
|
| 113 | + } |
|
| 114 | + |
|
| 115 | + /** |
|
| 116 | + * Get the list of ranges that cover all given values |
|
| 117 | + * |
|
| 118 | + * @param integer[] $values Ordered list of values |
|
| 119 | + * @return array[] List of ranges in the form [start, end] |
|
| 120 | + */ |
|
| 121 | + protected function getRanges(array $values) |
|
| 122 | + { |
|
| 123 | + $i = 0; |
|
| 124 | + $cnt = count($values); |
|
| 125 | + $start = $values[0]; |
|
| 126 | + $end = $start; |
|
| 127 | + $ranges = []; |
|
| 128 | + while (++$i < $cnt) |
|
| 129 | + { |
|
| 130 | + if ($values[$i] === $end + 1) |
|
| 131 | + { |
|
| 132 | + ++$end; |
|
| 133 | + } |
|
| 134 | + else |
|
| 135 | + { |
|
| 136 | + $ranges[] = [$start, $end]; |
|
| 137 | + $start = $end = $values[$i]; |
|
| 138 | + } |
|
| 139 | + } |
|
| 140 | + $ranges[] = [$start, $end]; |
|
| 141 | + |
|
| 142 | + return $ranges; |
|
| 143 | + } |
|
| 144 | + |
|
| 145 | + /** |
|
| 146 | + * Test whether a string is optional and has more than one character |
|
| 147 | + * |
|
| 148 | + * @param array $info |
|
| 149 | + * @return bool |
|
| 150 | + */ |
|
| 151 | + protected function isOneOptionalString(array $info) |
|
| 152 | + { |
|
| 153 | + // Test whether the first string has a quantifier and more than one element |
|
| 154 | + return (!empty($info['quantifier']) && isset($info['strings'][0][1])); |
|
| 155 | + } |
|
| 156 | + |
|
| 157 | + /** |
|
| 158 | + * Serialize a given list of values into a character class |
|
| 159 | + * |
|
| 160 | + * @param integer[] $values |
|
| 161 | + * @return string |
|
| 162 | + */ |
|
| 163 | + protected function serializeCharacterClass(array $values) |
|
| 164 | + { |
|
| 165 | + $expr = '['; |
|
| 166 | + foreach ($this->getRanges($values) as list($start, $end)) |
|
| 167 | + { |
|
| 168 | + $expr .= $this->escaper->escapeCharacterClass($this->output->output($start)); |
|
| 169 | + if ($end > $start) |
|
| 170 | + { |
|
| 171 | + if ($end > $start + 1) |
|
| 172 | + { |
|
| 173 | + $expr .= '-'; |
|
| 174 | + } |
|
| 175 | + $expr .= $this->escaper->escapeCharacterClass($this->output->output($end)); |
|
| 176 | + } |
|
| 177 | + } |
|
| 178 | + $expr .= ']'; |
|
| 179 | + |
|
| 180 | + return $expr; |
|
| 181 | + } |
|
| 182 | + |
|
| 183 | + /** |
|
| 184 | + * Serialize a given string into a regular expression |
|
| 185 | + * |
|
| 186 | + * @param array $string |
|
| 187 | + * @return string |
|
| 188 | + */ |
|
| 189 | + protected function serializeString(array $string) |
|
| 190 | + { |
|
| 191 | + $expr = ''; |
|
| 192 | + foreach ($string as $element) |
|
| 193 | + { |
|
| 194 | + $expr .= (is_array($element)) ? $this->serializeStrings($element) : $this->escaper->escapeLiteral($this->output->output($element)); |
|
| 195 | + } |
|
| 196 | + |
|
| 197 | + return $expr; |
|
| 198 | + } |
|
| 199 | 199 | } |
| 200 | 200 | \ No newline at end of file |
@@ -9,53 +9,53 @@ |
||
| 9 | 9 | |
| 10 | 10 | abstract class PrintableAscii extends BaseImplementation |
| 11 | 11 | { |
| 12 | - /** |
|
| 13 | - * {@inheritdoc} |
|
| 14 | - */ |
|
| 15 | - protected function outputValidValue($value) |
|
| 16 | - { |
|
| 17 | - if ($value < 32) |
|
| 18 | - { |
|
| 19 | - return $this->escapeControlCode($value); |
|
| 20 | - } |
|
| 21 | - |
|
| 22 | - if ($value < 127) |
|
| 23 | - { |
|
| 24 | - return chr($value); |
|
| 25 | - } |
|
| 26 | - |
|
| 27 | - return ($value > 255) ? $this->escapeUnicode($value) : $this->escapeAscii($value); |
|
| 28 | - } |
|
| 29 | - |
|
| 30 | - /** |
|
| 31 | - * Escape given ASCII codepoint |
|
| 32 | - * |
|
| 33 | - * @param integer $cp |
|
| 34 | - * @return string |
|
| 35 | - */ |
|
| 36 | - protected function escapeAscii($cp) |
|
| 37 | - { |
|
| 38 | - return '\\x' . sprintf('%02X', $cp); |
|
| 39 | - } |
|
| 40 | - |
|
| 41 | - /** |
|
| 42 | - * Escape given control code |
|
| 43 | - * |
|
| 44 | - * @param integer $cp |
|
| 45 | - * @return string |
|
| 46 | - */ |
|
| 47 | - protected function escapeControlCode($cp) |
|
| 48 | - { |
|
| 49 | - $table = [9 => '\\t', 10 => '\\n', 13 => '\\r']; |
|
| 50 | - |
|
| 51 | - return (isset($table[$cp])) ? $table[$cp] : $this->escapeAscii($cp); |
|
| 52 | - } |
|
| 53 | - |
|
| 54 | - /** |
|
| 55 | - * Output the representation of a unicode character |
|
| 56 | - * |
|
| 57 | - * @param integer $cp Unicode codepoint |
|
| 58 | - * @return string |
|
| 59 | - */ |
|
| 60 | - abstract protected function escapeUnicode($cp); |
|
| 12 | + /** |
|
| 13 | + * {@inheritdoc} |
|
| 14 | + */ |
|
| 15 | + protected function outputValidValue($value) |
|
| 16 | + { |
|
| 17 | + if ($value < 32) |
|
| 18 | + { |
|
| 19 | + return $this->escapeControlCode($value); |
|
| 20 | + } |
|
| 21 | + |
|
| 22 | + if ($value < 127) |
|
| 23 | + { |
|
| 24 | + return chr($value); |
|
| 25 | + } |
|
| 26 | + |
|
| 27 | + return ($value > 255) ? $this->escapeUnicode($value) : $this->escapeAscii($value); |
|
| 28 | + } |
|
| 29 | + |
|
| 30 | + /** |
|
| 31 | + * Escape given ASCII codepoint |
|
| 32 | + * |
|
| 33 | + * @param integer $cp |
|
| 34 | + * @return string |
|
| 35 | + */ |
|
| 36 | + protected function escapeAscii($cp) |
|
| 37 | + { |
|
| 38 | + return '\\x' . sprintf('%02X', $cp); |
|
| 39 | + } |
|
| 40 | + |
|
| 41 | + /** |
|
| 42 | + * Escape given control code |
|
| 43 | + * |
|
| 44 | + * @param integer $cp |
|
| 45 | + * @return string |
|
| 46 | + */ |
|
| 47 | + protected function escapeControlCode($cp) |
|
| 48 | + { |
|
| 49 | + $table = [9 => '\\t', 10 => '\\n', 13 => '\\r']; |
|
| 50 | + |
|
| 51 | + return (isset($table[$cp])) ? $table[$cp] : $this->escapeAscii($cp); |
|
| 52 | + } |
|
| 53 | + |
|
| 54 | + /** |
|
| 55 | + * Output the representation of a unicode character |
|
| 56 | + * |
|
| 57 | + * @param integer $cp Unicode codepoint |
|
| 58 | + * @return string |
|
| 59 | + */ |
|
| 60 | + abstract protected function escapeUnicode($cp); |
|
| 61 | 61 | } |
| 62 | 62 | \ No newline at end of file |
@@ -9,14 +9,14 @@ |
||
| 9 | 9 | |
| 10 | 10 | class JavaScript extends PrintableAscii |
| 11 | 11 | { |
| 12 | - /** {@inheritdoc} */ |
|
| 13 | - protected $maxValue = 0xFFFF; |
|
| 12 | + /** {@inheritdoc} */ |
|
| 13 | + protected $maxValue = 0xFFFF; |
|
| 14 | 14 | |
| 15 | - /** |
|
| 16 | - * {@inheritdoc} |
|
| 17 | - */ |
|
| 18 | - protected function escapeUnicode($cp) |
|
| 19 | - { |
|
| 20 | - return sprintf('\\u%04X', $cp); |
|
| 21 | - } |
|
| 15 | + /** |
|
| 16 | + * {@inheritdoc} |
|
| 17 | + */ |
|
| 18 | + protected function escapeUnicode($cp) |
|
| 19 | + { |
|
| 20 | + return sprintf('\\u%04X', $cp); |
|
| 21 | + } |
|
| 22 | 22 | } |
| 23 | 23 | \ No newline at end of file |
@@ -9,31 +9,31 @@ |
||
| 9 | 9 | |
| 10 | 10 | class Utf8 extends BaseImplementation |
| 11 | 11 | { |
| 12 | - /** {@inheritdoc} */ |
|
| 13 | - protected $maxValue = 0x10FFFF; |
|
| 12 | + /** {@inheritdoc} */ |
|
| 13 | + protected $maxValue = 0x10FFFF; |
|
| 14 | 14 | |
| 15 | - /** |
|
| 16 | - * {@inheritdoc} |
|
| 17 | - */ |
|
| 18 | - protected function outputValidValue($value) |
|
| 19 | - { |
|
| 20 | - if ($value < 0x80) |
|
| 21 | - { |
|
| 22 | - return chr($value); |
|
| 23 | - } |
|
| 24 | - if ($value < 0x800) |
|
| 25 | - { |
|
| 26 | - return chr(0xC0 | ($value >> 6)) . chr(0x80 | ($value & 0x3F)); |
|
| 27 | - } |
|
| 28 | - if ($value < 0x10000) |
|
| 29 | - { |
|
| 30 | - return chr(0xE0 | ($value >> 12)) |
|
| 31 | - . chr(0x80 | (($value >> 6) & 0x3F)) |
|
| 32 | - . chr(0x80 | ($value & 0x3F)); |
|
| 33 | - } |
|
| 34 | - return chr(0xF0 | ($value >> 18)) |
|
| 35 | - . chr(0x80 | (($value >> 12) & 0x3F)) |
|
| 36 | - . chr(0x80 | (($value >> 6) & 0x3F)) |
|
| 37 | - . chr(0x80 | ($value & 0x3F)); |
|
| 38 | - } |
|
| 15 | + /** |
|
| 16 | + * {@inheritdoc} |
|
| 17 | + */ |
|
| 18 | + protected function outputValidValue($value) |
|
| 19 | + { |
|
| 20 | + if ($value < 0x80) |
|
| 21 | + { |
|
| 22 | + return chr($value); |
|
| 23 | + } |
|
| 24 | + if ($value < 0x800) |
|
| 25 | + { |
|
| 26 | + return chr(0xC0 | ($value >> 6)) . chr(0x80 | ($value & 0x3F)); |
|
| 27 | + } |
|
| 28 | + if ($value < 0x10000) |
|
| 29 | + { |
|
| 30 | + return chr(0xE0 | ($value >> 12)) |
|
| 31 | + . chr(0x80 | (($value >> 6) & 0x3F)) |
|
| 32 | + . chr(0x80 | ($value & 0x3F)); |
|
| 33 | + } |
|
| 34 | + return chr(0xF0 | ($value >> 18)) |
|
| 35 | + . chr(0x80 | (($value >> 12) & 0x3F)) |
|
| 36 | + . chr(0x80 | (($value >> 6) & 0x3F)) |
|
| 37 | + . chr(0x80 | ($value & 0x3F)); |
|
| 38 | + } |
|
| 39 | 39 | } |
| 40 | 40 | \ No newline at end of file |
@@ -9,14 +9,14 @@ |
||
| 9 | 9 | |
| 10 | 10 | class Bytes extends BaseImplementation |
| 11 | 11 | { |
| 12 | - /** {@inheritdoc} */ |
|
| 13 | - protected $maxValue = 255; |
|
| 12 | + /** {@inheritdoc} */ |
|
| 13 | + protected $maxValue = 255; |
|
| 14 | 14 | |
| 15 | - /** |
|
| 16 | - * {@inheritdoc} |
|
| 17 | - */ |
|
| 18 | - protected function outputValidValue($value) |
|
| 19 | - { |
|
| 20 | - return chr($value); |
|
| 21 | - } |
|
| 15 | + /** |
|
| 16 | + * {@inheritdoc} |
|
| 17 | + */ |
|
| 18 | + protected function outputValidValue($value) |
|
| 19 | + { |
|
| 20 | + return chr($value); |
|
| 21 | + } |
|
| 22 | 22 | } |
| 23 | 23 | \ No newline at end of file |