| 1 |  |  | <?php | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2 |  |  | declare(strict_types=1); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4 |  |  | namespace TYPO3Fluid\Fluid\Core\Parser; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 6 |  |  | /** | 
            
                                                                                                            
                            
            
                                    
            
            
                | 7 |  |  |  * Splitter | 
            
                                                                                                            
                            
            
                                    
            
            
                | 8 |  |  |  * | 
            
                                                                                                            
                            
            
                                    
            
            
                | 9 |  |  |  * Byte-based calculations to perform splitting on Fluid template sources. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 10 |  |  |  * Uses (64bit) bit masking to detect characters that may split a template, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 11 |  |  |  * by grouping "interesting" bytes which have ordinal values within a value | 
            
                                                                                                            
                            
            
                                    
            
            
                | 12 |  |  |  * range of maximum 64 and comparing the bit mask of this and the byte being | 
            
                                                                                                            
                            
            
                                    
            
            
                | 13 |  |  |  * analysed. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 14 |  |  |  * | 
            
                                                                                                            
                            
            
                                    
            
            
                | 15 |  |  |  * Contains the methods needed to iterate and match bytes based on (mutating) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 16 |  |  |  * bit-masks, and a couple of shorthand "peek" type methods to determine if | 
            
                                                                                                            
                            
            
                                    
            
            
                | 17 |  |  |  * the current yield should be a certain type or another. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 18 |  |  |  * | 
            
                                                                                                            
                            
            
                                    
            
            
                | 19 |  |  |  * The logic is essentially the equivalent of: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 20 |  |  |  * | 
            
                                                                                                            
                            
            
                                    
            
            
                | 21 |  |  |  * - Using arrays of possible byte values | 
            
                                                                                                            
                            
            
                                    
            
            
                | 22 |  |  |  * - Iterating characters and checking against the must-match bytes | 
            
                                                                                                            
                            
            
                                    
            
            
                | 23 |  |  |  * - Using "substr" to extract relevant bits of template code | 
            
                                                                                                            
                            
            
                                    
            
            
                | 24 |  |  |  * | 
            
                                                                                                            
                            
            
                                    
            
            
                | 25 |  |  |  * The difference is that the method in this class is considerably faster than | 
            
                                                                                                            
                            
            
                                    
            
            
                | 26 |  |  |  * any array-based counterpart and consumes orders of magnitude less memory. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 27 |  |  |  * It also means the opcode optimised version of the loop and comparisons use | 
            
                                                                                                            
                            
            
                                    
            
            
                | 28 |  |  |  * ideal CPU instructions at the bit-level instead, making them both smaller | 
            
                                                                                                            
                            
            
                                    
            
            
                | 29 |  |  |  * and even more efficient when compiled. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 30 |  |  |  * | 
            
                                                                                                            
                            
            
                                    
            
            
                | 31 |  |  |  * Works by: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 32 |  |  |  * | 
            
                                                                                                            
                            
            
                                    
            
            
                | 33 |  |  |  * - Iterating a byte value array while maintaining an internal pointer | 
            
                                                                                                            
                            
            
                                    
            
            
                | 34 |  |  |  * - Yielding byte and position (which contains captured text since last yield) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 35 |  |  |  * - When yielding, reload the bit masks used in the next iteration | 
            
                                                                                                            
                            
            
                                    
            
            
                | 36 |  |  |  */ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 37 |  |  | class Splitter | 
            
                                                                                                            
                            
            
                                    
            
            
                | 38 |  |  | { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 39 |  |  |     public const MAX_NAMESPACE_LENGTH = 10; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 40 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 41 |  |  |     public const BYTE_NULL = 0; // Zero-byte for terminating documents | 
            
                                                                                                            
                            
            
                                    
            
            
                | 42 |  |  |     public const BYTE_INLINE = 123; // The "{" character indicating an inline expression started | 
            
                                                                                                            
                            
            
                                    
            
            
                | 43 |  |  |     public const BYTE_INLINE_END = 125; // The "}" character indicating an inline expression ended | 
            
                                                                                                            
                            
            
                                    
            
            
                | 44 |  |  |     public const BYTE_PIPE = 124; // The "|" character indicating an inline expression pass operation | 
            
                                                                                                            
                            
            
                                    
            
            
                | 45 |  |  |     public const BYTE_MINUS = 45; // The "-" character (for legacy pass operations) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 46 |  |  |     public const BYTE_TAG = 60; // The "<" character indicating a tag has started | 
            
                                                                                                            
                            
            
                                    
            
            
                | 47 |  |  |     public const BYTE_TAG_END = 62; // The ">" character indicating a tag has ended | 
            
                                                                                                            
                            
            
                                    
            
            
                | 48 |  |  |     public const BYTE_TAG_CLOSE = 47; // The "/" character indicating a tag is a closing tag | 
            
                                                                                                            
                            
            
                                    
            
            
                | 49 |  |  |     public const BYTE_QUOTE_DOUBLE = 34; // The " (standard double-quote) character | 
            
                                                                                                            
                            
            
                                    
            
            
                | 50 |  |  |     public const BYTE_QUOTE_SINGLE = 39; // The ' (standard single-quote) character | 
            
                                                                                                            
                            
            
                                    
            
            
                | 51 |  |  |     public const BYTE_WHITESPACE_SPACE = 32; // A standard space character | 
            
                                                                                                            
                            
            
                                    
            
            
                | 52 |  |  |     public const BYTE_WHITESPACE_TAB = 9; // A standard tab character | 
            
                                                                                                            
                            
            
                                    
            
            
                | 53 |  |  |     public const BYTE_WHITESPACE_RETURN = 13; // A standard carriage-return character | 
            
                                                                                                            
                            
            
                                    
            
            
                | 54 |  |  |     public const BYTE_WHITESPACE_EOL = 10; // A standard (UNIX) line-break character | 
            
                                                                                                            
                            
            
                                    
            
            
                | 55 |  |  |     public const BYTE_SEPARATOR_EQUALS = 61; // The "=" character | 
            
                                                                                                            
                            
            
                                    
            
            
                | 56 |  |  |     public const BYTE_SEPARATOR_COLON = 58; // The ":" character | 
            
                                                                                                            
                            
            
                                    
            
            
                | 57 |  |  |     public const BYTE_SEPARATOR_COMMA = 44; // The "," character | 
            
                                                                                                            
                            
            
                                    
            
            
                | 58 |  |  |     public const BYTE_SEPARATOR_PIPE = 124; // The "|" character | 
            
                                                                                                            
                            
            
                                    
            
            
                | 59 |  |  |     public const BYTE_PARENTHESIS_START = 40; // The "(" character | 
            
                                                                                                            
                            
            
                                    
            
            
                | 60 |  |  |     public const BYTE_PARENTHESIS_END = 41; // The ")" character | 
            
                                                                                                            
                            
            
                                    
            
            
                | 61 |  |  |     public const BYTE_ARRAY_START = 91; // The "[" character | 
            
                                                                                                            
                            
            
                                    
            
            
                | 62 |  |  |     public const BYTE_ARRAY_END = 93; // The "]" character | 
            
                                                                                                            
                            
            
                                    
            
            
                | 63 |  |  |     public const BYTE_SLASH = 47; // The "/" character | 
            
                                                                                                            
                            
            
                                    
            
            
                | 64 |  |  |     public const BYTE_BACKSLASH = 92; // The "\" character | 
            
                                                                                                            
                            
            
                                    
            
            
                | 65 |  |  |     public const BYTE_BACKTICK = 96; // The "`" character | 
            
                                                                                                            
                            
            
                                    
            
            
                | 66 |  |  |     public const MAP_SHIFT = 64; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 67 |  |  |     public const MASK_LINEBREAKS = 0 | (1 << self::BYTE_WHITESPACE_EOL) | (1 << self::BYTE_WHITESPACE_RETURN); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 68 |  |  |     public const MASK_WHITESPACE = 0 | self::MASK_LINEBREAKS | (1 << self::BYTE_WHITESPACE_SPACE) | (1 << self::BYTE_WHITESPACE_TAB); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 69 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 70 |  |  |     /** @var Source */ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 71 |  |  |     public $source; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 72 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 73 |  |  |     /** @var Context */ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 74 |  |  |     public $context; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 75 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 76 |  |  |     public $index = 0; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 77 |  |  |     private $primaryMask = 0; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 78 |  |  |     private $secondaryMask = 0; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 79 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                /**
                 * Stores the template source on the splitter and then hands the root
                 * context of the given context collection to switch().
                 *
                 * @param Source $source Source object kept in the public $source property
                 * @param Contexts $contexts Collection whose "root" entry is passed to switch()
                 */
                public function __construct(Source $source, Contexts $contexts)
                {
                    $this->source = $source;

                    // The source is assigned before the root context is read and
                    // passed on, so switch() runs with $this->source already set.
                    $rootContext = $contexts->root;
                    $this->switch($rootContext);
                }
            
                                                                                                            
                            
            
                                    
            
            
                | 85 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 86 |  |  |     /** | 
            
                                                                                                            
                            
            
                                    
            
            
                | 87 |  |  |      * Split a string by searching for recognized characters using at least one, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 88 |  |  |      * optionally two bit masks consisting of OR'ed bit values of each detectable | 
            
                                                                                                            
                            
            
                                    
            
            
                | 89 |  |  |      * character (byte). The secondary bit mask is costless as it is OR'ed into | 
            
                                                                                                            
                            
            
                                    
            
            
                | 90 |  |  |      * the primary bit mask. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 91 |  |  |      * | 
            
                                                                                                            
                            
            
                                    
            
            
                | 92 |  |  |      * @return \Generator|?string[] | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                                                                
            
                                    
            
            
                | 93 |  |  |      */ | 
            
                                                                        
                            
            
                                    
            
            
                | 94 |  |  |     public function parse(): \Generator | 
            
                                                                        
                            
            
                                    
            
            
                | 95 |  |  |     { | 
            
                                                                        
                            
            
                                    
            
            
                | 96 |  |  |         $bytes = &$this->source->bytes; | 
            
                                                                        
                            
            
                                    
            
            
                | 97 |  |  |         $source = &$this->source->source; | 
            
                                                                        
                            
            
                                    
            
            
                | 98 |  |  |  | 
            
                                                                        
                            
            
                                    
            
            
                | 99 |  |  |         if (empty($bytes)) { | 
            
                                                                        
                            
            
                                    
            
            
                | 100 |  |  |             yield Splitter::BYTE_NULL => null; | 
            
                                                                        
                            
            
                                    
            
            
                | 101 |  |  |             return; | 
            
                                                                        
                            
            
                                    
            
            
                | 102 |  |  |         } | 
            
                                                                        
                            
            
                                    
            
            
                | 103 |  |  |  | 
            
                                                                        
                            
            
                                    
            
            
                | 104 |  |  |         $captured = null; | 
            
                                                                        
                            
            
                                    
            
            
                | 105 |  |  |  | 
            
                                                                        
                            
            
                                    
            
            
                | 106 |  |  |         foreach ($bytes as $this->index => $byte) { | 
            
                                                                        
                            
            
                                    
            
            
                | 107 |  |  |             // Decide which byte we encountered by explicitly checking if the encountered byte was in the minimum | 
            
                                                                        
                            
            
                                    
            
            
                | 108 |  |  |             // range (not-mapped match). Next check is if the matched byte is within 64-128 range in which case | 
            
                                                                        
                            
            
                                    
            
            
                | 109 |  |  |             // it is a mapped match. Anything else (>128) will be non-ASCII that is always captured. | 
            
                                                                        
                            
            
                                    
            
            
                | 110 |  |  |             if ($byte < 64 && ($this->primaryMask & (1 << $byte))) { | 
            
                                                                        
                            
            
                                    
            
            
                | 111 |  |  |                 yield $byte => $captured; | 
            
                                                                        
                            
            
                                    
            
            
                | 112 |  |  |                 $captured = null; | 
            
                                                                        
                            
            
                                    
            
            
                | 113 |  |  |             } elseif ($byte > 64 && $byte < 128 && ($this->secondaryMask & (1 << ($byte - static::MAP_SHIFT)))) { | 
            
                                                                        
                            
            
                                    
            
            
                | 114 |  |  |                 yield $byte => $captured; | 
            
                                                                        
                            
            
                                    
            
            
                | 115 |  |  |                 $captured = null; | 
            
                                                                        
                            
            
                                    
            
            
                | 116 |  |  |             } else { | 
            
                                                                        
                            
            
                                    
            
            
                | 117 |  |  |                 // Append captured bytes from source, must happen after the conditions above so we avoid appending tokens. | 
            
                                                                        
                            
            
                                    
            
            
                | 118 |  |  |                 $captured .= $source{$this->index - 1}; | 
            
                                                                        
                            
            
                                    
            
            
                | 119 |  |  |             } | 
            
                                                                        
                            
            
                                    
            
            
                | 120 |  |  |         } | 
            
                                                                        
                            
            
                                    
            
            
                | 121 |  |  |         if ($captured !== null) { | 
            
                                                                        
                            
            
                                    
            
            
                | 122 |  |  |             yield Splitter::BYTE_NULL => $captured; | 
            
                                                                        
                            
            
                                    
            
            
                | 123 |  |  |         } | 
            
                                                                        
                            
            
                                    
            
            
                | 124 |  |  |     } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 125 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                /**
                 * Makes the given context the active one and copies its two byte
                 * masks onto this instance.
                 *
                 * @param Context $context Context to activate
                 * @return Context The context that was active before the call, or
                 *                 the given context if none was set
                 */
                public function switch(Context $context): Context
                {
                    $outgoing = $this->context;

                    $this->context = $context;
                    $this->primaryMask = $context->primaryMask;
                    $this->secondaryMask = $context->secondaryMask;

                    // First switch: there is nothing to hand back, so echo the new context.
                    if ($outgoing === null) {
                        return $context;
                    }
                    return $outgoing;
                }
            
                                                                                                            
                            
            
                                    
            
            
                | 134 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                /**
                 * Counts the bytes inside a window of the source that match the
                 * given primary mask. A byte matches when its value is below 64
                 * and the bit at that position is set in $primaryMask.
                 *
                 * The window starts at $offset and spans at most $length bytes; it
                 * is additionally capped by the source length. Previously $length
                 * was ignored and the scan always ran to the end of the source.
                 *
                 * NOTE(review): the upper bound remains exclusive of
                 * $this->source->length, as before - confirm whether the byte at
                 * that index should be inspected as well.
                 *
                 * @param int $primaryMask Bit mask of byte values (0-63) to count
                 * @param int $offset Index of the first byte to inspect
                 * @param int $length Maximum number of bytes to inspect from $offset
                 * @return int Number of matching bytes found in the window
                 */
                public function countCharactersMatchingMask(int $primaryMask, int $offset, int $length): int
                {
                    $bytes = &$this->source->bytes;
                    // Stop at whichever comes first: the end of the requested window or the source bound.
                    $end = min($offset + $length, $this->source->length);
                    $counted = 0;
                    for ($index = $offset; $index < $end; $index++) {
                        // Shifting by 64 or more yields 0, the "< 64" test makes the primary-range restriction explicit.
                        if (($primaryMask & (1 << $bytes[$index])) && $bytes[$index] < 64) {
                            $counted++;
                        }
                    }
                    return $counted;
                }
            
                                                                                                            
                            
            
                                    
            
            
                | 146 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                /**
                 * Walks backwards through the source bytes, starting at $offset
                 * (clamped to the source length), and reports the first index whose
                 * byte is below 64 and has its bit set in $primaryMask.
                 *
                 * @param int $primaryMask Bit mask of byte values (0-63) to look for
                 * @param int $offset Index at which the backwards search begins
                 * @return int Index of the matching byte, or 0 when none is found
                 *             (index 0 itself is never inspected)
                 */
                public function findBytePositionBeforeOffset(int $primaryMask, int $offset): int
                {
                    $bytes = &$this->source->bytes;
                    $position = min($offset, $this->source->length);

                    while ($position > 0) {
                        if (($primaryMask & (1 << $bytes[$position])) && $bytes[$position] < 64) {
                            return $position;
                        }
                        --$position;
                    }

                    return 0;
                }
            
                                                                                                            
                            
            
                                    
            
            
                | 157 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                /**
                 * Walks forwards through the source bytes, starting at $offset, and
                 * reports the first index whose byte is below 64 and has its bit
                 * set in $primaryMask.
                 *
                 * @param int $primaryMask Bit mask of byte values (0-63) to look for
                 * @param int $offset Index at which the forward search begins
                 * @return int Index of the matching byte; when none is found, the
                 *             larger of the source length and $offset
                 */
                public function findBytePositionAfterOffset(int $primaryMask, int $offset): int
                {
                    $bytes = &$this->source->bytes;
                    $position = $offset;

                    while ($position < $this->source->length) {
                        if (($primaryMask & (1 << $bytes[$position])) && $bytes[$position] < 64) {
                            return $position;
                        }
                        ++$position;
                    }

                    // Nothing matched before the upper bound was reached.
                    return max($this->source->length, $offset);
                }
            
                                                                                                            
                                                                
            
                                    
            
            
                | 168 |  |  | } | 
            
                                                        
            
                                    
            
            
                | 169 |  |  |  | 
            
                        
This check marks PHPDoc comments that could not be parsed by our parser. To see which comment annotations we can parse, please refer to our documentation on supported doc-types.