1
|
|
|
<?php |
2
|
|
|
declare(strict_types=1); |
3
|
|
|
|
4
|
|
|
/* |
5
|
|
|
* This file belongs to the package "TYPO3 Fluid". |
6
|
|
|
* See LICENSE.txt that was shipped with this package. |
7
|
|
|
*/ |
8
|
|
|
|
9
|
|
|
namespace TYPO3Fluid\Fluid\Core\Parser; |
10
|
|
|
|
11
|
|
|
/**
 * Splitter
 *
 * Byte-based calculations to perform splitting on Fluid template sources.
 * Uses (64bit) bit masking to detect characters that may split a template,
 * by grouping "interesting" bytes which have ordinal values within a value
 * range of maximum 64 and comparing the bit mask of this and the byte being
 * analysed.
 *
 * Contains the methods needed to iterate and match bytes based on (mutating)
 * bit-masks, and a couple of shorthand "peek" type methods to determine if
 * the current yield should be a certain type or another.
 *
 * The logic is essentially the equivalent of:
 *
 * - Using arrays of possible byte values
 * - Iterating characters and checking against the must-match bytes
 * - Using "substr" to extract relevant bits of template code
 *
 * The difference is that the method in this class is considerably faster than
 * any array-based counterpart and consumes orders of magnitude less memory.
 * It also means the opcode optimised version of the loop and comparisons use
 * ideal CPU instructions at the bit-level instead, making them both smaller
 * and even more efficient when compiled.
 *
 * Works by:
 *
 * - Iterating a byte value array while maintaining an internal pointer
 * - Yielding byte and captured text (the text collected since the last yield)
 * - When yielding, reloading the bit masks used in the next iteration
 */
class Splitter
{
    public const BYTE_NULL = 0; // Zero-byte for terminating documents
    public const MAP_SHIFT = 64; // Bytes >= this value are shifted down by it before being tested against the secondary mask

    /** @var Source */
    public $source;

    /** @var Context */
    public $context;

    /** @var Contexts */
    public $contexts;

    /** @var int Key (within the source byte array) of the byte currently being inspected */
    public $index = 0;

    /** @var int Bit mask of splitting bytes with ordinal values 0-63 */
    private $primaryMask = 0;

    /** @var int Bit mask of splitting bytes with ordinal values 64-127, each shifted down by MAP_SHIFT */
    private $secondaryMask = 0;

    /**
     * Stores the source and the available contexts, then activates the
     * root context so that both bit masks are loaded before iteration.
     *
     * @param Source $source
     * @param Contexts $contexts
     */
    public function __construct(Source $source, Contexts $contexts)
    {
        $this->source = $source;
        $this->contexts = $contexts;
        $this->switch($contexts->root);
    }

    /**
     * Creates the splitting sequence: the generator returned by iterate(),
     * wrapped in a NoRewindIterator.
     *
     * Keys are the matched byte values (or BYTE_NULL), values are the text
     * captured since the previous yield (or null if nothing was captured).
     *
     * @return \NoRewindIterator|string[]|null[]
     */
    public function parse(): \NoRewindIterator
    {
        return new \NoRewindIterator($this->iterate());
    }

    /**
     * Split a string by searching for recognized characters using at least one,
     * optionally two bit masks consisting of OR'ed bit values of each detectable
     * character (byte).
     *
     * Every time a byte matches the currently active masks, that byte is yielded
     * as key with the text captured since the previous yield as value. The masks
     * are read from this instance for every byte, so a call to switch() made
     * while the generator is suspended takes effect from the next byte onward.
     *
     * An empty byte list yields a single BYTE_NULL => null pair. Text remaining
     * after the last matched byte is yielded with BYTE_NULL as key.
     *
     * @return \Generator|string[]|null[]
     */
    protected function iterate(): \Generator
    {
        $bytes = &$this->source->bytes;
        $source = &$this->source->source;

        if (empty($bytes)) {
            yield self::BYTE_NULL => null;
            return;
        }

        $captured = null;

        foreach ($bytes as $this->index => $byte) {
            // Decide whether the byte is a splitting token:
            // - 0..63 is tested directly against the primary mask (not-mapped match).
            // - 64..127 is shifted down by MAP_SHIFT and tested against the secondary mask (mapped match).
            // - 128 and above is non-ASCII and is always captured, never a token.
            if ($byte < self::MAP_SHIFT) {
                $isToken = (bool) ($this->primaryMask & (1 << $byte));
            } elseif ($byte < 128) {
                $isToken = (bool) ($this->secondaryMask & (1 << ($byte - self::MAP_SHIFT)));
            } else {
                $isToken = false;
            }

            if ($isToken) {
                yield $byte => $captured;
                $captured = null;
            } else {
                // Append the character from the source string; done only for non-tokens so tokens are never captured.
                // NOTE(review): the "- 1" assumes the byte array keys are 1-based relative to the source string
                // (as produced by e.g. unpack('C*', ...)) - confirm against Source.
                $captured .= $source[$this->index - 1];
            }
        }

        if ($captured !== null) {
            yield self::BYTE_NULL => $captured;
        }
    }

    /**
     * Activates the given context by copying its primary and secondary bit
     * masks into this splitter.
     *
     * @param Context $context The context to make active
     * @return Context The context that was active before the call, or the given context if none was active yet
     */
    public function switch(Context $context): Context
    {
        $previous = $this->context;
        $this->context = $context;
        $this->primaryMask = $context->primaryMask;
        $this->secondaryMask = $context->secondaryMask;
        return $previous ?? $context;
    }
}
129
|
|
|
|