Completed
Pull Request — master (#457)
by Claus
03:30 queued 37s
created

Splitter::parse()   B

Complexity

Conditions 9
Paths 9

Size

Total Lines 31

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 9
nc 9
nop 0
dl 0
loc 31
rs 8.0555
c 0
b 0
f 0
1
<?php
2
declare(strict_types=1);
3
4
namespace TYPO3Fluid\Fluid\Core\Parser;
5
6
/**
7
 * Splitter
8
 *
9
 * Byte-based calculations to perform splitting on Fluid template sources.
10
 * Uses (64bit) bit masking to detect characters that may split a template,
11
 * by grouping "interesting" bytes which have ordinal values within a value
12
 * range of maximum 64 and comparing the bit mask of this and the byte being
13
 * analysed.
14
 *
15
 * Contains the methods needed to iterate and match bytes based on (mutating)
16
 * bit-masks, and a couple of shorthand "peek" type methods to determine if
17
 * the current yield should be a certain type or another.
18
 *
19
 * The logic is essentially the equivalent of:
20
 *
21
 * - Using arrays of possible byte values
22
 * - Iterating characters and checking against the must-match bytes
23
 * - Using "substr" to extract relevant bits of template code
24
 *
25
 * The difference is that the method in this class is considerably faster than
26
 * any array-based counterpart and consumes orders of magnitude less memory.
27
 * It also means the opcode optimised version of the loop and comparisons use
28
 * ideal CPU instructions at the bit-level instead, making them both smaller
29
 * and even more efficient when compiled.
30
 *
31
 * Works by:
32
 *
33
 * - Iterating a byte value array while maintaining an internal pointer
34
 * - Yielding byte and position (which contains captured text since last yield)
35
 * - When yielding, reload the bit masks used in the next iteration
36
 */
37
class Splitter
{
    public const MAX_NAMESPACE_LENGTH = 10;

    public const BYTE_NULL = 0; // Zero-byte for terminating documents
    public const BYTE_INLINE = 123; // The "{" character indicating an inline expression started
    public const BYTE_INLINE_END = 125; // The "}" character indicating an inline expression ended
    public const BYTE_PIPE = 124; // The "|" character indicating an inline expression pass operation
    public const BYTE_MINUS = 45; // The "-" character (for legacy pass operations)
    public const BYTE_TAG = 60; // The "<" character indicating a tag has started
    public const BYTE_TAG_END = 62; // The ">" character indicating a tag has ended
    public const BYTE_TAG_CLOSE = 47; // The "/" character indicating a tag is a closing tag
    public const BYTE_QUOTE_DOUBLE = 34; // The " (standard double-quote) character
    public const BYTE_QUOTE_SINGLE = 39; // The ' (standard single-quote) character
    public const BYTE_WHITESPACE_SPACE = 32; // A standard space character
    public const BYTE_WHITESPACE_TAB = 9; // A standard tab character
    public const BYTE_WHITESPACE_RETURN = 13; // A standard carriage-return character
    public const BYTE_WHITESPACE_EOL = 10; // A standard (UNIX) line-break character
    public const BYTE_SEPARATOR_EQUALS = 61; // The "=" character
    public const BYTE_SEPARATOR_COLON = 58; // The ":" character
    public const BYTE_SEPARATOR_COMMA = 44; // The "," character
    public const BYTE_SEPARATOR_PIPE = 124; // The "|" character
    public const BYTE_PARENTHESIS_START = 40; // The "(" character
    public const BYTE_PARENTHESIS_END = 41; // The ")" character
    public const BYTE_ARRAY_START = 91; // The "[" character
    public const BYTE_ARRAY_END = 93; // The "]" character
    public const BYTE_SLASH = 47; // The "/" character
    public const BYTE_BACKSLASH = 92; // The "\" character
    public const BYTE_BACKTICK = 96; // The "`" character

    // Subtracted from a byte in the upper ASCII half before it is tested against the secondary mask.
    public const MAP_SHIFT = 64;
    public const MASK_LINEBREAKS = 0 | (1 << self::BYTE_WHITESPACE_EOL) | (1 << self::BYTE_WHITESPACE_RETURN);
    public const MASK_WHITESPACE = 0 | self::MASK_LINEBREAKS | (1 << self::BYTE_WHITESPACE_SPACE) | (1 << self::BYTE_WHITESPACE_TAB);

    /** @var Source */
    public $source;

    /** @var Context */
    public $context;

    /** @var int Key of the byte most recently visited by parse() */
    public $index = 0;

    /** @var int Bit mask tested against bytes with an ordinal value below 64 */
    private $primaryMask = 0;

    /** @var int Bit mask tested against bytes in the upper ASCII half, shifted down by MAP_SHIFT */
    private $secondaryMask = 0;

    /**
     * Stores the source and activates the root context's bit masks.
     *
     * @param Source $source
     * @param Contexts $contexts Only the "root" property is read here.
     */
    public function __construct(Source $source, Contexts $contexts)
    {
        $this->source = $source;
        $this->switch($contexts->root);
    }

    /**
     * Split a string by searching for recognized characters using at least one,
     * optionally two bit masks consisting of OR'ed bit values of each detectable
     * character (byte).
     *
     * Every yield uses the matched byte as key and the text captured since the
     * previous yield (or null if nothing was captured) as value. When the bytes
     * are exhausted and captured text remains, it is yielded with BYTE_NULL as
     * key. An empty byte list yields a single BYTE_NULL => null pair.
     *
     * @return \Generator<int, string|null>
     */
    public function parse(): \Generator
    {
        $bytes = &$this->source->bytes;
        $source = &$this->source->source;

        if (empty($bytes)) {
            yield self::BYTE_NULL => null;
            return;
        }

        $captured = null;

        foreach ($bytes as $this->index => $byte) {
            // Decide which byte we encountered by explicitly checking if the encountered byte was in the minimum
            // range (not-mapped match). Next check is if the matched byte is in the upper ASCII half, in which
            // case it is a mapped match. Anything else (>= 128) will be non-ASCII that is always captured.
            // NOTE(review): byte 64 ("@") satisfies neither range check and is therefore always captured;
            // confirm whether the second check was meant to be ">= 64".
            if ($byte < 64 && ($this->primaryMask & (1 << $byte))) {
                yield $byte => $captured;
                $captured = null;
            } elseif ($byte > 64 && $byte < 128 && ($this->secondaryMask & (1 << ($byte - static::MAP_SHIFT)))) {
                yield $byte => $captured;
                $captured = null;
            } else {
                // Append captured bytes from source, must happen after the conditions above so we avoid appending
                // tokens. Square-bracket offset is required: the curly-brace form is a parse error on PHP 8.
                // The "- 1" presumably compensates for a 1-based byte list - verify against Source.
                $captured .= $source[$this->index - 1];
            }
        }
        if ($captured !== null) {
            yield self::BYTE_NULL => $captured;
        }
    }

    /**
     * Activates the bit masks of the given context.
     *
     * @param Context $context
     * @return Context The context that was active before the switch, or the given context if none was active.
     */
    public function switch(Context $context): Context
    {
        $previous = $this->context;
        $this->context = $context;
        $this->primaryMask = $context->primaryMask;
        $this->secondaryMask = $context->secondaryMask;
        return $previous ?? $context;
    }

    /**
     * Counts the bytes in the index range [$offset, $offset + $length) whose ordinal
     * value is below 64 and whose bit is set in $primaryMask. The range is clamped so
     * it never reaches the source length.
     *
     * @param int $primaryMask OR'ed bit values (1 << byte) of the bytes to count
     * @param int $offset Index of the first byte to inspect
     * @param int $length Maximum number of bytes to inspect
     * @return int
     */
    public function countCharactersMatchingMask(int $primaryMask, int $offset, int $length): int
    {
        $bytes = &$this->source->bytes;
        // Honor $length (previously ignored, which made every call scan to the end of the source).
        $end = min($offset + $length, $this->source->length);
        $counted = 0;
        for ($index = $offset; $index < $end; $index++) {
            $byte = $bytes[$index];
            if ($byte < 64 && ($primaryMask & (1 << $byte))) {
                $counted++;
            }
        }
        return $counted;
    }

    /**
     * Scans backwards, starting at the smaller of $offset and the source length and
     * stopping before index 0, for a byte below 64 whose bit is set in $primaryMask.
     *
     * @param int $primaryMask OR'ed bit values (1 << byte) of the bytes to find
     * @param int $offset Index at which the backwards scan starts
     * @return int Index of the first match, or 0 if no byte matched.
     */
    public function findBytePositionBeforeOffset(int $primaryMask, int $offset): int
    {
        $bytes = &$this->source->bytes;
        for ($index = min($offset, $this->source->length); $index > 0; $index--) {
            $byte = $bytes[$index];
            if ($byte < 64 && ($primaryMask & (1 << $byte))) {
                return $index;
            }
        }
        return 0;
    }

    /**
     * Scans forwards from $offset, stopping before the source length, for a byte
     * below 64 whose bit is set in $primaryMask.
     *
     * @param int $primaryMask OR'ed bit values (1 << byte) of the bytes to find
     * @param int $offset Index at which the forward scan starts
     * @return int Index of the first match, or the larger of the source length and $offset if no byte matched.
     */
    public function findBytePositionAfterOffset(int $primaryMask, int $offset): int
    {
        $bytes = &$this->source->bytes;
        for ($index = $offset; $index < $this->source->length; $index++) {
            $byte = $bytes[$index];
            if ($byte < 64 && ($primaryMask & (1 << $byte))) {
                return $index;
            }
        }
        return max($this->source->length, $offset);
    }
}
169