Completed
Pull Request — master (#470)
by Claus
01:35
created

Splitter::__construct()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 6

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 1
nc 1
nop 2
dl 0
loc 6
rs 10
c 0
b 0
f 0
1
<?php
2
declare(strict_types=1);
3
4
/*
5
 * This file belongs to the package "TYPO3 Fluid".
6
 * See LICENSE.txt that was shipped with this package.
7
 */
8
9
namespace TYPO3Fluid\Fluid\Core\Parser;
10
11
/**
 * Splitter
 *
 * Byte-based calculations to perform splitting on Fluid template sources.
 * Uses (64bit) bit masking to detect characters that may split a template,
 * by grouping "interesting" bytes which have ordinal values within a value
 * range of maximum 64 and comparing the bit mask of this and the byte being
 * analysed.
 *
 * Contains the methods needed to iterate and match bytes based on (mutating)
 * bit masks. The active masks are replaced whenever the parsing context is
 * switched, which may happen between two yields of the same iteration.
 *
 * The logic is essentially the equivalent of:
 *
 * - Using arrays of possible byte values
 * - Iterating characters and checking against the must-match bytes
 * - Using "substr" to extract relevant bits of template code
 *
 * The difference is that the method in this class is significantly faster
 * than any array-based counterpart and consumes orders of magnitude less
 * memory. It also means the opcode optimised version of the loop and
 * comparisons use ideal CPU instructions at the bit-level instead, making
 * them both smaller and even more efficient when compiled.
 *
 * Works by:
 *
 * - Iterating a byte value array while maintaining an internal pointer
 * - Yielding the matched byte as key and the text captured since the last
 *   yield as value
 * - Letting the consumer switch context (and thereby the bit masks) before
 *   the next iteration
 */
class Splitter
{
    public const BYTE_NULL = 0; // Zero-byte for terminating documents
    public const MAP_SHIFT = 64; // Offset subtracted from bytes in the 64-127 range before testing the secondary mask

    /** @var Source */
    public $source;

    /** @var Context Currently active context; assigned by switch() */
    public $context;

    /** @var Contexts */
    public $contexts;

    /** @var int Key of the byte most recently visited by iterate() */
    public $index = 0;

    /** @var int Bit mask tested against bytes with ordinal values below 64 */
    private $primaryMask = 0;

    /** @var int Bit mask tested against bytes with ordinal values 64-127, shifted down by MAP_SHIFT */
    private $secondaryMask = 0;

    /**
     * Stores the source and the available contexts, then activates the root
     * context so the bit masks are initialised before any iteration starts.
     *
     * @param Source $source
     * @param Contexts $contexts
     */
    public function __construct(Source $source, Contexts $contexts)
    {
        $this->source = $source;
        $this->contexts = $contexts;
        $this->switch($contexts->root);
    }

    /**
     * Creates the splitting sequence for the source, wrapped in an iterator
     * that cannot be rewound. Keys are the matched byte values, values are
     * the text captured since the previous yield; see iterate() for the
     * exact rules.
     *
     * @return \NoRewindIterator|string[]|null[]
     */
    public function parse(): \NoRewindIterator
    {
        return new \NoRewindIterator($this->iterate());
    }

    /**
     * Split the source by searching for recognized bytes using two bit masks,
     * each consisting of OR'ed bit values of the detectable bytes: the
     * primary mask covers ordinal values below 64, the secondary mask covers
     * ordinal values 64-127.
     *
     * Every matched byte is yielded as key together with the text captured
     * since the previous yield (null if nothing was captured). Unmatched
     * bytes are appended to the captured text. Any text remaining when the
     * bytes are exhausted is yielded with BYTE_NULL as key. An empty source
     * yields a single BYTE_NULL => null pair.
     *
     * The masks are read from the instance on every iteration, so a call to
     * switch() made by the consumer between two yields affects the matching
     * of all following bytes.
     *
     * @return \Generator|string[]|null[]
     */
    protected function iterate(): \Generator
    {
        $bytes = &$this->source->bytes;
        $source = &$this->source->source;

        if (empty($bytes)) {
            yield self::BYTE_NULL => null;
            return;
        }

        $captured = null;

        // The key of each byte is written directly to $this->index so consumers can read the position while
        // processing a yielded value.
        foreach ($bytes as $this->index => $byte) {
            // Decide which byte we encountered by explicitly checking if the encountered byte was in the minimum
            // range (not-mapped match). Next check is if the matched byte is within the 64-127 range in which case
            // it is a mapped match. Anything else (128 and above) will be non-ASCII that is always captured.
            if ($byte < 64 && ($this->primaryMask & (1 << $byte))) {
                yield $byte => $captured;
                $captured = null;
            } elseif ($byte >= 64 && $byte < 128 && ($this->secondaryMask & (1 << ($byte - static::MAP_SHIFT)))) {
                yield $byte => $captured;
                $captured = null;
            } else {
                // Append captured bytes from source, must happen after the conditions above so we avoid appending
                // tokens. NOTE(review): the "- 1" offset assumes the keys of $bytes are 1-based while the source
                // string is 0-based - confirm against the Source class.
                $captured .= $source[$this->index - 1];
            }
        }
        if ($captured !== null) {
            yield self::BYTE_NULL => $captured;
        }
    }

    /**
     * Activates the given context and loads its primary and secondary bit
     * masks into the splitter.
     *
     * @param Context $context Context to make active
     * @return Context The context that was active before the switch, or the given context if none was active
     */
    public function switch(Context $context): Context
    {
        $previous = $this->context;
        $this->context = $context;
        $this->primaryMask = $context->primaryMask;
        $this->secondaryMask = $context->secondaryMask;
        return $previous ?? $context;
    }
}
129