Completed
Push — master ( 5875b6...d65a71 )
by brian
01:43
created

Tokenizer::tokenize()   C

Complexity

Conditions 12
Paths 12

Size

Total Lines 66

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 38
CRAP Score 12

Importance

Changes 0
Metric Value
dl 0
loc 66
ccs 38
cts 38
cp 1
rs 6.3151
c 0
b 0
f 0
cc 12
nc 12
nop 1
crap 12

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, particularly when combined with a good name. Moreover, if your method is small, finding a good name for it is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign that the commented part should be extracted into a new method, with the comment serving as a starting point when coming up with a good name for that new method.

Commonly applied refactorings include:

1
<?php declare(strict_types=1);
2
3
/**
4
 * @copyright   (c) 2017-present brian ridley
5
 * @author      brian ridley <[email protected]>
6
 * @license     http://opensource.org/licenses/MIT MIT
7
 */
8
9
namespace ptlis\SerializedDataEditor\Parser;
10
11
/**
12
 * Tokenizes serialized PHP data.
13
 *
14
 * See http://www.phpinternalsbook.com/classes_objects/serialization.html
15
 */
16
final class Tokenizer
{
    /**
     * Processes serialized data into an array of tokens.
     *
     * The input is scanned left to right. On every iteration the character under the cursor is treated as
     * a type prefix and handed to the matching helper. Each helper leaves the cursor on the LAST character
     * of the element it consumed, so that the loop increment lands on the first character of the next one.
     *
     * @param string $serializedData
     * @return Token[]
     * @throws \RuntimeException If an unexpected character is encountered or an expected delimiter is missing.
     */
    public function tokenize(string $serializedData): array
    {
        $tokenList = [];
        $dataLength = strlen($serializedData); // Loop invariant, so only measured once

        for ($i = 0; $i < $dataLength; $i++) {
            $character = $serializedData[$i];

            switch ($character) {
                // Null
                case Token::PREFIX_NULL:
                    $tokenList[] = new Token(Token::NULL);
                    $i++; // Skip terminator character ';'
                    break;

                // Boolean
                case Token::PREFIX_BOOL:
                    // The value is the single character after the prefix and separator
                    $tokenList[] = new Token(Token::BOOL, substr($serializedData, $i + 2, 1));
                    $i += 3; // Leave the cursor on the terminator ';'
                    break;

                // Integer
                case Token::PREFIX_INTEGER:
                    $tokenList[] = $this->getNumberToken(Token::INTEGER, $serializedData, $i);
                    break;

                // Float
                case Token::PREFIX_FLOAT:
                    $tokenList[] = $this->getNumberToken(Token::FLOAT, $serializedData, $i);
                    break;

                // String
                case Token::PREFIX_STRING:
                    $tokenList[] = $this->getStringToken($serializedData, $i);
                    break;

                // Reference
                case Token::PREFIX_REFERENCE:
                    $tokenList[] = $this->getNumberToken(Token::REFERENCE, $serializedData, $i);
                    break;

                // Array
                case Token::PREFIX_ARRAY_START:
                    $tokenList[] = $this->getArrayToken($serializedData, $i);
                    break;

                // Object, default serialization
                case Token::PREFIX_OBJECT_DEFAULT_NAME:
                    // Append in place rather than rebuilding the whole list with array_merge()
                    array_push($tokenList, ...$this->getObjectDefaultToken($serializedData, $i));
                    break;

                // Object, custom serialization
                case Token::PREFIX_OBJECT_CUSTOM_NAME:
                    array_push($tokenList, ...$this->getObjectCustomToken($serializedData, $i));
                    break;

                // Array or object end
                case Token::PREFIX_COMPOUND_END:
                    $tokenList[] = new Token(Token::COMPOUND_END);
                    break;

                default:
                    throw new \RuntimeException('Invalid serialized data - unexpected character "' . $character . '" encountered');
            }
        }

        return $tokenList;
    }

    /**
     * Create token for a value written as '<prefix>:<value>;' (used for integers, floats and references).
     *
     * On return $currentIndex rests on the terminating ';'.
     *
     * @throws \RuntimeException If the terminating ';' is missing.
     */
    private function getNumberToken(string $type, string $serializedData, int &$currentIndex): Token
    {
        // Skip the type prefix and separator, e.g. 'i:'
        $currentIndex += 2;

        return new Token(
            $type,
            $this->readUntil($serializedData, $currentIndex, ';')
        );
    }

    /**
     * Create token for a string.
     *
     * On return $currentIndex rests on the terminating ';'.
     *
     * @throws \RuntimeException If the length separator ':' is missing.
     */
    private function getStringToken(string $serializedData, int &$currentIndex): Token
    {
        // Skip first characters 's:'
        $currentIndex += 2;

        // Read the length-prefixed value; the cursor ends on the closing quote
        $string = $this->readQuotedValue($serializedData, $currentIndex);

        // Step from the closing quote onto the terminator ';'
        $currentIndex++;

        return new Token(Token::STRING, $string);
    }

    /**
     * Create token for the start of an array.
     *
     * On return $currentIndex rests on the character following the ':' that ends the element count.
     *
     * @throws \RuntimeException If the separator ':' after the element count is missing.
     */
    private function getArrayToken(string $serializedData, int &$currentIndex): Token
    {
        // Skip first characters 'a:'
        $currentIndex += 2;

        // Get array length
        $arrayLength = $this->readUntil($serializedData, $currentIndex, ':');

        // Skip array open
        $currentIndex += 1;

        return new Token(Token::ARRAY_START, $arrayLength);
    }

    /**
     * Create tokens for the start of an object serialized with PHP's default serialization format.
     *
     * On return $currentIndex rests on the character following the ':' that ends the member count.
     *
     * @return Token[] The class name token followed by the member count token.
     * @throws \RuntimeException If one of the expected ':' separators is missing.
     */
    private function getObjectDefaultToken(string $serializedData, int &$currentIndex): array
    {
        // Skip first characters 'O:'
        $currentIndex += 2;

        // Read the length-prefixed class name; the cursor ends on the closing quote
        $className = $this->readQuotedValue($serializedData, $currentIndex);

        // Skip closing quote and separator '":'
        $currentIndex += 2;

        // Get object property count
        $objectPropertyCount = $this->readUntil($serializedData, $currentIndex, ':');

        // Skip terminator
        $currentIndex++;

        return [
            new Token(Token::OBJECT_DEFAULT_NAME, $className),
            new Token(Token::OBJECT_MEMBER_COUNT, $objectPropertyCount)
        ];
    }

    /**
     * Create tokens for an object serialized with a custom serialization format.
     *
     * The whole element, including its payload and closing delimiter, is consumed here. On return
     * $currentIndex rests on the closing '}' so that the caller's loop increment moves onto the next
     * element without skipping a character.
     *
     * @return Token[] The class name token, the raw payload token and a compound-end token.
     * @throws \RuntimeException If one of the expected ':' separators is missing.
     */
    private function getObjectCustomToken(string $serializedData, int &$currentIndex): array
    {
        // Skip first characters 'C:'
        $currentIndex += 2;

        // Read the length-prefixed class name; the cursor ends on the closing quote
        $className = $this->readQuotedValue($serializedData, $currentIndex);

        // Skip closing quote and separator '":'
        $currentIndex += 2;

        // Get length of serialized data
        $serializedDataLength = intval($this->readUntil($serializedData, $currentIndex, ':'));

        // Skip separator and opening delimiter ':{'
        $currentIndex += 2;

        // Read serialized data; afterwards the cursor sits on the closing delimiter '}'.
        // It must NOT be advanced any further: tokenize() increments the cursor itself, and stepping
        // past '}' here would make it swallow the first character of whatever follows this object.
        $data = substr($serializedData, $currentIndex, $serializedDataLength);
        $currentIndex += $serializedDataLength;

        return [
            new Token(Token::OBJECT_CUSTOM_NAME, $className),
            new Token(Token::OBJECT_CUSTOM_DATA, $data),
            new Token(Token::COMPOUND_END)
        ];
    }

    /**
     * Reads a length-prefixed, double-quoted value of the form '<length>:"<value>"'.
     *
     * Expects $currentIndex to point at the first character of the length. On return it rests on the
     * closing quote. The length is taken at face value and counted in bytes.
     *
     * @throws \RuntimeException If the ':' separator after the length is missing.
     */
    private function readQuotedValue(string $serializedData, int &$currentIndex): string
    {
        $length = intval($this->readUntil($serializedData, $currentIndex, ':'));

        // Skip separator and open quote ':"'
        $currentIndex += 2;

        // Cast because older PHP versions return false from substr() when the start is out of range
        $value = (string) substr($serializedData, $currentIndex, $length);
        $currentIndex += $length;

        return $value;
    }

    /**
     * Returns the segment of $serializedData from $currentIndex until the first instance of $char is found.
     *
     * On return $currentIndex rests on the first character of the match (the match is not consumed).
     *
     * @throws \RuntimeException If $char does not occur at or after $currentIndex; without this guard
     *                           truncated input would never terminate the search.
     */
    private function readUntil(
        string $serializedData,
        int &$currentIndex,
        string $char
    ): string {
        // Guard the offset first: strpos() rejects offsets beyond the end of the string
        $position = $currentIndex < strlen($serializedData)
            ? strpos($serializedData, $char, $currentIndex)
            : false;

        if ($position === false) {
            throw new \RuntimeException(
                'Invalid serialized data - unexpected end of data while looking for "' . $char . '"'
            );
        }

        $segment = substr($serializedData, $currentIndex, $position - $currentIndex);
        $currentIndex = $position;

        return $segment;
    }
}