Completed
Push — master ( 5987cd...c258b5 )
by Michal
04:11
created

Formatter::getGroupLength()   B

Complexity

Conditions 6
Paths 6

Size

Total Lines 38
Code Lines 13

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 14
CRAP Score 6.0702

Importance

Changes 2
Bugs 0 Features 1
Metric Value
c 2
b 0
f 1
dl 0
loc 38
ccs 14
cts 16
cp 0.875
rs 8.439
cc 6
eloc 13
nc 6
nop 1
crap 6.0702
1
<?php
2
3
/**
4
 * Utilities that are used for formatting queries.
5
 *
6
 * @package    SqlParser
7
 * @subpackage Utils
8
 */
9
namespace SqlParser\Utils;
10
11
use SqlParser\Lexer;
12
use SqlParser\Parser;
13
use SqlParser\Token;
14
use SqlParser\TokensList;
15
16
/**
17
 * Utilities that are used for formatting queries.
18
 *
19
 * @category   Misc
20
 * @package    SqlParser
21
 * @subpackage Utils
22
 * @author     Dan Ungureanu <[email protected]>
23
 * @license    http://opensource.org/licenses/GPL-2.0 GNU Public License
24
 */
25
class Formatter
26
{
27
28
    /**
29
     * The formatting options.
30
     *
31
     * @var array
32
     */
33
    public $options;
34
35
    /**
36
     * Clauses that must be inlined.
37
     *
38
     * These clauses usually are short and it's nicer to have them inline.
39
     *
40
     * @var array
41
     */
42
    public static $INLINE_CLAUSES = array(
43
        'CREATE'                        => true,
44
        'LIMIT'                         => true,
45
        'PARTITION BY'                  => true,
46
        'PARTITION'                     => true,
47
        'PROCEDURE'                     => true,
48
        'SUBPARTITION BY'               => true,
49
        'VALUES'                        => true,
50
    );
51
52
    /**
     * Constructor.
     *
     * The supplied options are merged over the defaults with `array_merge()`,
     * so every key given by the caller replaces the default of the same name.
     *
     * @param array $options The formatting options.
     */
    public function __construct(array $options = array())
    {
        // Default output type: 'cli' under the CLI SAPI, plain 'text' otherwise.
        $defaultType = php_sapi_name() === 'cli' ? 'cli' : 'text';

        // The default line ending depends on the effective output type, so the
        // type must be resolved before the defaults are built. Reading
        // `$this->options['type']` here would not work: the property is only
        // assigned after `array_merge()` returns.
        $type = array_key_exists('type', $options) ? $options['type'] : $defaultType;

        // The specified formatting options are merged with the default values.
        $this->options = array_merge(
            array(

                /**
                 * The format of the result.
                 *
                 * @var string The type ('text', 'cli' or 'html')
                 */
                'type' => $defaultType,

                /**
                 * The line ending used.
                 * By default, for text this is "\n" and for HTML this is "<br/>".
                 *
                 * @var string
                 */
                'line_ending' => $type === 'html' ? '<br/>' : "\n",

                /**
                 * The string used for indentation.
                 *
                 * @var string
                 */
                'indentation' => '  ',

                /**
                 * Whether comments should be removed or not.
                 *
                 * @var bool
                 */
                'remove_comments' => false,

                /**
                 * Whether each clause should be on a new line.
                 *
                 * @var bool
                 */
                'clause_newline' => true,

                /**
                 * Whether each part should be on a new line.
                 * Parts are delimited by brackets and commas.
                 *
                 * @var bool
                 */
                'parts_newline' => true,

                /**
                 * Whether each part of each clause should be indented.
                 *
                 * @var bool
                 */
                'indent_parts' => true,

                /**
                 * The styles used for HTML and CLI formatting.
                 *
                 * Each entry is matched against a token by `type` and `flags`;
                 * `html` is the attribute string put in the `<span>`, `cli` is
                 * the escape sequence written before the token and `function`
                 * is an optional callable applied to the token's text.
                 *
                 * @var array[]
                 */
                'formats' => array(
                    array(
                        'type'      => Token::TYPE_KEYWORD,
                        'flags'     => Token::FLAG_KEYWORD_RESERVED,
                        'html'      => 'class="sql-reserved"',
                        'cli'       => "\e[35m",
                        'function'  => 'strtoupper',
                    ),
                    array(
                        'type'      => Token::TYPE_KEYWORD,
                        'flags'     => 0,
                        'html'      => 'class="sql-keyword"',
                        'cli'       => "\e[95m",
                        'function'  => 'strtoupper',
                    ),
                    array(
                        'type'      => Token::TYPE_COMMENT,
                        'flags'     => 0,
                        'html'      => 'class="sql-comment"',
                        'cli'       => "\e[37m",
                        'function'  => '',
                    ),
                    array(
                        'type'      => Token::TYPE_BOOL,
                        'flags'     => 0,
                        'html'      => 'class="sql-atom"',
                        'cli'       => "\e[36m",
                        'function'  => 'strtoupper',
                    ),
                    array(
                        'type'      => Token::TYPE_NUMBER,
                        'flags'     => 0,
                        'html'      => 'class="sql-number"',
                        'cli'       => "\e[92m",
                        'function'  => 'strtolower',
                    ),
                    array(
                        'type'      => Token::TYPE_STRING,
                        'flags'     => 0,
                        'html'      => 'class="sql-string"',
                        'cli'       => "\e[91m",
                        'function'  => '',
                    ),
                    array(
                        'type'      => Token::TYPE_SYMBOL,
                        'flags'     => 0,
                        'html'      => 'class="sql-variable"',
                        'cli'       => "\e[36m",
                        'function'  => '',
                    ),
                )
            ),
            $options
        );

        // `parts_newline` requires `clause_newline`. A logical AND is used so
        // the option stays a boolean and any truthy value counts as enabled
        // (a bitwise AND would turn e.g. `2 & true` into 0).
        $this->options['parts_newline'] = $this->options['parts_newline']
            && $this->options['clause_newline'];
    }
178
179
    /**
     * Formats the given list of tokens.
     *
     * The list is walked once with two cursors: `$curr` is the token at
     * `$list->idx` and `$prev` is the last non-whitespace, non-comment token
     * seen before it. Each iteration emits `$prev`, because the separator that
     * follows a token (space, line break or nothing) depends on the next one.
     * As a consequence the very last such token in the list is only used as
     * look-ahead and is not written to the result.
     *
     * This method moves `$list->idx` while iterating.
     *
     * @param TokensList $list The list of tokens.
     *
     * @return string
     */
    public function formatList($list)
    {
        // The query to be returned.
        $ret = '';

        // The indentation level.
        $indent = 0;

        // Whether a line break must be written after the token being emitted.
        $lineEnded = false;

        // The name of the last clause.
        $lastClause = '';

        // A stack that keeps track of the indentation level every time a new
        // block (`BEGIN` or an opening bracket) is found.
        $blocksIndentation = array();

        // A stack that keeps track of the line endings every time a new
        // bracket block is found.
        $blocksLineEndings = array();

        // Whether clause's options were formatted.
        $formattedOptions = false;

        // Previously parsed token.
        $prev = null;

        // Comments are being formatted separately to maintain the whitespaces
        // before and after them.
        $comment = '';

        for ($list->idx = 0; $list->idx < $list->count; ++$list->idx) {
            // Token parsed at this moment.
            $curr = $list->tokens[$list->idx];

            if ($curr->type === Token::TYPE_WHITESPACE) {
                // Whitespaces are skipped because the formatter adds its own.
                continue;
            } elseif ($curr->type === Token::TYPE_COMMENT) {
                // Whether the comments should be parsed.
                if (!empty($this->options['remove_comments'])) {
                    continue;
                }

                // Neighbouring tokens; any of them may be missing when the
                // comment sits at the very beginning or end of the list.
                $before = isset($list->tokens[$list->idx - 1])
                    ? $list->tokens[$list->idx - 1] : null;
                $after = isset($list->tokens[$list->idx + 1])
                    ? $list->tokens[$list->idx + 1] : null;
                $afterNext = isset($list->tokens[$list->idx + 2])
                    ? $list->tokens[$list->idx + 2] : null;

                // The whitespaces before and after are preserved for
                // formatting reasons.
                if (($before !== null) && ($before->type === Token::TYPE_WHITESPACE)) {
                    $comment .= $before->token;
                }
                $comment .= $this->toString($curr);

                // Adding the next whitespace only if there is no comment that
                // follows it immediately; that comment would add the same
                // whitespace again as its own leading whitespace.
                if (($after !== null)
                    && ($after->type === Token::TYPE_WHITESPACE)
                    && (($afterNext === null) || ($afterNext->type !== Token::TYPE_COMMENT))
                ) {
                    $comment .= $after->token;
                }

                // Everything was handled here, no need to continue.
                continue;
            }

            // Checking if pointers were initialized.
            if ($prev !== null) {
                // Checking if a new clause started.
                if (static::isClause($prev) !== false) {
                    $lastClause = $prev->value;
                    $formattedOptions = false;
                }

                // The options of a clause should stay on the same line and
                // everything that follows goes on a new, indented line. The
                // options end at the first token that is not a keyword or
                // that is a function keyword.
                if (($this->options['parts_newline'])
                    && (!$formattedOptions)
                    && (empty(self::$INLINE_CLAUSES[$lastClause]))
                    && (($curr->type !== Token::TYPE_KEYWORD)
                    || (($curr->type === Token::TYPE_KEYWORD)
                    && ($curr->flags & Token::FLAG_KEYWORD_FUNCTION)))
                ) {
                    $formattedOptions = true;
                    $lineEnded = true;
                    ++$indent;
                }

                // Checking if this clause ended. `isClause` returns 2 for
                // statements (always on a new line) and 1 for clauses (on a
                // new line only when `clause_newline` is enabled).
                if ($tmp = static::isClause($curr)) {
                    if (($tmp === 2) || ($this->options['clause_newline'])) {
                        $lineEnded = true;
                        if ($this->options['parts_newline']) {
                            --$indent;
                        }
                    }
                }

                // Indenting BEGIN ... END blocks.
                if (($prev->type === Token::TYPE_KEYWORD) && ($prev->value === 'BEGIN')) {
                    $lineEnded = true;
                    array_push($blocksIndentation, $indent);
                    ++$indent;
                } elseif (($curr->type === Token::TYPE_KEYWORD) && ($curr->value === 'END')) {
                    $lineEnded = true;
                    // An `END` without a matching `BEGIN` finds an empty stack;
                    // fall back to no indentation instead of null.
                    $indent = empty($blocksIndentation) ? 0 : array_pop($blocksIndentation);
                }

                // Formatting fragments delimited by comma.
                if (($prev->type === Token::TYPE_OPERATOR) && ($prev->value === ',')) {
                    // Fragments delimited by a comma are broken into multiple
                    // pieces only if the clause is not inlined or this fragment
                    // is between brackets that are on new line.
                    if (((empty(self::$INLINE_CLAUSES[$lastClause]))
                        && ($this->options['parts_newline']))
                        || (end($blocksLineEndings) === true)
                    ) {
                        $lineEnded = true;
                    }
                }

                // Handling brackets.
                // Brackets are indented only if the length of the fragment between
                // them is longer than 30 characters.
                if (($prev->type === Token::TYPE_OPERATOR) && ($prev->value === '(')) {
                    array_push($blocksIndentation, $indent);
                    if (static::getGroupLength($list) > 30) {
                        ++$indent;
                        $lineEnded = true;
                    }
                    array_push($blocksLineEndings, $lineEnded);
                } elseif (($curr->type === Token::TYPE_OPERATOR) && ($curr->value === ')')) {
                    // A closing bracket without an opening one finds empty
                    // stacks; fall back to no indentation instead of null.
                    $indent = empty($blocksIndentation) ? 0 : array_pop($blocksIndentation);
                    // The closing bracket goes on its own line if the group
                    // was opened on a new line.
                    if (array_pop($blocksLineEndings)) {
                        $lineEnded = true;
                    }
                }

                // Delimiter must be placed on the same line with the last
                // clause.
                if ($curr->type === Token::TYPE_DELIMITER) {
                    $lineEnded = false;
                }

                // Adding the token.
                $ret .= $this->toString($prev);

                // Finishing the line.
                if ($lineEnded) {
                    // The level may have been decremented below zero by a
                    // clause that was never indented; clamp it.
                    if ($indent < 0) {
                        $indent = 0;
                    }

                    // Comments never reach this point (they `continue` above),
                    // so this condition currently always holds.
                    if ($curr->type !== Token::TYPE_COMMENT) {
                        $ret .= $this->options['line_ending']
                            . str_repeat($this->options['indentation'], $indent);
                    }
                    $lineEnded = false;
                } else {
                    // If the line ended there is no point in adding whitespaces.
                    // Also, some tokens do not have spaces before or after them.
                    if (!(
                        // No space after . (
                        (($prev->type === Token::TYPE_OPERATOR)
                            && (($prev->value === '.') || ($prev->value === '(')))
                        // No space before . , ( )
                        || (($curr->type === Token::TYPE_OPERATOR)
                            && (($curr->value === '.') || ($curr->value === ',')
                            || ($curr->value === '(') || ($curr->value === ')')))
                        // No space before delimiters shorter than 2 characters.
                        || (($curr->type === Token::TYPE_DELIMITER)
                            && (mb_strlen($curr->value, 'UTF-8') < 2))
                        )
                        // Always a space after the `DELIMITER` keyword.
                        || ($prev->value === 'DELIMITER')
                    ) {
                        $ret .= ' ';
                    }
                }
            }

            // Pending comments are written after the token that preceded them.
            if (!empty($comment)) {
                $ret .= $comment;
                $comment = '';
            }

            // Iteration finished, consider current token as previous.
            $prev = $curr;
        }

        // Resetting the terminal attributes after a coloured query.
        if ($this->options['type'] === 'cli') {
            return $ret . "\e[0m";
        }

        return $ret;
    }
417
418
    /**
     * Renders a single token according to the configured output type.
     *
     * The first entry of the `formats` option whose `type` equals the token's
     * type and whose `flags` are all set on the token is applied: its
     * `function` (if any) transforms the text, then the text is wrapped in a
     * `<span>` (type 'html') or prefixed with an escape sequence (type 'cli').
     * For any other type the transformed text is returned as is.
     *
     * In HTML mode the token's text is always escaped, because it comes from
     * the query being formatted and may contain `<`, `>` or `&`.
     *
     * @param Token $token The token to be printed.
     *
     * @return string
     */
    public function toString($token)
    {
        $text = $token->token;

        foreach ($this->options['formats'] as $format) {
            if (($token->type === $format['type'])
                && (($token->flags & $format['flags']) === $format['flags'])
            ) {
                // Running transformation function.
                if (!empty($format['function'])) {
                    $func = $format['function'];
                    $text = $func($text);
                }

                // Formatting HTML.
                if ($this->options['type'] === 'html') {
                    return '<span ' . $format['html'] . '>'
                        . htmlspecialchars((string) $text, ENT_NOQUOTES | ENT_SUBSTITUTE, 'UTF-8')
                        . '</span>';
                } elseif ($this->options['type'] === 'cli') {
                    return $format['cli'] . $text;
                }

                // Only the first matching format is applied.
                break;
            }
        }

        if ($this->options['type'] === 'cli') {
            // Tokens without a style are printed in the default colour.
            return "\e[39m" . $text;
        } elseif ($this->options['type'] === 'html') {
            // Tokens without a style (operators such as `<` and `>`, for
            // example) still end up in markup and must be escaped too.
            return htmlspecialchars((string) $text, ENT_NOQUOTES | ENT_SUBSTITUTE, 'UTF-8');
        }

        return $text;
    }
455
456
    /**
     * Convenience entry point: tokenizes a query and formats the tokens.
     *
     * @param string $query   The query to be formatted
     * @param array  $options The formatting options, passed to the constructor.
     *
     * @return string          The formatted string.
     */
    public static function format($query, array $options = array())
    {
        // The lexer is built first, then a formatter configured with the
        // caller's options renders the lexer's token list.
        $tokenizer = new Lexer($query);
        $instance = new self($options);

        $formatted = $instance->formatList($tokenizer->list);

        return $formatted;
    }
470
471
    /**
     * Measures the content of a bracketed group.
     *
     * Scanning starts at the list's current position (`$list->idx`), which is
     * expected to be just past the opening bracket, and stops at the bracket
     * that closes the group or at the end of the list. The result is the sum
     * of the UTF-8 lengths of the `value` of every token in between, nested
     * brackets included.
     *
     * The list's own position is not modified.
     *
     * @param TokensList $list The list of tokens.
     *
     * @return int
     */
    public static function getGroupLength($list)
    {
        // Nesting depth. It starts at one because the bracket that opens the
        // group was already consumed by the caller.
        $depth = 1;

        // Accumulated length of the group.
        $total = 0;

        $position = $list->idx;
        while ($position < $list->count) {
            $token = $list->tokens[$position];

            if ($token->type === Token::TYPE_OPERATOR) {
                if ($token->value === '(') {
                    ++$depth;
                } elseif (($token->value === ')') && (--$depth === 0)) {
                    // The group's own closing bracket is not counted.
                    break;
                }
            }

            $total += mb_strlen($token->value, 'UTF-8');
            ++$position;
        }

        return $total;
    }
518
519
    /**
     * Classifies a token as the start of a statement, of a clause, or neither.
     *
     * @param Token $token The token to be checked.
     *
     * @return int|bool 2 for the `DELIMITER` pseudo-statement and for keywords
     *                  listed in `Parser::$STATEMENT_PARSERS`, 1 for keywords
     *                  listed in `Parser::$KEYWORD_PARSERS`, false otherwise.
     */
    public static function isClause($token)
    {
        // `DELIMITER` is matched on the raw text, case-insensitively, of a
        // token without a type.
        if (($token->type === Token::TYPE_NONE) && (strtoupper($token->token) === 'DELIMITER')) {
            return 2;
        }

        // Everything else must be a keyword to qualify.
        if ($token->type !== Token::TYPE_KEYWORD) {
            return false;
        }

        // Statements take precedence over clauses.
        if (isset(Parser::$STATEMENT_PARSERS[$token->value])) {
            return 2;
        }

        return isset(Parser::$KEYWORD_PARSERS[$token->value]) ? 1 : false;
    }
537
}
538