Completed
Push — master ( 00eb34...e18815 )
by Michal
228:24 queued 163:23
created

Formatter::escapeConsole()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 20
Code Lines 15

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 12
CRAP Score 1

Importance

Changes 0
Metric Value
cc 1
eloc 15
nc 1
nop 1
dl 0
loc 20
ccs 12
cts 12
cp 1
crap 1
rs 9.4285
c 0
b 0
f 0
1
<?php
2
3
/**
4
 * Utilities that are used for formatting queries.
5
 *
6
 * @package    SqlParser
7
 * @subpackage Utils
8
 */
9
namespace SqlParser\Utils;
10
11
use SqlParser\Lexer;
12
use SqlParser\Parser;
13
use SqlParser\Token;
14
use SqlParser\TokensList;
15
16
/**
17
 * Utilities that are used for formatting queries.
18
 *
19
 * @category   Misc
20
 * @package    SqlParser
21
 * @subpackage Utils
22
 * @author     Dan Ungureanu <[email protected]>
23
 * @license    http://opensource.org/licenses/GPL-2.0 GNU Public License
24
 */
25
class Formatter
26
{
27
28
    /**
29
     * The formatting options.
30
     *
31
     * @var array
32
     */
33
    public $options;
34
35
    /**
36
     * Clauses that must be inlined.
37
     *
38
     * These clauses usually are short and it's nicer to have them inline.
39
     *
40
     * @var array
41
     */
42
    public static $INLINE_CLAUSES = array(
43
        'CREATE'                        => true,
44
        'LIMIT'                         => true,
45
        'PARTITION BY'                  => true,
46
        'PARTITION'                     => true,
47
        'PROCEDURE'                     => true,
48
        'SUBPARTITION BY'               => true,
49
        'VALUES'                        => true,
50
    );
51
52
    /**
     * Creates a formatter whose options are the defaults below, overridden by
     * whatever the caller supplies.
     *
     * @param array $options The formatting options.
     */
    public function __construct(array $options = array())
    {
        $defaults = array(
            // The format of the result: 'text', 'cli' or 'html'.
            // Defaults to 'cli' when running under the CLI SAPI.
            'type' => php_sapi_name() === 'cli' ? 'cli' : 'text',

            // The line ending used. Left as null here and resolved below
            // once the final output type is known.
            'line_ending' => null,

            // The string used for one level of indentation.
            'indentation' => '  ',

            // Whether comments should be removed or not.
            'remove_comments' => false,

            // Whether each clause should be on a new line.
            'clause_newline' => true,

            // Whether each part should be on a new line.
            // Parts are delimited by brackets and commas.
            'parts_newline' => true,

            // Whether each part of each clause should be indented.
            'indent_parts' => true,

            // The styles applied per token: the token type and flags to
            // match, the HTML attribute, the CLI escape sequence and the
            // name of a function used to transform the token's text.
            'formats' => array(
                array(
                    'type'      => Token::TYPE_KEYWORD,
                    'flags'     => Token::FLAG_KEYWORD_RESERVED,
                    'html'      => 'class="sql-reserved"',
                    'cli'       => "\x1b[35m",
                    'function'  => 'strtoupper',
                ),
                array(
                    'type'      => Token::TYPE_KEYWORD,
                    'flags'     => 0,
                    'html'      => 'class="sql-keyword"',
                    'cli'       => "\x1b[95m",
                    'function'  => 'strtoupper',
                ),
                array(
                    'type'      => Token::TYPE_COMMENT,
                    'flags'     => 0,
                    'html'      => 'class="sql-comment"',
                    'cli'       => "\x1b[37m",
                    'function'  => '',
                ),
                array(
                    'type'      => Token::TYPE_BOOL,
                    'flags'     => 0,
                    'html'      => 'class="sql-atom"',
                    'cli'       => "\x1b[36m",
                    'function'  => 'strtoupper',
                ),
                array(
                    'type'      => Token::TYPE_NUMBER,
                    'flags'     => 0,
                    'html'      => 'class="sql-number"',
                    'cli'       => "\x1b[92m",
                    'function'  => 'strtolower',
                ),
                array(
                    'type'      => Token::TYPE_STRING,
                    'flags'     => 0,
                    'html'      => 'class="sql-string"',
                    'cli'       => "\x1b[91m",
                    'function'  => '',
                ),
                array(
                    'type'      => Token::TYPE_SYMBOL,
                    'flags'     => 0,
                    'html'      => 'class="sql-variable"',
                    'cli'       => "\x1b[36m",
                    'function'  => '',
                ),
            ),
        );

        // Caller-supplied values win over the defaults, key by key.
        $this->options = array_merge($defaults, $options);

        // An unspecified line ending depends on the output type:
        // "<br/>" for HTML and "\n" for everything else.
        if ($this->options['line_ending'] === null) {
            if ($this->options['type'] == 'html') {
                $this->options['line_ending'] = '<br/>';
            } else {
                $this->options['line_ending'] = "\n";
            }
        }

        // `parts_newline` requires `clause_newline`; the bitwise AND leaves
        // an integer (0 or 1) in the option.
        $this->options['parts_newline']
            = $this->options['parts_newline'] & $this->options['clause_newline'];
    }
182
183
    /**
     * Formats the given list of tokens.
     *
     * The list is walked once with two consecutive non-whitespace,
     * non-comment tokens in hand (`$prev` and `$curr`). On every iteration
     * the *previous* token is emitted, and the current one decides what
     * follows it: a line break plus indentation, a single space, or nothing.
     * Because of this the token that is last in the list is never emitted
     * itself.
     *
     * Comments are collected separately, together with their neighbouring
     * whitespace, and appended right after the token that precedes them.
     *
     * Note that `$list->idx` is used as the loop cursor and is therefore
     * modified by this method.
     *
     * @param TokensList $list The list of tokens.
     *
     * @return string The formatted query. In 'cli' mode the result is
     *                terminated with the "reset" escape sequence.
     */
    public function formatList($list)
    {
        /**
         * The query to be returned.
         *
         * @var string $ret
         */
        $ret = '';

        /**
         * The indentation level.
         *
         * @var int $indent
         */
        $indent = 0;

        /**
         * Whether the line ended.
         *
         * @var bool $lineEnded
         */
        $lineEnded = false;

        /**
         * Whether current group is short (no linebreaks).
         *
         * @var bool $shortGroup
         */
        $shortGroup = false;

        /**
         * The name of the last clause.
         *
         * @var string $lastClause
         */
        $lastClause = '';

        /**
         * A stack that keeps track of the indentation level every time a new
         * block is found.
         *
         * @var array $blocksIndentation
         */
        $blocksIndentation = array();

        /**
         * A stack that keeps track of the line endings every time a new block
         * is found.
         *
         * @var array $blocksLineEndings
         */
        $blocksLineEndings = array();

        /**
         * Whether clause's options were formatted.
         *
         * @var bool $formattedOptions
         */
        $formattedOptions = false;

        /**
         * Previously parsed token.
         *
         * @var Token $prev
         */
        $prev = null;

        /**
         * Comments are being formatted separately to maintain the whitespaces
         * before and after them.
         *
         * @var string $comment
         */
        $comment = '';

        // In order to be able to format the queries correctly, the next token
        // must be taken into consideration. The loop below uses two pointers,
        // `$prev` and `$curr` which store two consecutive tokens.
        // Actually, at every iteration the previous token is being used.
        for ($list->idx = 0; $list->idx < $list->count; ++$list->idx) {
            /**
             * Token parsed at this moment.
             *
             * @var Token $curr
             */
            $curr = $list->tokens[$list->idx];

            if ($curr->type === Token::TYPE_WHITESPACE) {
                // Whitespaces are skipped because the formatter adds its own.
                continue;
            } elseif ($curr->type === Token::TYPE_COMMENT) {
                // Whether the comments should be parsed.
                if (!empty($this->options['remove_comments'])) {
                    continue;
                }

                // The neighbours are looked up with `isset` first: a comment
                // may be the very first token of the list (no predecessor)
                // or sit at its end (no successors).
                if (isset($list->tokens[$list->idx - 1])
                    && ($list->tokens[$list->idx - 1]->type === Token::TYPE_WHITESPACE)
                ) {
                    // The whitespaces before and after are preserved for
                    // formatting reasons.
                    $comment .= $list->tokens[$list->idx - 1]->token;
                }
                $comment .= $this->toString($curr);
                if (isset($list->tokens[$list->idx + 1])
                    && ($list->tokens[$list->idx + 1]->type === Token::TYPE_WHITESPACE)
                    && (!isset($list->tokens[$list->idx + 2])
                    || ($list->tokens[$list->idx + 2]->type !== Token::TYPE_COMMENT))
                ) {
                    // Adding the next whitespace only if there is no comment
                    // that follows it immediately, which may cause adding a
                    // whitespace twice.
                    $comment .= $list->tokens[$list->idx + 1]->token;
                }

                // Everything was handled here, no need to continue.
                continue;
            }

            // Checking if pointers were initialized.
            if ($prev !== null) {
                // Checking if a new clause started.
                if (static::isClause($prev) !== false) {
                    $lastClause = $prev->value;
                    $formattedOptions = false;
                }

                // The options of a clause should stay on the same line and everything that follows.
                if (($this->options['parts_newline'])
                    && (!$formattedOptions)
                    && (empty(self::$INLINE_CLAUSES[$lastClause]))
                    && (($curr->type !== Token::TYPE_KEYWORD)
                    || (($curr->type === Token::TYPE_KEYWORD)
                    && ($curr->flags & Token::FLAG_KEYWORD_FUNCTION)))
                ) {
                    $formattedOptions = true;
                    $lineEnded = true;
                    ++$indent;
                }

                // Checking if this clause ended.
                if ($tmp = static::isClause($curr)) {
                    // A value of 2 always forces a new line; a value of 1
                    // does so only when `clause_newline` is enabled.
                    if (($tmp == 2) || ($this->options['clause_newline'])) {
                        $lineEnded = true;
                        if ($this->options['parts_newline']) {
                            --$indent;
                        }
                    }
                }

                // Indenting BEGIN ... END blocks.
                if (($prev->type === Token::TYPE_KEYWORD) && ($prev->value === 'BEGIN')) {
                    $lineEnded = true;
                    array_push($blocksIndentation, $indent);
                    ++$indent;
                } elseif (($curr->type === Token::TYPE_KEYWORD) && ($curr->value === 'END')) {
                    $lineEnded = true;
                    // NOTE: yields null when the stack is empty (an END
                    // without a BEGIN); later arithmetic on `$indent`
                    // depends on that, so it is intentionally left as is.
                    $indent = array_pop($blocksIndentation);
                }

                // Formatting fragments delimited by comma.
                if (($prev->type === Token::TYPE_OPERATOR) && ($prev->value === ',')) {
                    // Fragments delimited by a comma are broken into multiple
                    // pieces only if the clause is not inlined or this fragment
                    // is between brackets that are on new line.
                    if (((empty(self::$INLINE_CLAUSES[$lastClause]))
                        && ! $shortGroup
                        && ($this->options['parts_newline']))
                        || (end($blocksLineEndings) === true)
                    ) {
                        $lineEnded = true;
                    }
                }

                // Handling brackets.
                // Brackets are indented only if the length of the fragment between
                // them is longer than 30 characters.
                if (($prev->type === Token::TYPE_OPERATOR) && ($prev->value === '(')) {
                    array_push($blocksIndentation, $indent);
                    $shortGroup = true;
                    if (static::getGroupLength($list) > 30) {
                        ++$indent;
                        $lineEnded = true;
                        $shortGroup = false;
                    }
                    array_push($blocksLineEndings, $lineEnded);
                } elseif (($curr->type === Token::TYPE_OPERATOR) && ($curr->value === ')')) {
                    $indent = array_pop($blocksIndentation);
                    $lineEnded |= array_pop($blocksLineEndings);
                    $shortGroup = false;
                }

                // Delimiter must be placed on the same line with the last
                // clause.
                if ($curr->type === Token::TYPE_DELIMITER) {
                    $lineEnded = false;
                }

                // Adding the token.
                $ret .= $this->toString($prev);

                // Finishing the line.
                if ($lineEnded) {
                    if ($indent < 0) {
                        // TODO: Make sure this never occurs and delete it.
                        $indent = 0;
                    }

                    if ($curr->type !== Token::TYPE_COMMENT) {
                        $ret .= $this->options['line_ending']
                            . str_repeat($this->options['indentation'], $indent);
                    }
                    $lineEnded = false;
                } else {
                    // If the line ended there is no point in adding whitespaces.
                    // Also, some tokens do not have spaces before or after them.
                    if (!((($prev->type === Token::TYPE_OPERATOR) && (($prev->value === '.') || ($prev->value === '(')))
                        // No space after . (
                        || (($curr->type === Token::TYPE_OPERATOR) && (($curr->value === '.') || ($curr->value === ',')
                        || ($curr->value === '(') || ($curr->value === ')')))
                        // No space before . , ( )
                        || (($curr->type === Token::TYPE_DELIMITER)) && (mb_strlen($curr->value, 'UTF-8') < 2))
                        // No space before a delimiter shorter than 2 characters.
                        || ($prev->value === 'DELIMITER')
                        // Always a space after the DELIMITER keyword.
                    ) {
                        $ret .= ' ';
                    }
                }
            }

            // Flushing the pending comment right after the token it followed.
            if (!empty($comment)) {
                $ret .= $comment;
                $comment = '';
            }

            // Iteration finished, consider current token as previous.
            $prev = $curr;
        }

        if ($this->options['type'] === 'cli') {
            // Resetting the terminal colours at the end of the output.
            return $ret . "\x1b[0m";
        }

        return $ret;
    }
432
433
    /**
     * Makes control characters visible so they cannot affect the console.
     *
     * Every byte in the range 0x00 - 0x1F is replaced by its literal,
     * four character `\xHH` notation (two uppercase hexadecimal digits).
     * All other bytes are left untouched.
     *
     * @param string $string The text to be escaped.
     *
     * @return string
     */
    public function escapeConsole($string)
    {
        $controls = array();
        $visible = array();

        for ($code = 0x00; $code <= 0x1F; ++$code) {
            $controls[] = chr($code);
            // Single quotes: the backslash is emitted literally.
            $visible[] = sprintf('\x%02X', $code);
        }

        return str_replace($controls, $visible, $string);
    }
453 1
454 1
    /**
     * Renders a single token according to the configured output type.
     *
     * The first entry of the `formats` option whose `type` equals the
     * token's type and whose `flags` are all set on the token is applied:
     * its `function` (if any) transforms the text, and its `html` / `cli`
     * value decorates the output.
     *
     *  - 'html': the text is HTML-escaped and, if a format matched, wrapped
     *            in a `<span>` carrying the format's attribute;
     *  - 'cli':  the text is console-escaped and prefixed with the format's
     *            colour sequence, or with the default foreground colour if
     *            no format matched;
     *  - any other type (e.g. 'text'): the (possibly transformed) text is
     *            returned as is.
     *
     * @param Token $token The token to be printed.
     *
     * @return string
     */
    public function toString($token)
    {
        $text = $token->token;

        foreach ($this->options['formats'] as $format) {
            if (($token->type === $format['type'])
                && (($token->flags & $format['flags']) === $format['flags'])
            ) {
                // Running transformation function.
                if (!empty($format['function'])) {
                    $func = $format['function'];
                    $text = $func($text);
                }

                // Formatting HTML.
                if ($this->options['type'] === 'html') {
                    return '<span ' . $format['html'] . '>' . htmlspecialchars($text, ENT_NOQUOTES) . '</span>';
                } elseif ($this->options['type'] === 'cli') {
                    return $format['cli'] . $this->escapeConsole($text);
                }

                // Only the first matching format is applied.
                break;
            }
        }

        if ($this->options['type'] === 'cli') {
            // "\x1b[39m" selects the default foreground colour.
            return "\x1b[39m" . $this->escapeConsole($text);
        } elseif ($this->options['type'] === 'html') {
            return htmlspecialchars($text, ENT_NOQUOTES);
        }

        // Plain output: without this return the method yielded null and the
        // token vanished from the formatted query.
        return $text;
    }
492
493
    /**
     * Convenience entry point: lexes a query and formats its tokens.
     *
     * @param string $query   The query to be formatted
     * @param array  $options The formatting options.
     *
     * @return string          The formatted string.
     */
    public static function format($query, array $options = array())
    {
        // The lexer exposes the tokens it produced through its `list`
        // property; those are what gets formatted.
        $tokenizer = new Lexer($query);
        $instance = new Formatter($options);

        $tokens = $tokenizer->list;

        return $instance->formatList($tokens);
    }
507
508 4
    /**
     * Computes the length of a group.
     *
     * A group is delimited by a pair of brackets. Scanning starts at the
     * list's current position (`$list->idx`) and adds up the character
     * length of every token's `value` until the bracket that closes the
     * group is met (that bracket is not counted) or the list ends. Nested
     * brackets are counted as part of the group. The list itself is not
     * modified.
     *
     * @param TokensList $list The list of tokens.
     *
     * @return int
     */
    public static function getGroupLength($list)
    {
        // The nesting depth starts at one because by the time this function
        // is called the group's opening bracket was already consumed.
        $depth = 1;

        // The accumulated length of the group.
        $total = 0;

        $position = $list->idx;
        while ($position < $list->count) {
            $token = $list->tokens[$position];
            ++$position;

            // Keeping track of the brackets.
            if ($token->type === Token::TYPE_OPERATOR) {
                if ($token->value === '(') {
                    ++$depth;
                } elseif (($token->value === ')') && (--$depth == 0)) {
                    // The group's own closing bracket: stop before counting it.
                    break;
                }
            }

            $total += mb_strlen($token->value, 'UTF-8');
        }

        return $total;
    }
555
556
    /**
     * Checks if a token is a statement or a clause inside a statement.
     *
     * @param Token $token The token to be checked.
     *
     * @return int|bool 2 if the token is an untyped `DELIMITER` word or a
     *                  keyword listed in `Parser::$STATEMENT_PARSERS`,
     *                  1 if it is a keyword listed in
     *                  `Parser::$KEYWORD_PARSERS`, false otherwise.
     */
    public static function isClause($token)
    {
        // The DELIMITER word is matched case-insensitively.
        if (($token->type === Token::TYPE_NONE)
            && (strtoupper($token->token) === 'DELIMITER')
        ) {
            return 2;
        }

        // Everything else must be a keyword to qualify.
        if ($token->type !== Token::TYPE_KEYWORD) {
            return false;
        }

        // Statement parsers take precedence over keyword parsers.
        if (isset(Parser::$STATEMENT_PARSERS[$token->value])) {
            return 2;
        }

        if (isset(Parser::$KEYWORD_PARSERS[$token->value])) {
            return 1;
        }

        return false;
    }
574
}
575