Completed
Push — master ( 65f66e...428edc )
by Michal
04:14
created

Formatter::escapeConsole()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 14
Code Lines 9

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 9
CRAP Score 1

Importance

Changes 0
Metric Value
cc 1
eloc 9
nc 1
nop 1
dl 0
loc 14
ccs 9
cts 9
cp 1
crap 1
rs 9.4285
c 0
b 0
f 0
1
<?php
2
3
/**
4
 * Utilities that are used for formatting queries.
5
 */
6
7
namespace PhpMyAdmin\SqlParser\Utils;
8
9
use PhpMyAdmin\SqlParser\Lexer;
10
use PhpMyAdmin\SqlParser\Parser;
11
use PhpMyAdmin\SqlParser\Token;
12
use PhpMyAdmin\SqlParser\TokensList;
13
14
/**
15
 * Utilities that are used for formatting queries.
16
 *
17
 * @category   Misc
18
 *
19
 * @license    https://www.gnu.org/licenses/gpl-2.0.txt GPL-2.0+
20
 */
21
class Formatter
22
{
23
    /**
24
     * The formatting options.
25
     *
26
     * @var array
27
     */
28
    public $options;
29
30
    /**
31
     * Clauses that must be inlined.
32
     *
33
     * These clauses usually are short and it's nicer to have them inline.
34
     *
35
     * @var array
36
     */
37
    public static $INLINE_CLAUSES = array(
38
        'CREATE' => true,
39
        'LIMIT' => true,
40
        'PARTITION BY' => true,
41
        'PARTITION' => true,
42
        'PROCEDURE' => true,
43
        'SUBPARTITION BY' => true,
44
        'VALUES' => true,
45
    );
46
47
    /**
     * Creates a formatter configured with the given options.
     *
     * Options that are not supplied fall back to their default values.
     *
     * @param array $options the formatting options
     */
    public function __construct(array $options = array())
    {
        $merged = $this->getMergedOptions($options);

        $this->options = $merged;
    }
56
57
    /**
     * Merges the specified formatting options with the default values.
     *
     * Besides the plain merge this method also:
     *  - combines the caller's `formats` (if any) with the default formats;
     *  - derives `line_ending` and `indentation` from the output type when
     *    they are still null after the merge;
     *  - turns `parts_newline` off whenever `clause_newline` is off.
     *
     * @param array $options the options given by the caller
     *
     * @return array the complete set of options
     */
    private function getMergedOptions(array $options)
    {
        $options = array_merge(
            $this->getDefaultOptions(),
            $options
        );

        if (isset($options['formats'])) {
            $options['formats'] = self::mergeFormats($this->getDefaultFormats(), $options['formats']);
        } else {
            $options['formats'] = $this->getDefaultFormats();
        }

        if (is_null($options['line_ending'])) {
            $options['line_ending'] = $options['type'] === 'html' ? '<br/>' : "\n";
        }

        if (is_null($options['indentation'])) {
            $options['indentation'] = $options['type'] === 'html' ? '&nbsp;&nbsp;&nbsp;&nbsp;' : '    ';
        }

        // `parts_newline` requires `clause_newline`.
        // A logical AND is used on purpose: the bitwise `&` operates on the
        // integer value of its operands, so two truthy values such as `2` and
        // `true` would combine to `0` and silently disable the option.
        $options['parts_newline'] = $options['parts_newline'] && $options['clause_newline'];

        return $options;
    }
90
91
    /**
     * Builds the default value of every formatting option.
     *
     * @return array
     */
    protected function getDefaultOptions()
    {
        // The format of the result ('text', 'cli' or 'html'); 'cli' is picked
        // automatically when running under the command line SAPI.
        $type = 'text';
        if (php_sapi_name() === 'cli') {
            $type = 'cli';
        }

        $defaults = array();

        $defaults['type'] = $type;

        // The line ending used (string). When left null, "\n" is used for
        // text and "<br/>" for HTML.
        $defaults['line_ending'] = null;

        // The string used for indentation. Null means "choose by type".
        $defaults['indentation'] = null;

        // Whether comments should be removed or not (bool).
        $defaults['remove_comments'] = false;

        // Whether each clause should be on a new line (bool).
        $defaults['clause_newline'] = true;

        // Whether each part should be on a new line (bool).
        // Parts are delimited by brackets and commas.
        $defaults['parts_newline'] = true;

        // Whether each part of each clause should be indented (bool).
        $defaults['indent_parts'] = true;

        return $defaults;
    }
151
152
    /**
     * Builds the default styles applied to tokens.
     *
     * Every style is an array with the keys:
     *  - `type`:     the token type the style applies to;
     *  - `flags`:    the flags a token must carry for the style to apply;
     *  - `html`:     the attribute placed inside the `<span>` for HTML output;
     *  - `cli`:      the escape sequence emitted before the token for CLI output;
     *  - `function`: the name of a function used to transform the token's
     *                text, or an empty string for none.
     *
     * @return array
     */
    protected function getDefaultFormats()
    {
        $keys = array('type', 'flags', 'html', 'cli', 'function');

        // One row per style, in the same order as `$keys`.
        $rows = array(
            array(Token::TYPE_KEYWORD, Token::FLAG_KEYWORD_RESERVED, 'class="sql-reserved"', "\x1b[35m", 'strtoupper'),
            array(Token::TYPE_KEYWORD, 0, 'class="sql-keyword"', "\x1b[95m", 'strtoupper'),
            array(Token::TYPE_COMMENT, 0, 'class="sql-comment"', "\x1b[37m", ''),
            array(Token::TYPE_BOOL, 0, 'class="sql-atom"', "\x1b[36m", 'strtoupper'),
            array(Token::TYPE_NUMBER, 0, 'class="sql-number"', "\x1b[92m", 'strtolower'),
            array(Token::TYPE_STRING, 0, 'class="sql-string"', "\x1b[91m", ''),
            array(Token::TYPE_SYMBOL, 0, 'class="sql-variable"', "\x1b[36m", ''),
        );

        $formats = array();
        foreach ($rows as $row) {
            $formats[] = array_combine($keys, $row);
        }

        return $formats;
    }
212
213 4
    /**
     * Merges caller supplied formats into a base list of formats.
     *
     * A new format replaces every base format that has the same `type` and
     * `flags`; new formats that replaced nothing are appended to the result.
     * Keys missing from a new format are filled in first (`0` for `flags`
     * and `type`, an empty string for `html`, `cli` and `function`).
     *
     * @param array $formats    the base formats
     * @param array $newFormats the formats to be merged into the base ones
     *
     * @return array the merged formats
     */
    private static function mergeFormats(array $formats, array $newFormats)
    {
        // Keys of `$newFormats` that replaced a base format. They are stored
        // as array keys rather than values so that the membership test below
        // never goes through a loose comparison: with `in_array()` in its
        // default mode a string key such as 'custom' is considered equal to
        // the integer key 0 on PHP < 8, which would drop that format.
        $handled = array();
        $integers = array('flags', 'type');
        $strings = array('html', 'cli', 'function');

        /* Sanitize the array so that we do not have to care later */
        foreach ($newFormats as $j => $new) {
            foreach ($integers as $name) {
                if (!isset($new[$name])) {
                    $newFormats[$j][$name] = 0;
                }
            }
            foreach ($strings as $name) {
                if (!isset($new[$name])) {
                    $newFormats[$j][$name] = '';
                }
            }
        }

        /* Process changes to existing formats */
        foreach ($formats as $i => $original) {
            foreach ($newFormats as $j => $new) {
                if ($new['type'] === $original['type']
                    && $original['flags'] === $new['flags']
                ) {
                    $formats[$i] = $new;
                    $handled[$j] = true;
                }
            }
        }

        /* Add not already handled formats */
        foreach ($newFormats as $j => $new) {
            if (!isset($handled[$j])) {
                $formats[] = $new;
            }
        }

        return $formats;
    }
254
255
    /**
     * Formats the given list of tokens.
     *
     * The list is walked with two pointers: `$curr` is the token being
     * inspected and `$prev` is the token that is actually written out.
     * Looking one token ahead is what allows deciding whether `$prev` must
     * be followed by a line break, a space or nothing at all.
     *
     * The `idx` property of the list is used as the loop counter and is
     * therefore modified by this method.
     *
     * NOTE(review): the token held in `$prev` when the loop finishes is never
     * written; presumably the list always ends with a terminating token that
     * must not be printed - confirm against the lexer.
     *
     * @param TokensList $list the list of tokens
     *
     * @return string
     */
    public function formatList($list)
    {
        /**
         * The query to be returned.
         *
         * @var string
         */
        $ret = '';

        /**
         * The indentation level.
         *
         * @var int
         */
        $indent = 0;

        /**
         * Whether the line ended.
         *
         * @var bool
         */
        $lineEnded = false;

        /**
         * Whether current group is short (no linebreaks).
         *
         * @var bool
         */
        $shortGroup = false;

        /**
         * The name of the last clause.
         *
         * @var string
         */
        $lastClause = '';

        /**
         * A stack that keeps track of the indentation level every time a new
         * block is found.
         *
         * @var array
         */
        $blocksIndentation = array();

        /**
         * A stack that keeps track of the line endings every time a new block
         * is found.
         *
         * @var array
         */
        $blocksLineEndings = array();

        /**
         * Whether clause's options were formatted.
         *
         * @var bool
         */
        $formattedOptions = false;

        /**
         * Previously parsed token.
         *
         * @var Token
         */
        $prev = null;

        // In order to be able to format the queries correctly, the next token
        // must be taken into consideration. The loop below uses two pointers,
        // `$prev` and `$curr` which store two consecutive tokens.
        // Actually, at every iteration the previous token is being used.
        for ($list->idx = 0; $list->idx < $list->count; ++$list->idx) {
            /**
             * Token parsed at this moment.
             *
             * @var Token
             */
            $curr = $list->tokens[$list->idx];

            if ($curr->type === Token::TYPE_WHITESPACE) {
                // Whitespaces are skipped because the formatter adds its own.
                continue;
            }

            if ($curr->type === Token::TYPE_COMMENT && $this->options['remove_comments']) {
                // Skip Comments if option `remove_comments` is enabled
                continue;
            }

            // Nothing can be written until both pointers are initialized.
            if ($prev !== null) {
                // Checking if a new clause started.
                if (static::isClause($prev) !== false) {
                    $lastClause = $prev->value;
                    $formattedOptions = false;
                }

                // The options of a clause should stay on the same line and everything that follows.
                if ($this->options['parts_newline']
                    && !$formattedOptions
                    && empty(self::$INLINE_CLAUSES[$lastClause])
                    && (
                        $curr->type !== Token::TYPE_KEYWORD
                        || (
                            $curr->type === Token::TYPE_KEYWORD
                            && $curr->flags & Token::FLAG_KEYWORD_FUNCTION
                        )
                    )
                ) {
                    $formattedOptions = true;
                    $lineEnded = true;
                    ++$indent;
                }

                // Checking if this clause ended.
                if ($tmp = static::isClause($curr)) {
                    if ($tmp == 2 || $this->options['clause_newline']) {
                        $lineEnded = true;
                        if ($this->options['parts_newline']) {
                            --$indent;
                        }
                    }
                }

                // Indenting BEGIN ... END blocks.
                if ($prev->type === Token::TYPE_KEYWORD && $prev->value === 'BEGIN') {
                    $lineEnded = true;
                    array_push($blocksIndentation, $indent);
                    ++$indent;
                } elseif ($curr->type === Token::TYPE_KEYWORD && $curr->value === 'END') {
                    $lineEnded = true;
                    // `END` is not always preceded by `BEGIN` (e.g. it also
                    // closes `CASE`), so the stack may be empty. `array_pop()`
                    // would then return null, which must not be used as an
                    // indentation level.
                    $indent = empty($blocksIndentation) ? 0 : array_pop($blocksIndentation);
                }

                // Formatting fragments delimited by comma.
                if ($prev->type === Token::TYPE_OPERATOR && $prev->value === ',') {
                    // Fragments delimited by a comma are broken into multiple
                    // pieces only if the clause is not inlined or this fragment
                    // is between brackets that are on new line.
                    if (end($blocksLineEndings) === true
                        || (
                            empty(self::$INLINE_CLAUSES[$lastClause])
                            && !$shortGroup
                            && $this->options['parts_newline']
                        )
                    ) {
                        $lineEnded = true;
                    }
                }

                // Handling brackets.
                // Brackets are indented only if the length of the fragment between
                // them is longer than 30 characters.
                if ($prev->type === Token::TYPE_OPERATOR && $prev->value === '(') {
                    array_push($blocksIndentation, $indent);
                    $shortGroup = true;
                    if (static::getGroupLength($list) > 30) {
                        ++$indent;
                        $lineEnded = true;
                        $shortGroup = false;
                    }
                    array_push($blocksLineEndings, $lineEnded);
                } elseif ($curr->type === Token::TYPE_OPERATOR && $curr->value === ')') {
                    // A closing bracket without a matching opening one leaves
                    // the stack empty; fall back to no indentation instead of
                    // the null returned by `array_pop()`.
                    $indent = empty($blocksIndentation) ? 0 : array_pop($blocksIndentation);
                    $lineEnded |= array_pop($blocksLineEndings);
                    $shortGroup = false;
                }

                // Adding the token.
                $ret .= $this->toString($prev);

                // Finishing the line.
                if ($lineEnded) {
                    if ($indent < 0) {
                        // The level is decremented every time a clause ends,
                        // which can take it below zero.
                        // TODO: Make sure this never occurs and delete it.
                        $indent = 0;
                    }

                    $ret .= $this->options['line_ending']
                        . str_repeat($this->options['indentation'], $indent);

                    $lineEnded = false;
                } else {
                    // If the line ended there is no point in adding whitespaces.
                    // Also, some tokens do not have spaces before or after them.
                    if (
                        // Always a space after the DELIMITER keyword.
                        $prev->value === 'DELIMITER'
                        || !(
                            // No space after . (
                            (
                                $prev->type === Token::TYPE_OPERATOR
                                && ($prev->value === '.' || $prev->value === '(')
                            )
                            // No space before . , ( )
                            || (
                                $curr->type === Token::TYPE_OPERATOR
                                && (
                                    $curr->value === '.'
                                    || $curr->value === ','
                                    || $curr->value === '('
                                    || $curr->value === ')'
                                )
                            )
                            // No space before delimiters shorter than 2 characters.
                            || (
                                $curr->type === Token::TYPE_DELIMITER
                                && mb_strlen($curr->value, 'UTF-8') < 2
                            )
                        )
                    ) {
                        $ret .= ' ';
                    }
                }
            }

            // Iteration finished, consider current token as previous.
            $prev = $curr;
        }

        if ($this->options['type'] === 'cli') {
            // Reset the console attributes changed by the formats.
            return $ret . "\x1b[0m";
        }

        return $ret;
    }
478
479 5
    /**
     * Escapes the control characters of a string.
     *
     * Every byte in the range 0x00-0x1F is replaced by its literal `\xNN`
     * notation, written with two uppercase hexadecimal digits (for example
     * the escape character becomes the four characters `\x1B`).
     *
     * @param string $string the text to be escaped
     *
     * @return string
     */
    public function escapeConsole($string)
    {
        $controls = array();
        $escaped = array();

        for ($code = 0x00; $code <= 0x1F; ++$code) {
            $controls[] = chr($code);
            $escaped[] = sprintf('\x%02X', $code);
        }

        return str_replace($controls, $escaped, $string);
    }
493
494
    /**
     * Renders a single token according to the configured output type.
     *
     * The first format whose `type` equals the token's type and whose
     * `flags` are all set on the token is used: its `function` (if any)
     * transforms the text, and its `html` / `cli` value decorates it. Tokens
     * matching no format are only escaped.
     *
     * @param Token $token the token to be printed
     *
     * @return string
     */
    public function toString($token)
    {
        $text = $token->token;
        $type = $this->options['type'];

        // Looking for the first format that applies to this token.
        $style = null;
        foreach ($this->options['formats'] as $candidate) {
            if ($token->type !== $candidate['type']) {
                continue;
            }
            if (($token->flags & $candidate['flags']) !== $candidate['flags']) {
                continue;
            }

            $style = $candidate;
            break;
        }

        // Running transformation function.
        if ($style !== null && !empty($style['function'])) {
            $transform = $style['function'];
            $text = $transform($text);
        }

        if ($type === 'html') {
            $escaped = htmlspecialchars($text, ENT_NOQUOTES);
            if ($style === null) {
                return $escaped;
            }

            return '<span ' . $style['html'] . '>' . $escaped . '</span>';
        }

        if ($type === 'cli') {
            // Without a matching format the default foreground colour is used.
            $colour = $style === null ? "\x1b[39m" : $style['cli'];

            return $colour . $this->escapeConsole($text);
        }

        return $text;
    }
534
535
    /**
     * Formats a query.
     *
     * Convenience wrapper that lexes the query and runs a new formatter,
     * built with the given options, over the resulting list of tokens.
     *
     * @param string $query   The query to be formatted
     * @param array  $options the formatting options
     *
     * @return string the formatted string
     */
    public static function format($query, array $options = array())
    {
        $formatter = new self($options);
        $lexer = new Lexer($query);
        $tokens = $lexer->list;

        return $formatter->formatList($tokens);
    }
550
551
    /**
     * Computes the length of a group.
     *
     * A group is delimited by a pair of brackets. The values of all tokens
     * from the list's current position up to (but not including) the bracket
     * that closes the group are measured; if that bracket is never found the
     * measuring goes on until the end of the list.
     *
     * @param TokensList $list the list of tokens
     *
     * @return int
     */
    public static function getGroupLength($list)
    {
        // Number of brackets that are still open. It starts at one because by
        // the time this function is called the list already advanced past the
        // bracket that opened the group.
        $depth = 1;

        // The length of this group.
        $length = 0;

        $idx = $list->idx;
        while ($idx < $list->count) {
            $token = $list->tokens[$idx];

            // Counting the brackets.
            if ($token->type === Token::TYPE_OPERATOR) {
                if ($token->value === '(') {
                    ++$depth;
                } elseif ($token->value === ')' && --$depth == 0) {
                    // The group is closed; the closing bracket is not counted.
                    return $length;
                }
            }

            // Keeping track of this group's length.
            $length += mb_strlen($token->value, 'UTF-8');
            ++$idx;
        }

        return $length;
    }
598
599
    /**
     * Checks if a token is a statement or a clause inside a statement.
     *
     * @param Token $token the token to be checked
     *
     * @return int|bool 2 for a keyword that has a statement parser or for
     *                  the `DELIMITER` pseudo-keyword, 1 for a keyword that
     *                  has a keyword parser, false otherwise
     */
    public static function isClause($token)
    {
        $isKeyword = $token->type === Token::TYPE_KEYWORD;

        if ($isKeyword && isset(Parser::$STATEMENT_PARSERS[$token->value])) {
            return 2;
        }

        // `DELIMITER` is matched on the raw text of an untyped token,
        // regardless of its case.
        if ($token->type === Token::TYPE_NONE && strtoupper($token->token) === 'DELIMITER') {
            return 2;
        }

        if ($isKeyword && isset(Parser::$KEYWORD_PARSERS[$token->value])) {
            return 1;
        }

        return false;
    }
621
}
622