Completed
Pull Request — master (#120)
by
unknown
03:29
created

Formatter::format()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 7
Code Lines 4

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 4
CRAP Score 1

Importance

Changes 0
Metric Value
cc 1
eloc 4
nc 1
nop 2
dl 0
loc 7
ccs 4
cts 4
cp 1
crap 1
rs 9.4285
c 0
b 0
f 0
1
<?php
2
3
/**
4
 * Utilities that are used for formatting queries.
5
 */
6
7
namespace PhpMyAdmin\SqlParser\Utils;
8
9
use PhpMyAdmin\SqlParser\Lexer;
10
use PhpMyAdmin\SqlParser\Parser;
11
use PhpMyAdmin\SqlParser\Token;
12
use PhpMyAdmin\SqlParser\TokensList;
13
14
/**
15
 * Utilities that are used for formatting queries.
16
 *
17
 * @category   Misc
18
 *
19
 * @license    https://www.gnu.org/licenses/gpl-2.0.txt GPL-2.0+
20
 */
21
class Formatter
22
{
23
    /**
24
     * The formatting options.
25
     *
26
     * @var array
27
     */
28
    public $options;
29
30
    /**
31
     * Clauses that must be inlined.
32
     *
33
     * These clauses usually are short and it's nicer to have them inline.
34
     *
35
     * @var array
36
     */
37
    public static $INLINE_CLAUSES = array(
38
        'CREATE' => true,
39
        'LIMIT' => true,
40
        'PARTITION BY' => true,
41
        'PARTITION' => true,
42
        'PROCEDURE' => true,
43
        'SUBPARTITION BY' => true,
44
        'VALUES' => true,
45
    );
46
47
    /**
     * Creates a formatter configured with the given options.
     *
     * Options that are not supplied fall back to their default values; the
     * merged result is stored in the public `$options` property.
     *
     * @param array $options the formatting options overriding the defaults
     */
    public function __construct(array $options = array())
    {
        $mergedOptions = $this->getMergedOptions($options);

        $this->options = $mergedOptions;
    }
56
57
    /**
     * Merges the specified formatting options with the default values.
     *
     * On top of a plain merge with `getDefaultOptions()`:
     *  - `formats` is merged entry by entry with the default formats when it
     *    is supplied, and replaced by the default formats otherwise;
     *  - a `null` `line_ending` becomes `<br/>` for the `html` type and a
     *    newline for every other type;
     *  - a `null` `indentation` becomes four `&nbsp;` entities for the `html`
     *    type and four spaces for every other type;
     *  - `parts_newline` is switched off whenever `clause_newline` is off.
     *
     * @param array $options the options given by the caller
     *
     * @return array the complete set of options
     */
    private function getMergedOptions(array $options)
    {
        $options = array_merge($this->getDefaultOptions(), $options);

        $defaultFormats = $this->getDefaultFormats();
        $options['formats'] = isset($options['formats'])
            ? self::mergeFormats($defaultFormats, $options['formats'])
            : $defaultFormats;

        $isHtml = $options['type'] === 'html';

        if ($options['line_ending'] === null) {
            $options['line_ending'] = $isHtml ? '<br/>' : "\n";
        }

        if ($options['indentation'] === null) {
            $options['indentation'] = $isHtml ? '&nbsp;&nbsp;&nbsp;&nbsp;' : '    ';
        }

        // `parts_newline` requires `clause_newline`.
        // A logical AND is used on purpose: the bitwise `&` gives wrong
        // answers for truthy values that are not booleans (`2 & true` is `0`,
        // and two strings are AND-ed byte by byte).
        $options['parts_newline'] = $options['parts_newline'] && $options['clause_newline'];

        return $options;
    }
90
91
    /**
     * Builds the default value of every formatting option.
     *
     * @return array the default options, keyed by option name
     */
    protected function getDefaultOptions()
    {
        $defaults = array();

        // string: the format of the result ('text', 'cli' or 'html').
        // 'cli' is picked when PHP runs from the command line, 'text' otherwise.
        $defaults['type'] = php_sapi_name() === 'cli' ? 'cli' : 'text';

        // string|null: the line ending used.
        // By default, for text this is "\n" and for HTML this is "<br/>".
        $defaults['line_ending'] = null;

        // string|null: the string used for indentation.
        $defaults['indentation'] = null;

        // bool: whether comments should be removed or not.
        $defaults['remove_comments'] = false;

        // bool: whether each clause should be on a new line.
        $defaults['clause_newline'] = true;

        // bool: whether each part should be on a new line.
        // Parts are delimited by brackets and commas.
        $defaults['parts_newline'] = true;

        // bool: whether each part of each clause should be indented.
        $defaults['indent_parts'] = true;

        return $defaults;
    }
151
152
    /**
     * Builds the default formats, i.e. the styles applied to tokens.
     *
     * Each format is an array with the following keys:
     *  - `type`:     the token type the format applies to;
     *  - `flags`:    the flags a token must have for the format to apply;
     *  - `html`:     the attribute put in the `<span>` tag of HTML output;
     *  - `cli`:      the escape sequence put before the token in CLI output;
     *  - `function`: the name of a function transforming the token's text,
     *                or an empty string when the text is left untouched.
     *
     * The order matters: the first format matching a token is the one used.
     *
     * @return array the list of default formats
     */
    protected function getDefaultFormats()
    {
        $keys = array('type', 'flags', 'html', 'cli', 'function');

        // One row per format, the values being in the same order as `$keys`.
        $rows = array(
            array(Token::TYPE_KEYWORD, Token::FLAG_KEYWORD_RESERVED, 'class="sql-reserved"', "\x1b[35m", 'strtoupper'),
            array(Token::TYPE_KEYWORD, 0, 'class="sql-keyword"', "\x1b[95m", 'strtoupper'),
            array(Token::TYPE_COMMENT, 0, 'class="sql-comment"', "\x1b[37m", ''),
            array(Token::TYPE_BOOL, 0, 'class="sql-atom"', "\x1b[36m", 'strtoupper'),
            array(Token::TYPE_NUMBER, 0, 'class="sql-number"', "\x1b[92m", 'strtolower'),
            array(Token::TYPE_STRING, 0, 'class="sql-string"', "\x1b[91m", ''),
            array(Token::TYPE_SYMBOL, 0, 'class="sql-variable"', "\x1b[36m", ''),
        );

        $formats = array();
        foreach ($rows as $row) {
            $formats[] = array_combine($keys, $row);
        }

        return $formats;
    }
212
213 4
    /**
     * Merges a list of new formats into a list of existing ones.
     *
     * Keys missing from a new format (or set to `null`) are filled in first:
     * `flags` and `type` with `0`; `html`, `cli` and `function` with an empty
     * string. A new format then replaces every existing format having the same
     * `type` and `flags`; new formats that replaced nothing are appended, in
     * their original order.
     *
     * @param array $formats    the existing formats
     * @param array $newFormats the formats to be merged in
     *
     * @return array the merged formats
     */
    private static function mergeFormats(array $formats, array $newFormats)
    {
        $fallbacks = array(
            'flags' => 0,
            'type' => 0,
            'html' => '',
            'cli' => '',
            'function' => '',
        );

        /* Sanitize the array so that we do not have to care later */
        foreach ($newFormats as $j => $new) {
            foreach ($fallbacks as $name => $fallback) {
                if (!isset($new[$name])) {
                    $newFormats[$j][$name] = $fallback;
                }
            }
        }

        /*
         * Keys of the new formats that replaced an existing one.
         *
         * This is a set indexed by key rather than a list searched with a
         * loose `in_array()`: before PHP 8 a string key such as 'custom'
         * compares equal to the integer key 0, which made the matching
         * format silently disappear.
         */
        $handled = array();

        /* Process changes to existing formats */
        foreach ($formats as $i => $original) {
            foreach ($newFormats as $j => $new) {
                if ($new['type'] === $original['type']
                    && $new['flags'] === $original['flags']
                ) {
                    $formats[$i] = $new;
                    $handled[$j] = true;
                }
            }
        }

        /* Add not already handled formats */
        foreach ($newFormats as $j => $new) {
            if (!isset($handled[$j])) {
                $formats[] = $new;
            }
        }

        return $formats;
    }
254
255
    /**
     * Formats the given list of tokens.
     *
     * The list is walked once. Formatting a token requires knowing the token
     * that follows it, so each iteration emits the *previous* significant
     * token (`$prev`) and decides, by looking at both `$prev` and the current
     * one (`$curr`), whether a line break or a space goes after it.
     *
     * `$list->idx` is used as the cursor of the walk and is therefore
     * modified by this method.
     *
     * NOTE(review): the token held in `$prev` when the loop ends is never
     * emitted; presumably the lexer always ends the list with an (empty)
     * delimiter token - to be confirmed against the lexer.
     *
     * @param TokensList $list the list of tokens
     *
     * @return string the formatted query
     */
    public function formatList($list)
    {
        // The query to be returned.
        $ret = '';

        // The indentation level.
        $indent = 0;

        // Whether the line ended.
        $lineEnded = false;

        // Whether current group is short (no linebreaks).
        $shortGroup = false;

        // The name of the last clause.
        $lastClause = '';

        // A stack that keeps track of the indentation level every time a new
        // block is found.
        $blocksIndentation = array();

        // A stack that keeps track of the line endings every time a new block
        // is found.
        $blocksLineEndings = array();

        // Whether clause's options were formatted.
        $formattedOptions = false;

        // Previously parsed token (`null` until the first significant token).
        $prev = null;

        for ($list->idx = 0; $list->idx < $list->count; ++$list->idx) {
            // Token parsed at this moment.
            $curr = $list->tokens[$list->idx];

            if ($curr->type === Token::TYPE_WHITESPACE) {
                // Whitespaces are skipped because the formatter adds its own.
                continue;
            }

            if ($curr->type === Token::TYPE_COMMENT && $this->options['remove_comments']) {
                // Skip Comments if option `remove_comments` is enabled
                continue;
            }

            // Checking if pointers were initialized.
            if ($prev !== null) {
                // Checking if a new clause started.
                if (static::isClause($prev) !== false) {
                    $lastClause = $prev->value;
                    $formattedOptions = false;
                }

                // The options of a clause should stay on the same line and everything that follows.
                if ($this->options['parts_newline']
                    && !$formattedOptions
                    && empty(self::$INLINE_CLAUSES[$lastClause])
                    && (
                        $curr->type !== Token::TYPE_KEYWORD
                        || ($curr->flags & Token::FLAG_KEYWORD_FUNCTION)
                    )
                ) {
                    $formattedOptions = true;
                    $lineEnded = true;
                    ++$indent;
                }

                // Checking if this clause ended.
                // A statement (2) always starts on a new line; a clause (1)
                // does so only if `clause_newline` is enabled.
                $clauseKind = static::isClause($curr);
                if ($clauseKind
                    && ($clauseKind === 2 || $this->options['clause_newline'])
                ) {
                    $lineEnded = true;
                    if ($this->options['parts_newline']) {
                        --$indent;
                    }
                }

                // Indenting BEGIN ... END blocks.
                if ($prev->type === Token::TYPE_KEYWORD && $prev->value === 'BEGIN') {
                    $lineEnded = true;
                    $blocksIndentation[] = $indent;
                    ++$indent;
                } elseif ($curr->type === Token::TYPE_KEYWORD && $curr->value === 'END') {
                    $lineEnded = true;
                    // Only `BEGIN` and `(` push on the stack, so an `END`
                    // without a matching `BEGIN` (for instance one closing a
                    // `CASE`) may find it empty; fall back to level zero
                    // instead of letting `array_pop()` turn the level to null.
                    $indent = empty($blocksIndentation) ? 0 : array_pop($blocksIndentation);
                }

                // Formatting fragments delimited by comma.
                if ($prev->type === Token::TYPE_OPERATOR && $prev->value === ',') {
                    // Fragments delimited by a comma are broken into multiple
                    // pieces only if the clause is not inlined or this fragment
                    // is between brackets that are on new line.
                    if (end($blocksLineEndings) === true
                        || (
                            empty(self::$INLINE_CLAUSES[$lastClause])
                            && !$shortGroup
                            && $this->options['parts_newline']
                        )
                    ) {
                        $lineEnded = true;
                    }
                }

                // Handling brackets.
                // Brackets are indented only if the length of the fragment between
                // them is longer than 30 characters.
                if ($prev->type === Token::TYPE_OPERATOR && $prev->value === '(') {
                    $blocksIndentation[] = $indent;
                    $shortGroup = true;
                    if (static::getGroupLength($list) > 30) {
                        ++$indent;
                        $lineEnded = true;
                        $shortGroup = false;
                    }
                    $blocksLineEndings[] = $lineEnded;
                } elseif ($curr->type === Token::TYPE_OPERATOR && $curr->value === ')') {
                    // A closing bracket without an opening one finds the
                    // stacks empty; fall back to level zero.
                    $indent = empty($blocksIndentation) ? 0 : array_pop($blocksIndentation);

                    // The stack is popped unconditionally (before the `||`,
                    // which would otherwise short-circuit) and the flag is
                    // kept a boolean.
                    $groupLineEnded = array_pop($blocksLineEndings);
                    $lineEnded = $lineEnded || $groupLineEnded;
                    $shortGroup = false;
                }

                // Adding the token.
                $ret .= $this->toString($prev);

                // Finishing the line.
                if ($lineEnded) {
                    // The level may have dropped below zero (it is decremented
                    // when a clause ends, whatever its current value); it is
                    // clamped here, right before being used.
                    if ($indent < 0) {
                        $indent = 0;
                    }

                    $ret .= $this->options['line_ending']
                        . str_repeat($this->options['indentation'], $indent);

                    $lineEnded = false;
                } else {
                    // If the line ended there is no point in adding whitespaces.
                    // Also, some tokens do not have spaces before or after them.

                    // No space after . (
                    $noSpaceAfterPrev = $prev->type === Token::TYPE_OPERATOR
                        && ($prev->value === '.' || $prev->value === '(');

                    // No space before . , ( )
                    $noSpaceBeforeCurr = $curr->type === Token::TYPE_OPERATOR
                        && in_array($curr->value, array('.', ',', '(', ')'), true);

                    // No space before a delimiter shorter than 2 characters.
                    $shortDelimiter = $curr->type === Token::TYPE_DELIMITER
                        && mb_strlen($curr->value, 'UTF-8') < 2;

                    // There always is a space after the `DELIMITER` keyword.
                    if ($prev->value === 'DELIMITER'
                        || !($noSpaceAfterPrev || $noSpaceBeforeCurr || $shortDelimiter)
                    ) {
                        $ret .= ' ';
                    }
                }
            }

            // Iteration finished, consider current token as previous.
            $prev = $curr;
        }

        if ($this->options['type'] === 'cli') {
            // Resetting the attributes of the terminal.
            return $ret . "\x1b[0m";
        }

        return $ret;
    }
473
474 15
    /**
     * Makes the control characters of a string visible.
     *
     * Every byte from 0x00 to 0x1F is replaced by the four characters of its
     * hexadecimal notation (for instance, the escape byte becomes the text
     * `\x1B`), all other bytes being left as they are.
     *
     * @param string $string the text to be escaped
     *
     * @return string the escaped text
     */
    public function escapeConsole($string)
    {
        $controlBytes = array();
        $notations = array();

        for ($code = 0x00; $code <= 0x1F; ++$code) {
            $controlBytes[] = chr($code);
            // Single quotes: the backslash is meant literally.
            $notations[] = sprintf('\x%02X', $code);
        }

        return str_replace($controlBytes, $notations, $string);
    }
488
489
    /**
     * Renders a single token according to the formatting options.
     *
     * The first format whose `type` is the type of the token and whose `flags`
     * are all set on the token is applied: its `function`, if any, transforms
     * the text of the token, which is then
     *  - escaped and wrapped in a `<span>` tag for the `html` type;
     *  - escaped and prefixed by the format's `cli` sequence for the `cli`
     *    type;
     *  - returned as it is for any other type.
     *
     * If no format matches, the text is only escaped (and, for the `cli`
     * type, prefixed by the sequence selecting the default colour).
     *
     * @param Token $token the token to be printed
     *
     * @return string the printed token
     */
    public function toString($token)
    {
        $text = $token->token;
        $outputType = $this->options['type'];

        foreach ($this->options['formats'] as $format) {
            $applies = $token->type === $format['type']
                && ($token->flags & $format['flags']) === $format['flags'];

            if (!$applies) {
                continue;
            }

            // Running transformation function.
            if (!empty($format['function'])) {
                $transform = $format['function'];
                $text = $transform($text);
            }

            if ($outputType === 'html') {
                return '<span ' . $format['html'] . '>' . htmlspecialchars($text, ENT_NOQUOTES) . '</span>';
            }

            if ($outputType === 'cli') {
                return $format['cli'] . $this->escapeConsole($text);
            }

            // Any other type: only the transformation applies.
            return $text;
        }

        // No format matched this token.
        if ($outputType === 'cli') {
            return "\x1b[39m" . $this->escapeConsole($text);
        }

        if ($outputType === 'html') {
            return htmlspecialchars($text, ENT_NOQUOTES);
        }

        return $text;
    }
529
530
    /**
     * Formats a query.
     *
     * Shortcut that runs the lexer on the query and formats the resulting
     * list of tokens with a formatter built from the given options.
     *
     * @param string $query   The query to be formatted
     * @param array  $options the formatting options
     *
     * @return string the formatted string
     */
    public static function format($query, array $options = array())
    {
        $queryLexer = new Lexer($query);
        $instance = new self($options);
        $tokens = $queryLexer->list;

        return $instance->formatList($tokens);
    }
545
546
    /**
     * Computes the length of a group.
     *
     * A group is delimited by a pair of brackets. The length is the sum of
     * the lengths (in characters) of the values of the tokens found between
     * the current position of the list and the bracket closing the group,
     * nested brackets included. If the group is never closed, everything up
     * to the end of the list is counted.
     *
     * The position of the list is not modified.
     *
     * @param TokensList $list the list of tokens
     *
     * @return int
     */
    public static function getGroupLength($list)
    {
        // The number of brackets still open. It starts at one because by the
        // time this function is called the list already advanced past the
        // bracket opening the group.
        $depth = 1;

        // The length of this group.
        $total = 0;

        $position = $list->idx;
        while ($position < $list->count) {
            $token = $list->tokens[$position];
            ++$position;

            if ($token->type === Token::TYPE_OPERATOR) {
                if ($token->value === '(') {
                    ++$depth;
                } elseif ($token->value === ')' && --$depth === 0) {
                    // The bracket closing the group is not part of it.
                    break;
                }
            }

            $total += mb_strlen($token->value, 'UTF-8');
        }

        return $total;
    }
593
594
    /**
     * Checks if a token is a statement or a clause inside a statement.
     *
     * @param Token $token the token to be checked
     *
     * @return int|bool `2` if the token is a keyword starting a statement or
     *                  is the `DELIMITER` pseudo-keyword, `1` if it is a
     *                  keyword starting a clause, `false` otherwise
     */
    public static function isClause($token)
    {
        $isKeyword = $token->type === Token::TYPE_KEYWORD;

        // Keywords for which a statement parser is registered.
        if ($isKeyword && isset(Parser::$STATEMENT_PARSERS[$token->value])) {
            return 2;
        }

        // `DELIMITER` is not a keyword for the lexer, but acts as a statement.
        if ($token->type === Token::TYPE_NONE && strtoupper($token->token) === 'DELIMITER') {
            return 2;
        }

        // Keywords for which a clause parser is registered.
        if ($isKeyword && isset(Parser::$KEYWORD_PARSERS[$token->value])) {
            return 1;
        }

        return false;
    }
616
}
617