Passed
Pull Request — master (#153)
by
unknown
03:40
created

Formatter::toString()   C

Complexity

Conditions 11
Paths 22

Size

Total Lines 44
Code Lines 25

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 25
CRAP Score 11

Importance

Changes 0
Metric Value
dl 0
loc 44
ccs 25
cts 25
cp 1
rs 5.2653
c 0
b 0
f 0
cc 11
eloc 25
nc 22
nop 1
crap 11

How to fix   Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include Extract Method.

1
<?php
2
3
/**
4
 * Utilities that are used for formatting queries.
5
 */
6
7
namespace PhpMyAdmin\SqlParser\Utils;
8
9
use PhpMyAdmin\SqlParser\Components\JoinKeyword;
10
use PhpMyAdmin\SqlParser\Lexer;
11
use PhpMyAdmin\SqlParser\Parser;
12
use PhpMyAdmin\SqlParser\Token;
13
use PhpMyAdmin\SqlParser\TokensList;
14
15
/**
16
 * Utilities that are used for formatting queries.
17
 *
18
 * @category   Misc
19
 *
20
 * @license    https://www.gnu.org/licenses/gpl-2.0.txt GPL-2.0+
21
 */
22
class Formatter
23
{
24
    /**
     * The effective formatting options of this formatter.
     *
     * Filled in by the constructor with the caller's options merged over the
     * defaults.
     *
     * @var array
     */
    public $options;
30
31
    /**
     * Clauses that are usually short and therefore share their line with the
     * clause that follows them.
     *
     * Without the INSERT entry the formatter would produce:
     *
     *      INSERT
     *      INTO foo
     *      VALUES(0, 0, 0),(1, 1, 1);
     *
     * With it, the output is:
     *
     *      INSERT INTO foo
     *      VALUES(0, 0, 0),(1, 1, 1)
     *
     * The array is used as a set: the keys are the clause names, the values
     * are always `true`.
     *
     * @var array
     */
    public static $SHORT_CLAUSES = array('INSERT' => true);
52
53
    /**
     * Clauses whose content is kept on the same line as the clause keyword.
     *
     * These clauses are usually short, so the formatter neither breaks the
     * line after their keyword nor splits their comma separated parts.
     *
     * The array is used as a set: the keys are the clause names, the values
     * are always `true`.
     *
     * @var array
     */
    public static $INLINE_CLAUSES = array(
        'CREATE'          => true,
        'INTO'            => true,
        'LIMIT'           => true,
        'PARTITION BY'    => true,
        'PARTITION'       => true,
        'PROCEDURE'       => true,
        'SUBPARTITION BY' => true,
        'VALUES'          => true,
    );
70
71
    /**
     * Creates a formatter.
     *
     * The given options are merged over the default ones and the result is
     * stored in the public `$options` property.
     *
     * @param array $options the formatting options overriding the defaults
     */
    public function __construct(array $options = array())
    {
        $effectiveOptions = $this->getMergedOptions($options);

        $this->options = $effectiveOptions;
    }
80
81
    /**
     * Builds the effective options from the defaults and the given overrides.
     *
     * On top of the plain merge:
     *  - the `formats` entry is always set, either to the default formats or
     *    to the default formats merged with the given ones;
     *  - a `line_ending` or `indentation` left at `null` is replaced by a
     *    value that depends on the output type;
     *  - `parts_newline` is switched off when `clause_newline` is off.
     *
     * @param array $options the options overriding the defaults
     *
     * @return array
     */
    private function getMergedOptions(array $options)
    {
        $merged = array_merge($this->getDefaultOptions(), $options);

        // Custom formats extend or replace the default ones, they never
        // discard them entirely.
        $formats = $this->getDefaultFormats();
        if (isset($merged['formats'])) {
            $formats = self::mergeFormats($formats, $merged['formats']);
        }
        $merged['formats'] = $formats;

        if ($merged['line_ending'] === null) {
            if ($merged['type'] === 'html') {
                $merged['line_ending'] = '<br/>';
            } else {
                $merged['line_ending'] = "\n";
            }
        }

        if ($merged['indentation'] === null) {
            if ($merged['type'] === 'html') {
                $merged['indentation'] = '&nbsp;&nbsp;&nbsp;&nbsp;';
            } else {
                $merged['indentation'] = '    ';
            }
        }

        // `parts_newline` requires `clause_newline`.
        // The bitwise operator leaves an integer (0 or 1) in the option when
        // both operands are booleans.
        $merged['parts_newline'] = $merged['parts_newline'] & $merged['clause_newline'];

        return $merged;
    }
114
115
    /**
     * Returns the default value of every formatting option.
     *
     * @return array
     */
    protected function getDefaultOptions()
    {
        $defaults = array();

        // The format of the result: 'text', 'cli' or 'html'.
        // 'cli' is the default when running under the command line SAPI,
        // 'text' otherwise.
        $defaults['type'] = 'text';
        if (php_sapi_name() === 'cli') {
            $defaults['type'] = 'cli';
        }

        // The line ending used (string). `null` lets the formatter choose:
        // "\n" by default, "<br/>" for HTML.
        $defaults['line_ending'] = null;

        // The string used for one level of indentation. `null` lets the
        // formatter choose according to the output type.
        $defaults['indentation'] = null;

        // Whether comments should be removed or not (bool).
        $defaults['remove_comments'] = false;

        // Whether each clause should be on a new line (bool).
        $defaults['clause_newline'] = true;

        // Whether each part should be on a new line (bool).
        // Parts are delimited by brackets and commas.
        $defaults['parts_newline'] = true;

        // Whether each part of each clause should be indented (bool).
        $defaults['indent_parts'] = true;

        return $defaults;
    }
175
176
    /**
     * Returns the default styles applied to the tokens.
     *
     * Every entry is an array with the following keys:
     *  - `type`:     the token type the style applies to;
     *  - `flags`:    the flags a token must carry for the style to apply
     *                (0 matches any flags);
     *  - `html`:     the attributes of the `<span>` wrapping the token in
     *                HTML output;
     *  - `cli`:      the escape sequence emitted before the token in CLI
     *                output;
     *  - `function`: the name of a function transforming the token's text,
     *                or an empty string for none.
     *
     * The first entry matching a token is the one used, hence the reserved
     * keywords are listed before the generic keyword entry.
     *
     * @return array
     */
    protected function getDefaultFormats()
    {
        $formats = array();

        // Reserved keywords.
        $formats[] = array(
            'type' => Token::TYPE_KEYWORD,
            'flags' => Token::FLAG_KEYWORD_RESERVED,
            'html' => 'class="sql-reserved"',
            'cli' => "\x1b[35m",
            'function' => 'strtoupper',
        );

        // Any other keyword.
        $formats[] = array(
            'type' => Token::TYPE_KEYWORD,
            'flags' => 0,
            'html' => 'class="sql-keyword"',
            'cli' => "\x1b[95m",
            'function' => 'strtoupper',
        );

        // Comments.
        $formats[] = array(
            'type' => Token::TYPE_COMMENT,
            'flags' => 0,
            'html' => 'class="sql-comment"',
            'cli' => "\x1b[37m",
            'function' => '',
        );

        // Booleans.
        $formats[] = array(
            'type' => Token::TYPE_BOOL,
            'flags' => 0,
            'html' => 'class="sql-atom"',
            'cli' => "\x1b[36m",
            'function' => 'strtoupper',
        );

        // Numbers.
        $formats[] = array(
            'type' => Token::TYPE_NUMBER,
            'flags' => 0,
            'html' => 'class="sql-number"',
            'cli' => "\x1b[92m",
            'function' => 'strtolower',
        );

        // Strings.
        $formats[] = array(
            'type' => Token::TYPE_STRING,
            'flags' => 0,
            'html' => 'class="sql-string"',
            'cli' => "\x1b[91m",
            'function' => '',
        );

        // Symbols.
        $formats[] = array(
            'type' => Token::TYPE_SYMBOL,
            'flags' => 0,
            'html' => 'class="sql-variable"',
            'cli' => "\x1b[36m",
            'function' => '',
        );

        return $formats;
    }
236
237 4
    /**
     * Merges custom format definitions into a list of existing ones.
     *
     * Missing keys of the custom formats are filled in first (`flags` and
     * `type` with 0; `html`, `cli` and `function` with an empty string).
     * A custom format then replaces every existing format having exactly the
     * same `type` and `flags`; custom formats that replaced nothing are
     * appended at the end, in their original order.
     *
     * @param array $formats    the existing formats
     * @param array $newFormats the custom formats, with any kind of keys
     *
     * @return array
     */
    private static function mergeFormats(array $formats, array $newFormats)
    {
        // Keys of the custom formats that replaced an existing format.
        // Kept as a set of array keys so that the lookup is an exact key
        // match; a loose `in_array()` would make a string key such as
        // 'custom' equal to the integer key 0 on PHP < 8 and silently drop
        // that format.
        $handled = array();

        $integers = array('flags', 'type');
        $strings = array('html', 'cli', 'function');

        /* Sanitize the array so that we do not have to care later */
        foreach ($newFormats as $j => $new) {
            foreach ($integers as $name) {
                if (!isset($new[$name])) {
                    $newFormats[$j][$name] = 0;
                }
            }
            foreach ($strings as $name) {
                if (!isset($new[$name])) {
                    $newFormats[$j][$name] = '';
                }
            }
        }

        /* Process changes to existing formats */
        foreach ($formats as $i => $original) {
            foreach ($newFormats as $j => $new) {
                if ($new['type'] === $original['type']
                    && $original['flags'] === $new['flags']
                ) {
                    $formats[$i] = $new;
                    $handled[$j] = true;
                }
            }
        }

        /* Add not already handled formats */
        foreach ($newFormats as $j => $new) {
            if (!isset($handled[$j])) {
                $formats[] = $new;
            }
        }

        return $formats;
    }
278
279
    /**
     * Formats the given list of tokens.
     *
     * The list is walked with a one-token look-ahead: on every iteration the
     * previous significant token is emitted and the current one is only used
     * to decide what follows it (a space, a line break, a change of
     * indentation). As a consequence the last significant token of the list
     * is never emitted.
     * NOTE(review): presumably the lexer always terminates the list with an
     * end-of-input token, which makes this harmless; confirm against Lexer.
     *
     * The `idx` property of the list is used as the loop cursor and is
     * therefore modified.
     *
     * @param TokensList $list the list of tokens
     *
     * @return string
     */
    public function formatList($list)
    {
        /**
         * The query to be returned.
         *
         * @var string
         */
        $ret = '';

        /**
         * The indentation level.
         *
         * @var int
         */
        $indent = 0;

        /**
         * Whether the line ended.
         *
         * @var bool
         */
        $lineEnded = false;

        /**
         * Whether current group is short (no linebreaks).
         *
         * @var bool
         */
        $shortGroup = false;

        /**
         * The name of the last clause.
         *
         * @var string
         */
        $lastClause = '';

        /**
         * A stack that keeps track of the indentation level every time a new
         * block is found.
         *
         * @var array
         */
        $blocksIndentation = array();

        /**
         * A stack that keeps track of the line endings every time a new block
         * is found.
         *
         * @var array
         */
        $blocksLineEndings = array();

        /**
         * Whether clause's options were formatted.
         *
         * @var bool
         */
        $formattedOptions = false;

        /**
         * Previously parsed token.
         *
         * @var Token|null
         */
        $prev = null;

        // In order to be able to format the queries correctly, the next token
        // must be taken into consideration. The loop below uses two pointers,
        // `$prev` and `$curr` which store two consecutive tokens.
        // Actually, at every iteration the previous token is being used.
        for ($list->idx = 0; $list->idx < $list->count; ++$list->idx) {
            /**
             * Token parsed at this moment.
             *
             * @var Token
             */
            $curr = $list->tokens[$list->idx];

            if ($curr->type === Token::TYPE_WHITESPACE) {
                // Keep linebreaks after comments
                if (strpos($curr->token, "\n") !== false
                    && $prev !== null
                    && $prev->type === Token::TYPE_COMMENT
                ) {
                    $lineEnded = true;
                }

                // Whitespaces are skipped because the formatter adds its own.
                continue;
            }

            if ($curr->type === Token::TYPE_COMMENT && $this->options['remove_comments']) {
                // Skip Comments if option `remove_comments` is enabled
                continue;
            }

            // Checking if pointers were initialized.
            if ($prev !== null) {
                // Checking if a new clause started.
                if (static::isClause($prev) !== false) {
                    $lastClause = $prev->value;
                    $formattedOptions = false;
                }

                // The options of a clause should stay on the same line and
                // everything that follows goes on a new, indented line. The
                // options end at the first token that is not a keyword or
                // that is a function keyword.
                if ($this->options['parts_newline']
                    && !$formattedOptions
                    && empty(self::$INLINE_CLAUSES[$lastClause])
                    && (
                        $curr->type !== Token::TYPE_KEYWORD
                        || ($curr->flags & Token::FLAG_KEYWORD_FUNCTION)
                    )
                ) {
                    $formattedOptions = true;
                    $lineEnded = true;
                    ++$indent;
                }

                // Checking if this clause ended.
                // `isClause()` yields 2 for a statement and 1 for a clause;
                // the loose comparison is kept on purpose so that overriding
                // implementations keep behaving as before.
                if ($tmp = static::isClause($curr)) {
                    if (($tmp == 2 || $this->options['clause_newline'])
                        && empty(self::$SHORT_CLAUSES[$lastClause])
                    ) {
                        $lineEnded = true;
                        if ($this->options['parts_newline'] && $indent > 0) {
                            --$indent;
                        }
                    }
                }

                // Inline JOINs
                if (self::isInJoinContext($list, $prev, $curr)) {
                    $lineEnded = false;
                }

                // Indenting BEGIN ... END blocks.
                if ($prev->type === Token::TYPE_KEYWORD && $prev->keyword === 'BEGIN') {
                    $lineEnded = true;
                    array_push($blocksIndentation, $indent);
                    ++$indent;
                } elseif ($curr->type === Token::TYPE_KEYWORD && $curr->keyword === 'END') {
                    // An END without a matching BEGIN (e.g. the END of a CASE
                    // expression) finds the stack empty.
                    $lineEnded = true;
                    $indent = self::popIndentation($blocksIndentation);
                }

                // Formatting fragments delimited by comma.
                if ($prev->type === Token::TYPE_OPERATOR && $prev->value === ',') {
                    // Fragments delimited by a comma are broken into multiple
                    // pieces only if the clause is not inlined or this fragment
                    // is between brackets that are on new line.
                    if (end($blocksLineEndings) === true
                        || (
                            empty(self::$INLINE_CLAUSES[$lastClause])
                            && !$shortGroup
                            && $this->options['parts_newline']
                        )
                    ) {
                        $lineEnded = true;
                    }
                }

                // Handling brackets.
                // Brackets are indented only if the length of the fragment between
                // them is longer than 30 characters.
                if ($prev->type === Token::TYPE_OPERATOR && $prev->value === '(') {
                    array_push($blocksIndentation, $indent);
                    $shortGroup = true;
                    if (static::getGroupLength($list) > 30) {
                        ++$indent;
                        $lineEnded = true;
                        $shortGroup = false;
                    }
                    array_push($blocksLineEndings, $lineEnded);
                } elseif ($curr->type === Token::TYPE_OPERATOR && $curr->value === ')') {
                    $indent = self::popIndentation($blocksIndentation);

                    // The stack must be popped even when the line already
                    // ended, hence no short-circuit around the call.
                    $groupLineEnded = array_pop($blocksLineEndings);
                    $lineEnded = $lineEnded || $groupLineEnded;

                    $shortGroup = false;
                }

                // Adding the token.
                $ret .= $this->toString($prev);

                // Finishing the line.
                if ($lineEnded) {
                    $ret .= $this->options['line_ending']
                        . str_repeat($this->options['indentation'], $indent);

                    $lineEnded = false;
                } elseif (self::needsSpaceBetween($prev, $curr)) {
                    // If the line ended there is no point in adding whitespaces.
                    // Also, some tokens do not have spaces before or after them.
                    $ret .= ' ';
                }
            }

            // Iteration finished, consider current token as previous.
            $prev = $curr;
        }

        if ($this->options['type'] === 'cli') {
            // Reset the colours of the terminal.
            return $ret . "\x1b[0m";
        }

        return $ret;
    }

    /**
     * Checks whether the tokens being processed belong to a JOIN that must
     * stay on one line.
     *
     * That is the case when the previous token is a JOIN keyword, when the
     * current token is `ON` or `USING` and a JOIN is found two positions
     * before it, or when a JOIN is found four or six positions before the
     * current position.
     *
     * @param TokensList $list the list of tokens, positioned on `$curr`
     * @param Token      $prev the previous significant token
     * @param Token      $curr the current token
     *
     * @return bool
     */
    private static function isInJoinContext($list, $prev, $curr)
    {
        if ($prev->type === Token::TYPE_KEYWORD && isset(JoinKeyword::$JOINS[$prev->value])) {
            return true;
        }

        if (in_array($curr->value, array('ON', 'USING'), true)
            && self::hasJoinValueAt($list, $list->idx - 2)
        ) {
            return true;
        }

        return self::hasJoinValueAt($list, $list->idx - 4)
            || self::hasJoinValueAt($list, $list->idx - 6);
    }

    /**
     * Checks whether a token exists at the given position and its value is
     * the name of a JOIN.
     *
     * Positions outside of the list (negative ones included) yield `false`
     * instead of raising a warning.
     *
     * @param TokensList $list the list of tokens
     * @param int        $idx  the position to be checked
     *
     * @return bool
     */
    private static function hasJoinValueAt($list, $idx)
    {
        return isset($list->tokens[$idx])
            && isset(JoinKeyword::$JOINS[$list->tokens[$idx]->value]);
    }

    /**
     * Pops the indentation level saved when the current block was opened.
     *
     * An empty stack (unbalanced block) yields level 0 rather than `null`,
     * so that the level can always be handed to `str_repeat()`.
     *
     * @param array $blocksIndentation the stack of saved levels
     *
     * @return int
     */
    private static function popIndentation(array &$blocksIndentation)
    {
        $level = array_pop($blocksIndentation);

        return $level === null ? 0 : $level;
    }

    /**
     * Checks whether a space must separate two consecutive tokens that stay
     * on the same line.
     *
     * @param Token $prev the token that was just emitted
     * @param Token $curr the token that follows it
     *
     * @return bool
     */
    private static function needsSpaceBetween($prev, $curr)
    {
        // Always a space after the DELIMITER keyword.
        if ($prev->keyword === 'DELIMITER') {
            return true;
        }

        // No space after . (
        if ($prev->type === Token::TYPE_OPERATOR
            && in_array($prev->value, array('.', '('), true)
        ) {
            return false;
        }

        // No space before . , ( )
        if ($curr->type === Token::TYPE_OPERATOR
            && in_array($curr->value, array('.', ',', '(', ')'), true)
        ) {
            return false;
        }

        // No space before a delimiter shorter than two characters.
        if ($curr->type === Token::TYPE_DELIMITER
            && mb_strlen($curr->value, 'UTF-8') < 2
        ) {
            return false;
        }

        return true;
    }
505
506 16
    /**
     * Makes the control characters of a string visible.
     *
     * Every byte from 0x00 to 0x1F is replaced by its four character textual
     * form: a backslash, an `x` and two uppercase hexadecimal digits (a line
     * feed becomes `\x0A`). All other bytes are left untouched.
     *
     * @param string $string the text to be escaped
     *
     * @return string
     */
    public function escapeConsole($string)
    {
        $controlChars = array();
        $visibleForms = array();

        for ($code = 0x00; $code <= 0x1F; ++$code) {
            $controlChars[] = chr($code);
            $visibleForms[] = sprintf('\x%02X', $code);
        }

        return str_replace($controlChars, $visibleForms, $string);
    }
520
521
    /**
     * Renders a single token for the configured output type.
     *
     * The first format whose `type` equals the token's type and whose `flags`
     * are all carried by the token is applied: its transformation function,
     * if any, is run on the token's text, then
     *  - for `html` the escaped text is wrapped in a `<span>` carrying the
     *    format's attributes;
     *  - for `cli` the control characters are escaped and the text is
     *    prefixed with the format's colour sequence, unless that sequence is
     *    the one emitted last;
     *  - for any other type the transformed text is returned as it is.
     *
     * A token matching no format is returned escaped for `html`, escaped and
     * in the default foreground colour for `cli`, untouched otherwise.
     *
     * The colour emitted last is remembered in a function-level static
     * variable: it outlives the call and is not tied to this instance.
     *
     * @param Token $token the token to be printed
     *
     * @return string
     */
    public function toString($token)
    {
        static $activeColor;

        $text = $token->token;

        // Looking for the first format that applies to this token.
        $applied = null;
        foreach ($this->options['formats'] as $candidate) {
            if ($token->type !== $candidate['type']) {
                continue;
            }
            if (($token->flags & $candidate['flags']) !== $candidate['flags']) {
                continue;
            }
            $applied = $candidate;
            break;
        }

        // Running transformation function.
        if ($applied !== null && !empty($applied['function'])) {
            $transform = $applied['function'];
            $text = $transform($text);
        }

        // Formatting HTML.
        if ($this->options['type'] === 'html') {
            if ($applied === null) {
                return htmlspecialchars($text, ENT_NOQUOTES);
            }

            return '<span ' . $applied['html'] . '>' . htmlspecialchars($text, ENT_NOQUOTES) . '</span>';
        }

        // Formatting for the console.
        if ($this->options['type'] === 'cli') {
            // Tokens without a format use the default foreground colour.
            $color = $applied === null ? "\x1b[39m" : $applied['cli'];

            // The sequence is emitted only when the colour changes.
            $prefix = '';
            if ($activeColor != $color) {
                $activeColor = $color;
                $prefix = $color;
            }

            return $prefix . $this->escapeConsole($text);
        }

        return $text;
    }
572
573
    /**
     * Formats a query.
     *
     * Shortcut that runs the lexer on the query, creates a formatter with
     * the given options and formats the resulting list of tokens.
     *
     * @param string $query   The query to be formatted
     * @param array  $options the formatting options
     *
     * @return string the formatted string
     */
    public static function format($query, array $options = array())
    {
        $tokenizer = new Lexer($query);
        $instance = new self($options);

        $tokens = $tokenizer->list;

        return $instance->formatList($tokens);
    }
588
589
    /**
     * Computes the length of a group.
     *
     * A group is delimited by a pair of brackets. The list is expected to be
     * positioned right after the opening bracket; the values of the tokens
     * found from there up to the matching closing bracket (or up to the end
     * of the list when there is none) are measured in UTF-8 characters and
     * summed. Nested brackets are part of the measured content.
     *
     * The position of the list is not changed.
     *
     * @param TokensList $list the list of tokens
     *
     * @return int
     */
    public static function getGroupLength($list)
    {
        // The number of brackets still open. It starts at one because the
        // opening bracket of the group was already consumed by the caller.
        $depth = 1;

        // The length of this group.
        $total = 0;

        $position = $list->idx;
        while ($position < $list->count) {
            $token = $list->tokens[$position];

            // Counting the brackets.
            if ($token->type === Token::TYPE_OPERATOR) {
                if ($token->value === '(') {
                    ++$depth;
                } elseif ($token->value === ')' && --$depth === 0) {
                    // The bracket closing the group is not measured.
                    break;
                }
            }

            // Keeping track of this group's length.
            $total += mb_strlen($token->value, 'UTF-8');

            ++$position;
        }

        return $total;
    }
636
637
    /**
     * Checks if a token is a statement or a clause inside a statement.
     *
     * @param Token $token the token to be checked
     *
     * @return int|bool 2 when the token is a keyword that has a statement
     *                  parser or is an untyped `DELIMITER` token (compared
     *                  case-insensitively), 1 when it is a keyword that has
     *                  a keyword parser, `false` otherwise
     */
    public static function isClause($token)
    {
        $isKeyword = $token->type === Token::TYPE_KEYWORD;

        // Beginning of a statement.
        if ($isKeyword && isset(Parser::$STATEMENT_PARSERS[$token->keyword])) {
            return 2;
        }

        // The DELIMITER command is handled like a statement.
        if ($token->type === Token::TYPE_NONE && strtoupper($token->token) === 'DELIMITER') {
            return 2;
        }

        // Clause inside a statement.
        if ($isKeyword && isset(Parser::$KEYWORD_PARSERS[$token->keyword])) {
            return 1;
        }

        return false;
    }
659
}
660