Test Failed
Pull Request — master (#153)
by Michal
03:33
created

Formatter::toString()   C

Complexity

Conditions 11
Paths 22

Size

Total Lines 44
Code Lines 25

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 19
CRAP Score 11

Importance

Changes 0
Metric Value
dl 0
loc 44
ccs 19
cts 19
cp 1
rs 5.2653
c 0
b 0
f 0
cc 11
eloc 25
nc 22
nop 1
crap 11

How to fix   Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include Extract Method.

1
<?php
2
3
/**
4
 * Utilities that are used for formatting queries.
5
 */
6
7
namespace PhpMyAdmin\SqlParser\Utils;
8
9
use PhpMyAdmin\SqlParser\Components\JoinKeyword;
10
use PhpMyAdmin\SqlParser\Lexer;
11
use PhpMyAdmin\SqlParser\Parser;
12
use PhpMyAdmin\SqlParser\Token;
13
use PhpMyAdmin\SqlParser\TokensList;
14
15
/**
16
 * Utilities that are used for formatting queries.
17
 *
18
 * @category   Misc
19
 *
20
 * @license    https://www.gnu.org/licenses/gpl-2.0.txt GPL-2.0+
21
 */
22
class Formatter
23
{
24
    /**
     * The effective formatting options of this formatter.
     *
     * Filled in by the constructor with the caller's options merged over the
     * defaults.
     *
     * @var array
     */
    public $options;

    /**
     * Clauses that are usually short.
     *
     * A clause listed here shares its line with the clause that follows it,
     * i.e. no line break is inserted when the next clause starts.
     *
     * E.g. without INSERT in this list the formatter would produce:
     *
     *      INSERT
     *      INTO foo
     *      VALUES(0, 0, 0),(1, 1, 1);
     *
     * With INSERT in this list it produces:
     *
     *      INSERT INTO foo
     *      VALUES(0, 0, 0),(1, 1, 1)
     *
     * @var array
     */
    public static $SHORT_CLAUSES = array(
        'CREATE' => true,
        'INSERT' => true,
    );

    /**
     * Clauses whose content is kept inline.
     *
     * The parts of these clauses are not moved to a new, indented line and
     * their comma separated fragments are not broken into separate lines.
     *
     * @var array
     */
    public static $INLINE_CLAUSES = array(
        'CREATE' => true,
        'INTO' => true,
        'LIMIT' => true,
        'PARTITION BY' => true,
        'PARTITION' => true,
        'PROCEDURE' => true,
        'SUBPARTITION BY' => true,
        'VALUES' => true,
    );
71
72
    /**
     * Creates a formatter configured with the given options.
     *
     * Options that are not supplied fall back to their default values.
     *
     * @param array $options the formatting options
     */
    public function __construct(array $options = array())
    {
        $effectiveOptions = $this->getMergedOptions($options);

        $this->options = $effectiveOptions;
    }
81
82
    /**
     * Combines the caller supplied options with the default ones.
     *
     * Besides the plain merge, this method:
     *  - merges custom `formats` into the default formats,
     *  - derives `line_ending` and `indentation` from `type` when they are
     *    left as `null`,
     *  - switches `parts_newline` off whenever `clause_newline` is off.
     *
     * @param array $options the options given by the caller
     *
     * @return array the complete set of options
     */
    private function getMergedOptions(array $options)
    {
        $merged = array_merge($this->getDefaultOptions(), $options);

        // Custom formats override or extend the defaults; without them the
        // defaults are used unchanged.
        $merged['formats'] = isset($merged['formats'])
            ? self::mergeFormats($this->getDefaultFormats(), $merged['formats'])
            : $this->getDefaultFormats();

        if ($merged['line_ending'] === null) {
            if ($merged['type'] === 'html') {
                $merged['line_ending'] = '<br/>';
            } else {
                $merged['line_ending'] = "\n";
            }
        }

        if ($merged['indentation'] === null) {
            if ($merged['type'] === 'html') {
                $merged['indentation'] = '&nbsp;&nbsp;&nbsp;&nbsp;';
            } else {
                $merged['indentation'] = '    ';
            }
        }

        // `parts_newline` only makes sense together with `clause_newline`.
        // Note that the bitwise operator leaves an integer (0 or 1) behind.
        $merged['parts_newline'] &= $merged['clause_newline'];

        return $merged;
    }
115
116
    /**
     * Builds the default value of every formatting option.
     *
     * @return array option name => default value
     */
    protected function getDefaultOptions()
    {
        $defaults = array();

        // The format of the result: 'text', 'cli' or 'html'.
        // 'cli' is picked automatically when running from the command line.
        $defaults['type'] = php_sapi_name() === 'cli' ? 'cli' : 'text';

        // The line ending used (string). `null` means "derive it from the
        // type": "<br/>" for HTML and "\n" otherwise.
        $defaults['line_ending'] = null;

        // The string used for one level of indentation. `null` means
        // "derive it from the type".
        $defaults['indentation'] = null;

        // Whether comments should be removed (bool).
        $defaults['remove_comments'] = false;

        // Whether each clause should start on a new line (bool).
        $defaults['clause_newline'] = true;

        // Whether each part should be on a new line (bool).
        // Parts are delimited by brackets and commas.
        $defaults['parts_newline'] = true;

        // Whether each part of each clause should be indented (bool).
        $defaults['indent_parts'] = true;

        return $defaults;
    }
176
177
    /**
     * Builds the default styles applied to tokens.
     *
     * Every entry is an array with the keys:
     *  - `type`:     the token type the style applies to,
     *  - `flags`:    the token flags that must all be set,
     *  - `html`:     the attributes of the `<span>` used for HTML output,
     *  - `cli`:      the escape sequence emitted for CLI output,
     *  - `function`: name of a function applied to the token text ('' for none).
     *
     * The order matters: the first entry matching a token is used, which is
     * why reserved keywords come before the generic keyword entry.
     *
     * @return array
     */
    protected function getDefaultFormats()
    {
        $keys = array('type', 'flags', 'html', 'cli', 'function');

        // One row per style, in the same order as $keys.
        $rows = array(
            array(Token::TYPE_KEYWORD, Token::FLAG_KEYWORD_RESERVED, 'class="sql-reserved"', "\x1b[35m", 'strtoupper'),
            array(Token::TYPE_KEYWORD, 0, 'class="sql-keyword"', "\x1b[95m", 'strtoupper'),
            array(Token::TYPE_COMMENT, 0, 'class="sql-comment"', "\x1b[37m", ''),
            array(Token::TYPE_BOOL, 0, 'class="sql-atom"', "\x1b[36m", 'strtoupper'),
            array(Token::TYPE_NUMBER, 0, 'class="sql-number"', "\x1b[92m", 'strtolower'),
            array(Token::TYPE_STRING, 0, 'class="sql-string"', "\x1b[91m", ''),
            array(Token::TYPE_SYMBOL, 0, 'class="sql-variable"', "\x1b[36m", ''),
        );

        $formats = array();
        foreach ($rows as $row) {
            $formats[] = array_combine($keys, $row);
        }

        return $formats;
    }
237
238 4
    /**
     * Merges custom formats into a list of existing formats.
     *
     * A new format replaces every existing format that has the same `type`
     * and `flags`; new formats that replace nothing are appended at the end.
     * Keys missing from a new format are filled in first (`flags` and `type`
     * with 0, `html`, `cli` and `function` with an empty string).
     *
     * @param array $formats    the existing formats
     * @param array $newFormats the formats to merge in (any array keys)
     *
     * @return array the merged formats
     */
    private static function mergeFormats(array $formats, array $newFormats)
    {
        // Fallback value of every key, in the order they get appended.
        $fallbacks = array(
            'flags' => 0,
            'type' => 0,
            'html' => '',
            'cli' => '',
            'function' => '',
        );

        /* Sanitize the array so that we do not have to care later */
        foreach ($newFormats as $j => $new) {
            foreach ($fallbacks as $name => $fallback) {
                if (!isset($new[$name])) {
                    $newFormats[$j][$name] = $fallback;
                }
            }
        }

        /*
         * Keys of the new formats that replaced an existing one.
         *
         * The keys are stored as array keys and looked up with isset(): a
         * loose in_array() would treat e.g. the key 'custom' as equal to
         * the key 0 on PHP < 8 and silently drop that format.
         */
        $handled = array();

        /* Process changes to existing formats */
        foreach ($formats as $i => $original) {
            foreach ($newFormats as $j => $new) {
                if ($new['type'] === $original['type']
                    && $original['flags'] === $new['flags']
                ) {
                    $formats[$i] = $new;
                    $handled[$j] = true;
                }
            }
        }

        /* Add not already handled formats */
        foreach ($newFormats as $j => $new) {
            if (!isset($handled[$j])) {
                $formats[] = $new;
            }
        }

        return $formats;
    }
279
280
    /**
     * Formats the given list of tokens.
     *
     * The list is walked once, using `$list->idx` as the cursor. Whitespace
     * tokens are dropped because the formatter inserts its own spacing.
     * Every remaining token is written one step late: while `$curr` is being
     * inspected, `$prev` is appended to the result, followed either by a line
     * break plus indentation or by a single space (or nothing). The last
     * non-skipped token of the list is consequently never written
     * (presumably the lexer's closing delimiter; confirm against the lexer).
     *
     * @param TokensList $list the list of tokens
     *
     * @return string the formatted query; in 'cli' mode it ends with the
     *                reset escape sequence
     */
    public function formatList($list)
    {
        /**
         * The query to be returned.
         *
         * @var string
         */
        $ret = '';

        /**
         * The indentation level.
         *
         * @var int
         */
        $indent = 0;

        /**
         * Whether a line break must follow the token that is written next.
         *
         * @var bool
         */
        $lineEnded = false;

        /**
         * Whether the current bracket group is short (kept on one line).
         *
         * @var bool
         */
        $shortGroup = false;

        /**
         * The name of the last clause.
         *
         * @var string
         */
        $lastClause = '';

        /**
         * A stack with the indentation level in effect when each open block
         * (bracket or BEGIN) started.
         *
         * @var array
         */
        $blocksIndentation = array();

        /**
         * A stack with the line ending flag recorded when each open bracket
         * was found.
         *
         * @var array
         */
        $blocksLineEndings = array();

        /**
         * Whether the options of the current clause were already formatted.
         *
         * @var bool
         */
        $formattedOptions = false;

        /**
         * Previously seen token (whitespace and removed comments excluded).
         *
         * @var Token|null
         */
        $prev = null;

        for ($list->idx = 0; $list->idx < $list->count; ++$list->idx) {
            /**
             * Token inspected at this moment.
             *
             * @var Token
             */
            $curr = $list->tokens[$list->idx];

            if ($curr->type === Token::TYPE_WHITESPACE) {
                // Keep linebreaks after comments.
                if (strpos($curr->token, "\n") !== false && $prev !== null && $prev->type === Token::TYPE_COMMENT) {
                    $lineEnded = true;
                }

                // Whitespaces are skipped because the formatter adds its own.
                continue;
            }

            if ($curr->type === Token::TYPE_COMMENT && $this->options['remove_comments']) {
                // Skip comments if option `remove_comments` is enabled.
                continue;
            }

            // Nothing can be written until two consecutive tokens are known.
            if ($prev !== null) {
                // Checking if a new clause started.
                if (static::isClause($prev) !== false) {
                    $lastClause = $prev->value;
                    $formattedOptions = false;
                }

                // The options of a clause stay on the clause's line; the first
                // token that is not a (non-function) keyword starts the
                // indented body of the clause.
                if ($this->options['parts_newline']
                    && !$formattedOptions
                    && empty(self::$INLINE_CLAUSES[$lastClause])
                    && ($curr->type !== Token::TYPE_KEYWORD || ($curr->flags & Token::FLAG_KEYWORD_FUNCTION))
                ) {
                    $formattedOptions = true;
                    $lineEnded = true;
                    ++$indent;
                }

                // Checking if this clause ended, i.e. a new one starts.
                // Statements (kind 2) always break the line, plain clauses
                // only when `clause_newline` is on.
                $clauseKind = static::isClause($curr);
                if ($clauseKind
                    && ($clauseKind == 2 || $this->options['clause_newline'])
                    && empty(self::$SHORT_CLAUSES[$lastClause])
                ) {
                    $lineEnded = true;
                    if ($this->options['parts_newline'] && $indent > 0) {
                        --$indent;
                    }
                }

                // Inline JOINs: no line break right after a JOIN keyword, before
                // an ON/USING that follows the joined table, nor when a JOIN
                // keyword sits four or six positions back in the list.
                // Every look-behind is guarded so that tokens near the start
                // of the list do not trigger undefined offset notices.
                $afterJoin = $prev->type === Token::TYPE_KEYWORD
                    && isset(JoinKeyword::$JOINS[$prev->value]);
                $joinCondition = in_array($curr->value, array('ON', 'USING'), true)
                    && isset($list->tokens[$list->idx - 2])
                    && isset(JoinKeyword::$JOINS[$list->tokens[$list->idx - 2]->value]);
                $joinFourBack = isset($list->tokens[$list->idx - 4])
                    && isset(JoinKeyword::$JOINS[$list->tokens[$list->idx - 4]->value]);
                $joinSixBack = isset($list->tokens[$list->idx - 6])
                    && isset(JoinKeyword::$JOINS[$list->tokens[$list->idx - 6]->value]);

                if ($afterJoin || $joinCondition || $joinFourBack || $joinSixBack) {
                    $lineEnded = false;
                }

                // Indenting BEGIN ... END blocks.
                if ($prev->type === Token::TYPE_KEYWORD && $prev->keyword === 'BEGIN') {
                    $lineEnded = true;
                    array_push($blocksIndentation, $indent);
                    ++$indent;
                } elseif ($curr->type === Token::TYPE_KEYWORD && $curr->keyword === 'END') {
                    $lineEnded = true;
                    // An END without a matching block pops nothing (null);
                    // the cast keeps the level a valid integer in that case.
                    $indent = (int) array_pop($blocksIndentation);
                }

                // Formatting fragments delimited by comma.
                if ($prev->type === Token::TYPE_OPERATOR && $prev->value === ',') {
                    // Fragments delimited by a comma are broken into multiple
                    // pieces only if the clause is not inlined or this fragment
                    // is between brackets that are on new line.
                    if (end($blocksLineEndings) === true
                        || (
                            empty(self::$INLINE_CLAUSES[$lastClause])
                            && !$shortGroup
                            && $this->options['parts_newline']
                        )
                    ) {
                        $lineEnded = true;
                    }
                }

                // Handling brackets.
                // Brackets are indented only if the length of the fragment between
                // them is longer than 30 characters.
                if ($prev->type === Token::TYPE_OPERATOR && $prev->value === '(') {
                    array_push($blocksIndentation, $indent);
                    $shortGroup = true;
                    if (static::getGroupLength($list) > 30) {
                        ++$indent;
                        $lineEnded = true;
                        $shortGroup = false;
                    }
                    array_push($blocksLineEndings, $lineEnded);
                } elseif ($curr->type === Token::TYPE_OPERATOR && $curr->value === ')') {
                    // Same guard as for END: an unmatched bracket pops null.
                    $indent = (int) array_pop($blocksIndentation);
                    // The stack is popped unconditionally (left operand) and
                    // the flag stays a real boolean.
                    $lineEnded = array_pop($blocksLineEndings) || $lineEnded;
                    $shortGroup = false;
                }

                // Adding the token.
                $ret .= $this->toString($prev);

                // Finishing the line.
                if ($lineEnded) {
                    $ret .= $this->options['line_ending']
                        . str_repeat($this->options['indentation'], $indent);

                    $lineEnded = false;
                } else {
                    // If the line ended there is no point in adding whitespaces.
                    // Also, some tokens do not have spaces before or after them.

                    // No space after `.` and `(`.
                    $prevGlues = $prev->type === Token::TYPE_OPERATOR
                        && ($prev->value === '.' || $prev->value === '(');

                    // No space before `.`, `,`, `(` and `)`.
                    $currGlues = $curr->type === Token::TYPE_OPERATOR
                        && in_array($curr->value, array('.', ',', '(', ')'), true);

                    // No space before a delimiter shorter than 2 characters.
                    $shortDelimiter = $curr->type === Token::TYPE_DELIMITER
                        && mb_strlen($curr->value, 'UTF-8') < 2;

                    // The DELIMITER keyword is always followed by a space.
                    if ($prev->keyword === 'DELIMITER'
                        || !($prevGlues || $currGlues || $shortDelimiter)
                    ) {
                        $ret .= ' ';
                    }
                }
            }

            // Iteration finished, consider current token as previous.
            $prev = $curr;
        }

        if ($this->options['type'] === 'cli') {
            // Reset the terminal attributes changed by the colour sequences.
            return $ret . "\x1b[0m";
        }

        return $ret;
    }
506
507 16
    /**
     * Makes control characters visible for console output.
     *
     * Every byte from 0x00 to 0x1F is replaced by its four character textual
     * form: a backslash, `x` and two uppercase hexadecimal digits
     * (e.g. a line feed becomes `\x0A`).
     *
     * @param string $string the text to be escaped
     *
     * @return string the escaped text
     */
    public function escapeConsole($string)
    {
        $controlChars = array();
        $visibleForms = array();

        for ($code = 0x00; $code <= 0x1F; ++$code) {
            $controlChars[] = chr($code);
            $visibleForms[] = sprintf('\x%02X', $code);
        }

        return str_replace($controlChars, $visibleForms, $string);
    }
521
522
    /**
     * Renders a single token according to the configured output type.
     *
     * The first format whose type equals the token's type and whose flags
     * are all set on the token is applied: its transformation function (if
     * any) is run on the token text and its styling is used for the output.
     *
     *  - 'html': the escaped text, wrapped in a `<span>` when a format matched;
     *  - 'cli':  the console-escaped text, prefixed with the colour sequence
     *            only when it differs from the one emitted last;
     *  - other:  the (possibly transformed) text as is.
     *
     * Note: the last emitted colour is kept in a static variable, so it
     * persists between calls for the lifetime of the process.
     *
     * @param Token $token the token to be printed
     *
     * @return string
     */
    public function toString($token)
    {
        // Colour sequence emitted last in 'cli' mode.
        static $prev;

        $text = $token->token;
        $outputType = $this->options['type'];

        // Locating the first format that applies to this token.
        $matched = null;
        foreach ($this->options['formats'] as $candidate) {
            if ($token->type === $candidate['type']
                && ($token->flags & $candidate['flags']) === $candidate['flags']
            ) {
                $matched = $candidate;
                break;
            }
        }

        // Running transformation function.
        if ($matched !== null && !empty($matched['function'])) {
            $transform = $matched['function'];
            $text = $transform($text);
        }

        // Formatting HTML.
        if ($outputType === 'html') {
            $escaped = htmlspecialchars($text, ENT_NOQUOTES);
            if ($matched === null) {
                return $escaped;
            }

            return '<span ' . $matched['html'] . '>' . $escaped . '</span>';
        }

        // Formatting for the console; tokens without a format use the
        // default foreground colour.
        if ($outputType === 'cli') {
            $colour = $matched === null ? "\x1b[39m" : $matched['cli'];
            if ($prev != $colour) {
                $prev = $colour;

                return $colour . $this->escapeConsole($text);
            }

            return $this->escapeConsole($text);
        }

        return $text;
    }
573 19
574 19
    /**
     * Formats a query.
     *
     * Convenience wrapper: the query is lexed and the resulting list of
     * tokens is formatted by a formatter created with the given options.
     *
     * @param string $query   The query to be formatted
     * @param array  $options the formatting options
     *
     * @return string the formatted string
     */
    public static function format($query, array $options = array())
    {
        $instance = new self($options);
        $lexed = new Lexer($query);

        return $instance->formatList($lexed->list);
    }
589
590
    /**
     * Computes the length of a group.
     *
     * A group is delimited by a pair of brackets. The measurement starts at
     * the list's current position and stops at the bracket that closes the
     * group (or at the end of the list); the closing bracket itself is not
     * counted. The list's position is not modified.
     *
     * @param TokensList $list the list of tokens
     *
     * @return int the summed length of the values of the group's tokens
     */
    public static function getGroupLength($list)
    {
        // Nesting depth. It starts at one because the opening bracket of the
        // group is expected to be behind the current position already.
        $depth = 1;

        // The length of this group.
        $length = 0;

        $idx = $list->idx;
        while ($idx < $list->count) {
            $token = $list->tokens[$idx];

            // Counting the brackets.
            if ($token->type === Token::TYPE_OPERATOR) {
                if ($token->value === '(') {
                    ++$depth;
                } elseif ($token->value === ')' && --$depth == 0) {
                    // The group is closed.
                    break;
                }
            }

            // Keeping track of this group's length.
            $length += mb_strlen($token->value, 'UTF-8');

            ++$idx;
        }

        return $length;
    }
637 18
638 18
    /**
     * Checks if a token is a statement or a clause inside a statement.
     *
     * @param Token $token the token to be checked
     *
     * @return int|bool 2 if the token starts a statement (a keyword that has
     *                  a statement parser, or the `DELIMITER` pseudo
     *                  statement), 1 if it is a keyword that has a keyword
     *                  parser, false otherwise
     */
    public static function isClause($token)
    {
        $isKeyword = $token->type === Token::TYPE_KEYWORD;

        // Keywords that begin a statement.
        if ($isKeyword && isset(Parser::$STATEMENT_PARSERS[$token->keyword])) {
            return 2;
        }

        // `DELIMITER` is not a keyword but is handled like a statement.
        if ($token->type === Token::TYPE_NONE && strtoupper($token->token) === 'DELIMITER') {
            return 2;
        }

        // Keywords that begin a clause.
        if ($isKeyword && isset(Parser::$KEYWORD_PARSERS[$token->keyword])) {
            return 1;
        }

        return false;
    }
660
}
661