Completed
Push — master ( f3c5ad...f7e232 )
by Michal
8s
created

Formatter::mergeFormats()   F

Complexity

Conditions 20
Paths 342

Size

Total Lines 46
Code Lines 28

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 7
CRAP Score 20

Importance

Changes 0
Metric Value
cc 20
eloc 28
nc 342
nop 2
dl 0
loc 46
ccs 7
cts 7
cp 1
crap 20
rs 3.6338
c 0
b 0
f 0

How to fix   Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include Extract Method: moving a cohesive part of the body into a new, well-named method.

1
<?php
2
3
/**
4
 * Utilities that are used for formatting queries.
5
 *
6
 * @package    SqlParser
7
 * @subpackage Utils
8
 */
9
namespace SqlParser\Utils;
10
11
use SqlParser\Lexer;
12
use SqlParser\Parser;
13
use SqlParser\Token;
14
use SqlParser\TokensList;
15
16
/**
17
 * Utilities that are used for formatting queries.
18
 *
19
 * @category   Misc
20
 * @package    SqlParser
21
 * @subpackage Utils
22
 * @license    https://www.gnu.org/licenses/gpl-2.0.txt GPL-2.0+
23
 */
24
class Formatter
25
{
26
27
    /**
     * The formatting options.
     *
     * Set by the constructor to the caller-supplied options merged over the
     * defaults (see `getMergedOptions()`). The methods of this class read the
     * keys `type`, `line_ending`, `indentation`, `remove_comments`,
     * `clause_newline`, `parts_newline` and `formats`.
     *
     * @var array
     */
    public $options;
33
34
    /**
     * Clauses that must be inlined.
     *
     * These clauses usually are short and it's nicer to have them inline.
     * The array is used as a set: the clause name is the key and the value is
     * always `true`, so membership can be tested with `empty()` / `isset()`.
     *
     * @var array
     */
    public static $INLINE_CLAUSES = array(
        'CREATE'                        => true,
        'LIMIT'                         => true,
        'PARTITION BY'                  => true,
        'PARTITION'                     => true,
        'PROCEDURE'                     => true,
        'SUBPARTITION BY'               => true,
        'VALUES'                        => true,
    );
50
51
    /**
     * Constructor.
     *
     * Stores the given options merged over the default options in
     * `$this->options`.
     *
     * @param array $options The formatting options.
     */
    public function __construct(array $options = array())
    {
        $this->options = $this->getMergedOptions($options);
    }
60 8
61
    /**
     * Merges the specified formatting options with the default values.
     *
     * On top of a plain `array_merge()` over the defaults this method:
     *  - merges the custom `formats` (if any) into the default formats;
     *  - resolves a `null` line ending to `<br/>` for the `html` type and to
     *    a line feed for every other type;
     *  - switches `parts_newline` off when `clause_newline` is off.
     *
     * @param array $options The options given by the caller.
     *
     * @return array The complete set of options.
     */
    private function getMergedOptions(array $options)
    {
        $options = array_merge($this->getDefaultOptions(), $options);

        // A missing or empty `formats` option means "no custom formats".
        // `empty()` covers the missing key without the `@` operator.
        $customFormats = empty($options['formats']) ? array() : $options['formats'];
        $options['formats'] = self::mergeFormats($this->getDefaultFormats(), $customFormats);

        if ($options['line_ending'] === null) {
            $options['line_ending'] = $options['type'] === 'html' ? '<br/>' : "\n";
        }

        // `parts_newline` requires `clause_newline`.
        // A logical AND is used instead of the bitwise `&=` so that any pair
        // of truthy values stays truthy (bitwise, `true & 2` is `0`) and the
        // option remains a boolean.
        $options['parts_newline'] = $options['parts_newline'] && $options['clause_newline'];

        return $options;
    }
85
86
    /**
     * The default formatting options.
     *
     * Every key returned here can be overridden through the options array
     * given to the constructor.
     *
     * @return array
     */
    protected function getDefaultOptions()
    {
        return array(
            /**
             * The format of the result.
             *
             * Defaults to 'cli' when running under the CLI SAPI and to 'text'
             * otherwise.
             *
             * @var string The type ('text', 'cli' or 'html')
             */
            'type' => php_sapi_name() === 'cli' ? 'cli' : 'text',

            /**
             * The line ending used.
             * By default, for text this is "\n" and for HTML this is "<br/>".
             * `null` means "use the default for the selected type".
             *
             * @var string
             */
            'line_ending' => null,

            /**
             * The string used for indentation.
             *
             * @var string
             */
            'indentation' => '  ',

            /**
             * Whether comments should be removed or not.
             *
             * @var bool
             */
            'remove_comments' => false,

            /**
             * Whether each clause should be on a new line.
             *
             * @var bool
             */
            'clause_newline' => true,

            /**
             * Whether each part should be on a new line.
             * Parts are delimited by brackets and commas.
             *
             * @var bool
             */
            'parts_newline' => true,

            /**
             * Whether each part of each clause should be indented.
             *
             * @var bool
             */
            'indent_parts' => true,
        );
    }
146 8
147 8
    /**
     * The default formats applied to tokens.
     *
     * Each format is an array with the following keys:
     *  - `type`:     the token type the format applies to;
     *  - `flags`:    the flags a token must have set for the format to apply;
     *  - `html`:     the attributes put on the `<span>` wrapping the token
     *                when the output type is `html`;
     *  - `cli`:      the escape sequence written before the token when the
     *                output type is `cli`;
     *  - `function`: the name of a function applied to the token's text, or
     *                an empty string for no transformation.
     *
     * `toString()` uses the first format that matches a token, so the order
     * of the entries matters.
     *
     * @return array
     */
    protected function getDefaultFormats()
    {
        return array(
            array(
                'type' => Token::TYPE_KEYWORD,
                'flags' => Token::FLAG_KEYWORD_RESERVED,
                'html' => 'class="sql-reserved"',
                'cli' => "\x1b[35m",
                'function' => 'strtoupper',
            ),
            array(
                'type' => Token::TYPE_KEYWORD,
                'flags' => 0,
                'html' => 'class="sql-keyword"',
                'cli' => "\x1b[95m",
                'function' => 'strtoupper',
            ),
            array(
                'type' => Token::TYPE_COMMENT,
                'flags' => 0,
                'html' => 'class="sql-comment"',
                'cli' => "\x1b[37m",
                'function' => '',
            ),
            array(
                'type' => Token::TYPE_BOOL,
                'flags' => 0,
                'html' => 'class="sql-atom"',
                'cli' => "\x1b[36m",
                'function' => 'strtoupper',
            ),
            array(
                'type' => Token::TYPE_NUMBER,
                'flags' => 0,
                'html' => 'class="sql-number"',
                'cli' => "\x1b[92m",
                'function' => 'strtolower',
            ),
            array(
                'type' => Token::TYPE_STRING,
                'flags' => 0,
                'html' => 'class="sql-string"',
                'cli' => "\x1b[91m",
                'function' => '',
            ),
            array(
                'type' => Token::TYPE_SYMBOL,
                'flags' => 0,
                'html' => 'class="sql-variable"',
                'cli' => "\x1b[36m",
                'function' => '',
            ),
        );
    }
207
208 8
    /**
     * Merges custom formats into an existing list of formats.
     *
     * A custom format replaces every existing format that has the same
     * `type` and the same `flags` (a custom format without `flags` targets
     * the existing format whose flags are `0`). The replacement is complete:
     * keys missing from the custom format become `''` (`0` for `flags`), they
     * are not inherited from the format being replaced.
     *
     * Custom formats that replaced nothing are appended, in their original
     * order. Entries without a `type` are ignored.
     *
     * @param array $formats    The existing formats.
     * @param array $newFormats The custom formats.
     *
     * @return array The merged formats.
     */
    private static function mergeFormats(array $formats, array $newFormats)
    {
        // Set of the keys of `$newFormats` that replaced an existing format.
        // Keys are stored as array keys so that the lookup below is exact;
        // a loose `in_array()` would consider a string key equal to the
        // integer key 0 on PHP < 8 and silently drop that custom format.
        $replaced = array();

        foreach ($formats as $i => $original) {
            foreach ($newFormats as $j => $new) {
                if (self::isSameFormat($original, $new)) {
                    $formats[$i] = self::normalizeFormat($new);
                    $replaced[$j] = true;
                }
            }
        }

        foreach ($newFormats as $j => $new) {
            if (!isset($replaced[$j]) && isset($new['type'])) {
                $formats[] = self::normalizeFormat($new);
            }
        }

        return $formats;
    }

    /**
     * Checks whether a custom format targets the given existing format.
     *
     * @param array $original The existing format.
     * @param mixed $new      The custom format.
     *
     * @return bool
     */
    private static function isSameFormat($original, $new)
    {
        if (!isset($new['type']) || ($new['type'] !== $original['type'])) {
            return false;
        }

        if (isset($new['flags'])) {
            return $original['flags'] === $new['flags'];
        }

        // No flags given: only the flag-less existing format is targeted.
        return $original['flags'] == 0;
    }

    /**
     * Builds a complete format out of a custom one, filling missing keys.
     *
     * @param array $format The custom format; `type` must be set.
     *
     * @return array
     */
    private static function normalizeFormat($format)
    {
        return array(
            'type' => $format['type'],
            'flags' => isset($format['flags']) ? $format['flags'] : 0,
            'html' => isset($format['html']) ? $format['html'] : '',
            'cli' => isset($format['cli']) ? $format['cli'] : '',
            'function' => isset($format['function']) ? $format['function'] : '',
        );
    }
254
255
    /**
     * Formats the given list of tokens.
     *
     * The list is walked with two pointers, `$prev` and `$curr`. At every
     * iteration it is `$prev` that is written to the result, while `$curr`
     * is only used to decide what follows it (a space, a line ending or
     * nothing). Consequently the last token of the list is never written.
     * NOTE(review): presumably the lexer terminates every list with a
     * delimiter token that is safe to drop; confirm against the lexer.
     *
     * Whitespace tokens are skipped. Comments are collected separately,
     * together with the whitespace around them, and are appended after the
     * token that precedes them.
     *
     * @param TokensList $list The list of tokens.
     *
     * @return string
     */
    public function formatList($list)
    {

        /**
         * The query to be returned.
         *
         * @var string $ret
         */
        $ret = '';

        /**
         * The indentation level.
         *
         * @var int $indent
         */
        $indent = 0;

        /**
         * Whether the line ended.
         *
         * @var bool $lineEnded
         */
        $lineEnded = false;

        /**
         * Whether current group is short (no linebreaks)
         *
         * @var bool $shortGroup
         */
        $shortGroup = false;

        /**
         * The name of the last clause.
         *
         * @var string $lastClause
         */
        $lastClause = '';

        /**
         * A stack that keeps track of the indentation level every time a new
         * block is found.
         *
         * @var array $blocksIndentation
         */
        $blocksIndentation = array();

        /**
         * A stack that keeps track of the line endings every time a new block
         * is found.
         *
         * @var array $blocksLineEndings
         */
        $blocksLineEndings = array();

        /**
         * Whether clause's options were formatted.
         *
         * @var bool $formattedOptions
         */
        $formattedOptions = false;

        /**
         * Previously parsed token.
         *
         * @var Token $prev
         */
        $prev = null;

        /**
         * Comments are being formatted separately to maintain the whitespaces
         * before and after them.
         *
         * @var string $comment
         */
        $comment = '';

        // In order to be able to format the queries correctly, the next token
        // must be taken into consideration. The loop below uses two pointers,
        // `$prev` and `$curr` which store two consecutive tokens.
        // Actually, at every iteration the previous token is being used.
        for ($list->idx = 0; $list->idx < $list->count; ++$list->idx) {
            /**
             * Token parsed at this moment.
             *
             * @var Token $curr
             */
            $curr = $list->tokens[$list->idx];

            if ($curr->type === Token::TYPE_WHITESPACE) {
                // Whitespaces are skipped because the formatter adds its own.
                continue;
            } elseif ($curr->type === Token::TYPE_COMMENT) {
                // Whether the comments should be parsed.
                if (!empty($this->options['remove_comments'])) {
                    continue;
                }

                // The neighbours of the comment are looked up through
                // `isset()` because the comment may be the first token of the
                // list or one of the last two.
                if (isset($list->tokens[$list->idx - 1])
                    && ($list->tokens[$list->idx - 1]->type === Token::TYPE_WHITESPACE)
                ) {
                    // The whitespaces before and after are preserved for
                    // formatting reasons.
                    $comment .= $list->tokens[$list->idx - 1]->token;
                }
                $comment .= $this->toString($curr);
                if (isset($list->tokens[$list->idx + 1])
                    && ($list->tokens[$list->idx + 1]->type === Token::TYPE_WHITESPACE)
                    && (!isset($list->tokens[$list->idx + 2])
                    || ($list->tokens[$list->idx + 2]->type !== Token::TYPE_COMMENT))
                ) {
                    // Adding the next whitespace only there is no comment that
                    // follows it immediately which may cause adding a
                    // whitespace twice.
                    $comment .= $list->tokens[$list->idx + 1]->token;
                }

                // Everything was handled here, no need to continue.
                continue;
            }

            // Checking if pointers were initialized.
            if ($prev !== null) {
                // Checking if a new clause started.
                if (static::isClause($prev) !== false) {
                    $lastClause = $prev->value;
                    $formattedOptions = false;
                }

                // The options of a clause should stay on the same line and everything that follows.
                if (($this->options['parts_newline'])
                    && (!$formattedOptions)
                    && (empty(self::$INLINE_CLAUSES[$lastClause]))
                    && (($curr->type !== Token::TYPE_KEYWORD)
                    || (($curr->type === Token::TYPE_KEYWORD)
                    && ($curr->flags & Token::FLAG_KEYWORD_FUNCTION)))
                ) {
                    $formattedOptions = true;
                    $lineEnded = true;
                    ++$indent;
                }

                // Checking if this clause ended.
                if ($tmp = static::isClause($curr)) {
                    // Statements (2) always start on a new line, other
                    // clauses (1) only when `clause_newline` is enabled.
                    if (($tmp == 2) || ($this->options['clause_newline'])) {
                        $lineEnded = true;
                        if ($this->options['parts_newline']) {
                            --$indent;
                        }
                    }
                }

                // Indenting BEGIN ... END blocks.
                if (($prev->type === Token::TYPE_KEYWORD) && ($prev->value === 'BEGIN')) {
                    $lineEnded = true;
                    array_push($blocksIndentation, $indent);
                    ++$indent;
                } elseif (($curr->type === Token::TYPE_KEYWORD) && ($curr->value === 'END')) {
                    $lineEnded = true;
                    $indent = array_pop($blocksIndentation);
                }

                // Formatting fragments delimited by comma.
                if (($prev->type === Token::TYPE_OPERATOR) && ($prev->value === ',')) {
                    // Fragments delimited by a comma are broken into multiple
                    // pieces only if the clause is not inlined or this fragment
                    // is between brackets that are on new line.
                    if (((empty(self::$INLINE_CLAUSES[$lastClause]))
                        && ! $shortGroup
                        && ($this->options['parts_newline']))
                        || (end($blocksLineEndings) === true)
                    ) {
                        $lineEnded = true;
                    }
                }

                // Handling brackets.
                // Brackets are indented only if the length of the fragment between
                // them is longer than 30 characters.
                if (($prev->type === Token::TYPE_OPERATOR) && ($prev->value === '(')) {
                    array_push($blocksIndentation, $indent);
                    $shortGroup = true;
                    if (static::getGroupLength($list) > 30) {
                        ++$indent;
                        $lineEnded = true;
                        $shortGroup = false;
                    }
                    array_push($blocksLineEndings, $lineEnded);
                } elseif (($curr->type === Token::TYPE_OPERATOR) && ($curr->value === ')')) {
                    $indent = array_pop($blocksIndentation);
                    // The stack must be popped unconditionally, hence the
                    // temporary variable instead of a short-circuiting `||`.
                    $groupLineEnded = array_pop($blocksLineEndings);
                    $lineEnded = $lineEnded || $groupLineEnded;
                    $shortGroup = false;
                }

                // Delimiter must be placed on the same line with the last
                // clause.
                if ($curr->type === Token::TYPE_DELIMITER) {
                    $lineEnded = false;
                }

                // Adding the token.
                $ret .= $this->toString($prev);

                // Finishing the line.
                if ($lineEnded) {
                    if ($indent < 0) {
                        // TODO: Make sure this never occurs and delete it.
                        $indent = 0;
                    }

                    if ($curr->type !== Token::TYPE_COMMENT) {
                        $ret .= $this->options['line_ending']
                            . str_repeat($this->options['indentation'], $indent);
                    }
                    $lineEnded = false;
                } else {
                    // If the line ended there is no point in adding whitespaces.
                    // Also, some tokens do not have spaces before or after them.
                    if (!((($prev->type === Token::TYPE_OPERATOR) && (($prev->value === '.') || ($prev->value === '(')))
                        // No space after . (
                        || (($curr->type === Token::TYPE_OPERATOR) && (($curr->value === '.') || ($curr->value === ',')
                        || ($curr->value === '(') || ($curr->value === ')')))
                        // No space before . , ( )
                        || (($curr->type === Token::TYPE_DELIMITER)) && (mb_strlen($curr->value, 'UTF-8') < 2))
                        // A space after delimiters that are longer than 2 characters.
                        || ($prev->value === 'DELIMITER')
                    ) {
                        $ret .= ' ';
                    }
                }
            }

            // The comments found since the previous token go right after it.
            if (!empty($comment)) {
                $ret .= $comment;
                $comment = '';
            }

            // Iteration finished, consider current token as previous.
            $prev = $curr;
        }

        if ($this->options['type'] === 'cli') {
            // Resetting the colors set by the formats.
            return $ret . "\x1b[0m";
        }

        return $ret;
    }
504
505
    /**
     * Makes the control characters of a string visible.
     *
     * Every byte in the range 0x00 - 0x1F is replaced by its four character
     * textual representation: a backslash, `x` and two upper case hexadecimal
     * digits (an escape byte becomes the text `\x1B`). This keeps token
     * contents from being interpreted by the terminal. Bytes outside that
     * range are left untouched.
     *
     * @param string $string The string to be escaped.
     *
     * @return string
     */
    public function escapeConsole($string)
    {
        // The tables are the same for every call, they are built only once.
        static $search = null;
        static $replace = null;

        if ($search === null) {
            $search = array();
            $replace = array();
            for ($code = 0x00; $code <= 0x1F; ++$code) {
                $search[] = chr($code);
                // Single quotes: `\x` is a literal backslash followed by `x`.
                $replace[] = sprintf('\x%02X', $code);
            }
        }

        return str_replace($search, $replace, $string);
    }
525 3
526
    /**
     * Converts a token to its formatted string representation.
     *
     * The first format in `$this->options['formats']` whose type equals the
     * token's type and whose flags are all set on the token is applied: its
     * transformation function (if any) is run on the token's text and, based
     * on the output type, the text is wrapped in a `<span>` (`html`) or
     * prefixed by the format's escape sequence (`cli`).
     *
     * Tokens that match no format are prefixed by the default foreground
     * color sequence for `cli` and only escaped for `html`. For any other
     * output type the (possibly transformed) text is returned as it is.
     *
     * @param Token $token The token to be printed.
     *
     * @return string
     */
    public function toString($token)
    {
        $text = $token->token;
        $type = $this->options['type'];

        foreach ($this->options['formats'] as $format) {
            if (($token->type !== $format['type'])
                || (($token->flags & $format['flags']) !== $format['flags'])
            ) {
                // This format is meant for other tokens.
                continue;
            }

            // Running transformation function.
            if (!empty($format['function'])) {
                $func = $format['function'];
                $text = $func($text);
            }

            // Formatting HTML.
            if ($type === 'html') {
                return '<span ' . $format['html'] . '>' . htmlspecialchars($text, ENT_NOQUOTES) . '</span>';
            }

            // Formatting for the console.
            if ($type === 'cli') {
                return $format['cli'] . $this->escapeConsole($text);
            }

            // Only the first matching format is used.
            break;
        }

        if ($type === 'cli') {
            // "\x1b[39m" selects the default foreground color.
            return "\x1b[39m" . $this->escapeConsole($text);
        }

        if ($type === 'html') {
            return htmlspecialchars($text, ENT_NOQUOTES);
        }

        return $text;
    }
565
566
    /**
     * Formats a query.
     *
     * The query is lexed and the resulting list of tokens is given to
     * `formatList()` of a formatter built with the given options.
     *
     * The formatter is an instance of the class this method is called on, so
     * that a subclass overriding `getDefaultOptions()` or
     * `getDefaultFormats()` also takes effect through this static entry
     * point.
     *
     * @param string $query   The query to be formatted
     * @param array  $options The formatting options.
     *
     * @return string          The formatted string.
     */
    public static function format($query, array $options = array())
    {
        $lexer = new Lexer($query);
        $formatter = new static($options);

        return $formatter->formatList($lexer->list);
    }
580
581
    /**
     * Computes the length of a group.
     *
     * A group is delimited by a pair of brackets. The tokens are read
     * starting from the current position of the list (`$list->idx`), which
     * is not changed, up to the bracket that closes the group or up to the
     * end of the list if the group is never closed. The length is the sum of
     * the lengths, in characters, of the values of those tokens; the closing
     * bracket itself is not counted.
     *
     * @param TokensList $list The list of tokens.
     *
     * @return int
     */
    public static function getGroupLength($list)
    {
        /**
         * The number of opening brackets found.
         * This counter starts at one because by the time this function called,
         * the list already advanced one position and the opening bracket was
         * already parsed.
         *
         * @var int $count
         */
        $count = 1;

        /**
         * The length of this group.
         *
         * @var int $length
         */
        $length = 0;

        for ($idx = $list->idx; $idx < $list->count; ++$idx) {
            $token = $list->tokens[$idx];

            // Counting the brackets.
            if ($token->type === Token::TYPE_OPERATOR) {
                if ($token->value === '(') {
                    ++$count;
                } elseif ($token->value === ')') {
                    --$count;
                    if ($count == 0) {
                        // The bracket that opened this group was closed.
                        break;
                    }
                }
            }

            // Keeping track of this group's length.
            $length += mb_strlen($token->value, 'UTF-8');
        }

        return $length;
    }
628
629
    /**
     * Checks if a token is a statement or a clause inside a statement.
     *
     * @param Token $token The token to be checked.
     *
     * @return int|bool `2` if the token starts a statement (a keyword found
     *                  in `Parser::$STATEMENT_PARSERS` or the word
     *                  `DELIMITER` in a token of type `TYPE_NONE`), `1` if it
     *                  is a keyword found in `Parser::$KEYWORD_PARSERS` and
     *                  `false` otherwise.
     */
    public static function isClause($token)
    {
        // `DELIMITER` is looked for, case-insensitively, in the raw text of
        // tokens of type `TYPE_NONE`.
        if (($token->type === Token::TYPE_NONE) && (strtoupper($token->token) === 'DELIMITER')) {
            return 2;
        }

        $isKeyword = $token->type === Token::TYPE_KEYWORD;

        // Statements take precedence over clauses.
        if ($isKeyword && isset(Parser::$STATEMENT_PARSERS[$token->value])) {
            return 2;
        }

        if ($isKeyword && isset(Parser::$KEYWORD_PARSERS[$token->value])) {
            return 1;
        }

        return false;
    }
647
}
648