Completed
Push — master ( 7eebd4...4aadd2 )
by Michal
521:02 queued 455:56
created

Formatter::getMergedOptions()   B

Complexity

Conditions 4
Paths 6

Size

Total Lines 22
Code Lines 12

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 3
CRAP Score 4

Importance

Changes 0
Metric Value
cc 4
eloc 12
nc 6
nop 1
dl 0
loc 22
ccs 3
cts 3
cp 1
crap 4
rs 8.9197
c 0
b 0
f 0
1
<?php
2
3
/**
4
 * Utilities that are used for formatting queries.
5
 *
6
 * @package    SqlParser
7
 * @subpackage Utils
8
 */
9
namespace SqlParser\Utils;
10
11
use SqlParser\Lexer;
12
use SqlParser\Parser;
13
use SqlParser\Token;
14
use SqlParser\TokensList;
15
16
/**
17
 * Utilities that are used for formatting queries.
18
 *
19
 * @category   Misc
20
 * @package    SqlParser
21
 * @subpackage Utils
22
 * @license    https://www.gnu.org/licenses/gpl-2.0.txt GPL-2.0+
23
 */
24
class Formatter
25
{
26
27
    /**
28
     * The formatting options.
29
     *
30
     * @var array
31
     */
32
    // Assigned by the constructor: the caller's options merged over the defaults.
    public $options;
33
34
    /**
35
     * Clauses that must be inlined.
36
     *
37
     * These clauses usually are short and it's nicer to have them inline.
38
     *
39
     * @var array
40
     */
41
    public static $INLINE_CLAUSES = array(
        // Used as a set: formatList() only tests these keys with empty(),
        // so the clause name is what matters and the value is always true.
        'CREATE' => true,
        'LIMIT' => true,
        'PARTITION BY' => true,
        'PARTITION' => true,
        'PROCEDURE' => true,
        'SUBPARTITION BY' => true,
        'VALUES' => true,
    );
50
51
    /**
52
     * Constructor.
53
     *
54
     * @param array $options The formatting options.
55
     */
56
    public function __construct(array $options = array())
    {
        // Layer the caller's overrides on top of the built-in defaults once,
        // so every later read of $this->options sees a complete option set.
        $merged = $this->getMergedOptions($options);
        $this->options = $merged;
    }
60 8
61
    /**
62
     * The specified formatting options are merged with the default values.
63
     *
64
     * @param array $options
65
     * @return array
66
     */
67
    private function getMergedOptions(array $options)
    {
        // Caller-supplied options override the defaults key by key.
        $options = array_merge(
            $this->getDefaultOptions(),
            $options
        );

        // Formats are merged entry by entry (matched on type + flags) instead
        // of being replaced wholesale, so a caller can restyle one token type
        // and keep the defaults for the others.
        if (isset($options['formats'])) {
            $options['formats'] = self::mergeFormats($this->getDefaultFormats(), $options['formats']);
        } else {
            $options['formats'] = $this->getDefaultFormats();
        }

        // No explicit line ending: pick the one that suits the output type.
        if (is_null($options['line_ending'])) {
            $options['line_ending'] = $options['type'] === 'html' ? '<br/>' : "\n";
        }

        // `parts_newline` requires `clause_newline`.
        // A logical AND is used on purpose: the bitwise `&=` produced an int,
        // gave wrong answers for truthy values that share no bits
        // (e.g. 2 & 1 === 0) and operated byte-wise on string operands.
        $options['parts_newline'] = $options['parts_newline'] && $options['clause_newline'];

        return $options;
    }
89
90 8
    /**
91
     * The default formatting options.
92
     *
93
     * @return array
94
     */
95
    protected function getDefaultOptions()
    {
        $defaults = array();

        // string: format of the result ('text', 'cli' or 'html'). Defaults to
        // 'cli' when PHP runs under the CLI SAPI and to 'text' otherwise.
        $defaults['type'] = php_sapi_name() === 'cli' ? 'cli' : 'text';

        // string|null: the line ending used. Null means "decide later":
        // getMergedOptions() turns it into "<br/>" for HTML and "\n" otherwise.
        $defaults['line_ending'] = null;

        // string: the string used for one level of indentation.
        $defaults['indentation'] = '  ';

        // bool: whether comments should be removed or not.
        $defaults['remove_comments'] = false;

        // bool: whether each clause should be on a new line.
        $defaults['clause_newline'] = true;

        // bool: whether each part should be on a new line.
        // Parts are delimited by brackets and commas.
        $defaults['parts_newline'] = true;

        // bool: whether each part of each clause should be indented.
        $defaults['indent_parts'] = true;

        return $defaults;
    }
150 8
151 8
    /**
152 8
     * The styles used for HTML formatting.
153 8
     * array($type, $flags, $span, $callback)
154 8
     *
155 8
     * @return array
156
     */
157 8
    protected function getDefaultFormats()
    {
        // Column names of one format entry, in the order used by the rows below.
        $columns = array('type', 'flags', 'html', 'cli', 'function');

        // One row per style: token type, required flags, HTML attribute,
        // ANSI colour sequence, and the function applied to the token text
        // ('' means the text is left untouched).
        $rows = array(
            array(Token::TYPE_KEYWORD, Token::FLAG_KEYWORD_RESERVED, 'class="sql-reserved"', "\x1b[35m", 'strtoupper'),
            array(Token::TYPE_KEYWORD, 0, 'class="sql-keyword"', "\x1b[95m", 'strtoupper'),
            array(Token::TYPE_COMMENT, 0, 'class="sql-comment"', "\x1b[37m", ''),
            array(Token::TYPE_BOOL, 0, 'class="sql-atom"', "\x1b[36m", 'strtoupper'),
            array(Token::TYPE_NUMBER, 0, 'class="sql-number"', "\x1b[92m", 'strtolower'),
            array(Token::TYPE_STRING, 0, 'class="sql-string"', "\x1b[91m", ''),
            array(Token::TYPE_SYMBOL, 0, 'class="sql-variable"', "\x1b[36m", ''),
        );

        $formats = array();
        foreach ($rows as $row) {
            $formats[] = array_combine($columns, $row);
        }

        return $formats;
    }
211
212
    /**
     * Merges caller-supplied formats into an existing list of formats.
     *
     * A new format replaces the existing entry that has the same `type` and
     * `flags`; new formats that match no existing entry are appended.
     * Missing `flags`/`type` keys default to 0 and missing `html`/`cli`/
     * `function` keys default to an empty string.
     *
     * @param array $formats    The formats to start from.
     * @param array $newFormats The formats to merge in.
     *
     * @return array
     */
    private static function mergeFormats(array $formats, array $newFormats)
    {
        // Keys of $newFormats that replaced an existing format. Kept as a
        // keyed set and tested with isset(): a loose in_array() treats
        // distinct keys such as 1 and '01' as equal and would silently drop
        // the second format.
        $handled = array();
        $integers = array('flags', 'type');
        $strings = array('html', 'cli', 'function');

        /* Sanitize the array so that we do not have to care later */
        foreach ($newFormats as $j => $new) {
            foreach ($integers as $name) {
                if (! isset($new[$name])) {
                    $newFormats[$j][$name] = 0;
                }
            }
            foreach ($strings as $name) {
                if (! isset($new[$name])) {
                    $newFormats[$j][$name] = '';
                }
            }
        }

        /* Process changes to existing formats */
        foreach ($formats as $i => $original) {
            foreach ($newFormats as $j => $new) {
                if ($new['type'] === $original['type']
                    && $original['flags'] === $new['flags']
                ) {
                    $formats[$i] = $new;
                    $handled[$j] = true;
                }
            }
        }

        /* Add not already handled formats */
        foreach ($newFormats as $j => $new) {
            if (! isset($handled[$j])) {
                $formats[] = $new;
            }
        }

        return $formats;
    }
253
254
    /**
255
     * Formats the given list of tokens.
256
     *
257
     * @param TokensList $list The list of tokens.
258
     *
259
     * @return string
260 8
     */
261
    public function formatList($list)
    {
        /**
         * The query to be returned.
         *
         * @var string $ret
         */
        $ret = '';

        /**
         * The indentation level.
         *
         * @var int $indent
         */
        $indent = 0;

        /**
         * Whether the line ended.
         *
         * @var bool $lineEnded
         */
        $lineEnded = false;

        /**
         * Whether current group is short (no linebreaks)
         *
         * @var bool $shortGroup
         */
        $shortGroup = false;

        /**
         * The name of the last clause.
         *
         * @var string $lastClause
         */
        $lastClause = '';

        /**
         * A stack that keeps track of the indentation level every time a new
         * block is found.
         *
         * @var array $blocksIndentation
         */
        $blocksIndentation = array();

        /**
         * A stack that keeps track of the line endings every time a new block
         * is found.
         *
         * @var array $blocksLineEndings
         */
        $blocksLineEndings = array();

        /**
         * Whether clause's options were formatted.
         *
         * @var bool $formattedOptions
         */
        $formattedOptions = false;

        /**
         * Previously parsed token.
         *
         * @var Token $prev
         */
        $prev = null;

        /**
         * Comments are being formatted separately to maintain the whitespaces
         * before and after them.
         *
         * @var string $comment
         */
        $comment = '';

        // In order to be able to format the queries correctly, the next token
        // must be taken into consideration. The loop below uses two pointers,
        // `$prev` and `$curr` which store two consecutive tokens.
        // Actually, at every iteration the previous token is being used, so
        // the very last token of the list is only looked at, never emitted.
        for ($list->idx = 0; $list->idx < $list->count; ++$list->idx) {
            /**
             * Token parsed at this moment.
             *
             * @var Token $curr
             */
            $curr = $list->tokens[$list->idx];

            if ($curr->type === Token::TYPE_WHITESPACE) {
                // Whitespaces are skipped because the formatter adds its own.
                continue;
            } elseif ($curr->type === Token::TYPE_COMMENT) {
                // Whether the comments should be parsed.
                if (!empty($this->options['remove_comments'])) {
                    continue;
                }

                $idx = $list->idx;

                // The whitespaces before and after are preserved for
                // formatting reasons. A comment may be the first or the last
                // token, so every neighbour is checked for existence first.
                if (isset($list->tokens[$idx - 1])
                    && ($list->tokens[$idx - 1]->type === Token::TYPE_WHITESPACE)
                ) {
                    $comment .= $list->tokens[$idx - 1]->token;
                }

                $comment .= $this->toString($curr);

                // Adding the next whitespace only if there is no comment that
                // follows it immediately, which would add the whitespace twice
                // (once here and once as that comment's leading whitespace).
                if (isset($list->tokens[$idx + 1])
                    && ($list->tokens[$idx + 1]->type === Token::TYPE_WHITESPACE)
                    && (!isset($list->tokens[$idx + 2])
                    || ($list->tokens[$idx + 2]->type !== Token::TYPE_COMMENT))
                ) {
                    $comment .= $list->tokens[$idx + 1]->token;
                }

                // Everything was handled here, no need to continue.
                continue;
            }

            // Checking if pointers were initialized.
            if ($prev !== null) {
                // Checking if a new clause started.
                if (static::isClause($prev) !== false) {
                    $lastClause = $prev->value;
                    $formattedOptions = false;
                }

                // The options of a clause should stay on the same line and
                // everything that follows goes on a new, indented line. The
                // break happens at the first token that is not a keyword, or
                // that is a keyword flagged as a function.
                if (($this->options['parts_newline'])
                    && (!$formattedOptions)
                    && (empty(self::$INLINE_CLAUSES[$lastClause]))
                    && (($curr->type !== Token::TYPE_KEYWORD)
                    || ($curr->flags & Token::FLAG_KEYWORD_FUNCTION))
                ) {
                    $formattedOptions = true;
                    $lineEnded = true;
                    ++$indent;
                }

                // Checking if this clause ended: a statement keyword (2)
                // always starts a new line, a clause keyword (1) only when
                // `clause_newline` is enabled.
                $tmp = static::isClause($curr);
                if ($tmp) {
                    if (($tmp == 2) || ($this->options['clause_newline'])) {
                        $lineEnded = true;
                        if ($this->options['parts_newline']) {
                            --$indent;
                        }
                    }
                }

                // Indenting BEGIN ... END blocks.
                if (($prev->type === Token::TYPE_KEYWORD) && ($prev->value === 'BEGIN')) {
                    $lineEnded = true;
                    array_push($blocksIndentation, $indent);
                    ++$indent;
                } elseif (($curr->type === Token::TYPE_KEYWORD) && ($curr->value === 'END')) {
                    $lineEnded = true;
                    // An END without a recorded BEGIN (for example the END of
                    // a CASE expression) finds the stack empty. array_pop()
                    // would return null there, so fall back to level 0 to
                    // keep $indent an integer.
                    $indent = empty($blocksIndentation) ? 0 : array_pop($blocksIndentation);
                }

                // Formatting fragments delimited by comma.
                if (($prev->type === Token::TYPE_OPERATOR) && ($prev->value === ',')) {
                    // Fragments delimited by a comma are broken into multiple
                    // pieces only if the clause is not inlined or this fragment
                    // is between brackets that are on new line.
                    if (((empty(self::$INLINE_CLAUSES[$lastClause]))
                        && ! $shortGroup
                        && ($this->options['parts_newline']))
                        || (end($blocksLineEndings) === true)
                    ) {
                        $lineEnded = true;
                    }
                }

                // Handling brackets.
                // Brackets are indented only if the length of the fragment between
                // them is longer than 30 characters.
                if (($prev->type === Token::TYPE_OPERATOR) && ($prev->value === '(')) {
                    array_push($blocksIndentation, $indent);
                    $shortGroup = true;
                    if (static::getGroupLength($list) > 30) {
                        ++$indent;
                        $lineEnded = true;
                        $shortGroup = false;
                    }
                    array_push($blocksLineEndings, $lineEnded);
                } elseif (($curr->type === Token::TYPE_OPERATOR) && ($curr->value === ')')) {
                    // Same empty-stack guard as for END: an unbalanced closing
                    // bracket must not turn $indent into null.
                    $indent = empty($blocksIndentation) ? 0 : array_pop($blocksIndentation);
                    // Pop first, then combine logically, so the stack is
                    // always popped and $lineEnded stays a boolean.
                    $groupEndedLine = array_pop($blocksLineEndings);
                    $lineEnded = $lineEnded || $groupEndedLine;
                    $shortGroup = false;
                }

                // Delimiter must be placed on the same line with the last
                // clause.
                if ($curr->type === Token::TYPE_DELIMITER) {
                    $lineEnded = false;
                }

                // Adding the token.
                $ret .= $this->toString($prev);

                // Finishing the line.
                if ($lineEnded) {
                    if ($indent < 0) {
                        // TODO: Make sure this never occurs and delete it.
                        $indent = 0;
                    }

                    if ($curr->type !== Token::TYPE_COMMENT) {
                        $ret .= $this->options['line_ending']
                            . str_repeat($this->options['indentation'], $indent);
                    }
                    $lineEnded = false;
                } else {
                    // If the line ended there is no point in adding whitespaces.
                    // Also, some tokens do not have spaces before or after them.

                    // No space after . (
                    $noSpaceAfterPrev = ($prev->type === Token::TYPE_OPERATOR)
                        && (($prev->value === '.') || ($prev->value === '('));

                    // No space before . , ( )
                    $noSpaceBeforeCurr = ($curr->type === Token::TYPE_OPERATOR)
                        && (($curr->value === '.') || ($curr->value === ',')
                        || ($curr->value === '(') || ($curr->value === ')'));

                    // No space before a delimiter shorter than 2 characters.
                    // The value is cast because it may not be a string.
                    $shortDelimiter = ($curr->type === Token::TYPE_DELIMITER)
                        && (mb_strlen((string) $curr->value, 'UTF-8') < 2);

                    // The DELIMITER keyword itself is always followed by a space.
                    if (!($noSpaceAfterPrev || $noSpaceBeforeCurr || $shortDelimiter)
                        || ($prev->value === 'DELIMITER')
                    ) {
                        $ret .= ' ';
                    }
                }
            }

            // Comments collected since the previous token are emitted after it.
            if (!empty($comment)) {
                $ret .= $comment;
                $comment = '';
            }

            // Iteration finished, consider current token as previous.
            $prev = $curr;
        }

        // Reset the terminal colours once the whole query was written.
        if ($this->options['type'] === 'cli') {
            return $ret . "\x1b[0m";
        }

        return $ret;
    }
503
504
    public function escapeConsole($string)
    {
        // Build the two lookup tables: every control byte 0x00-0x1F and its
        // printable `\xNN` spelling (two upper-case hexadecimal digits).
        $controls = array();
        $escaped = array();
        for ($code = 0x00; $code <= 0x1F; ++$code) {
            $controls[] = chr($code);
            $escaped[] = sprintf('\x%02X', $code);
        }

        return str_replace($controls, $escaped, $string);
    }
524 3
525 3
    /**
526
     * Tries to print the query and returns the result.
527 4
     *
528
     * @param Token $token The token to be printed.
529
     *
530
     * @return string
531
     */
532
    public function toString($token)
    {
        $type = $this->options['type'];
        $text = $token->token;

        // Only the first format whose type equals the token's type and whose
        // flags are all set on the token is applied.
        $match = null;
        foreach ($this->options['formats'] as $candidate) {
            if ($token->type !== $candidate['type']) {
                continue;
            }
            if (($token->flags & $candidate['flags']) !== $candidate['flags']) {
                continue;
            }
            $match = $candidate;
            break;
        }

        if ($match !== null) {
            // Running transformation function.
            if (!empty($match['function'])) {
                $transform = $match['function'];
                $text = $transform($text);
            }

            if ($type === 'html') {
                return '<span ' . $match['html'] . '>' . htmlspecialchars($text, ENT_NOQUOTES) . '</span>';
            }
            if ($type === 'cli') {
                return $match['cli'] . $this->escapeConsole($text);
            }

            // Any other output type gets the (possibly transformed) text as is.
            return $text;
        }

        // No format matched: emit the raw text, escaped for the output type.
        if ($type === 'cli') {
            // "\x1b[39m" selects the terminal's default foreground colour.
            return "\x1b[39m" . $this->escapeConsole($text);
        }
        if ($type === 'html') {
            return htmlspecialchars($text, ENT_NOQUOTES);
        }

        return $text;
    }
564
565
    /**
566
     * Formats a query.
567
     *
568
     * @param string $query   The query to be formatted
569
     * @param array  $options The formatting options.
570
     *
571
     * @return string          The formatted string.
572
     */
573
    public static function format($query, array $options = array())
    {
        // Tokenize first, then hand the token list to a formatter configured
        // with the given options.
        $tokenizer = new Lexer($query);
        $instance = new self($options);

        return $instance->formatList($tokenizer->list);
    }
579
580
    /**
581
     * Computes the length of a group.
582
     *
583
     * A group is delimited by a pair of brackets.
584
     *
585
     * @param TokensList $list The list of tokens.
586
     *
587
     * @return int
588
     */
589
    public static function getGroupLength($list)
    {
        // Nesting depth of open brackets. It starts at one because the
        // scan begins at the list's current index, after the group's own
        // opening bracket was already consumed by the caller.
        $depth = 1;

        // Sum of the lengths (in UTF-8 characters) of the tokens' values.
        $total = 0;

        $idx = $list->idx;
        while ($idx < $list->count) {
            $token = $list->tokens[$idx];

            if ($token->type === Token::TYPE_OPERATOR) {
                if ($token->value === '(') {
                    ++$depth;
                } elseif (($token->value === ')') && (--$depth == 0)) {
                    // The bracket that closes the group is not counted.
                    break;
                }
            }

            $total += mb_strlen($token->value, 'UTF-8');
            ++$idx;
        }

        return $total;
    }
627
628
    /**
629
     * Checks if a token is a statement or a clause inside a statement.
630
     *
631
     * @param Token $token The token to be checked.
632
     *
633
     * @return int|bool
634
     */
635
    public static function isClause($token)
    {
        // The DELIMITER pseudo-statement has no token type of its own, so it
        // is recognized by its text; it counts as a statement (2).
        if (($token->type === Token::TYPE_NONE) && (strtoupper($token->token) === 'DELIMITER')) {
            return 2;
        }

        // Everything else can only qualify if it is a keyword.
        if ($token->type !== Token::TYPE_KEYWORD) {
            return false;
        }

        // Keywords registered as statement parsers yield 2.
        if (isset(Parser::$STATEMENT_PARSERS[$token->value])) {
            return 2;
        }

        // Keywords registered as keyword (clause) parsers yield 1.
        if (isset(Parser::$KEYWORD_PARSERS[$token->value])) {
            return 1;
        }

        return false;
    }
646
}
647