Completed
Push — master ( 5e69cd...d8547b )
by Michal
04:09
created

Formatter::formatList()   F

Complexity

Conditions 52
Paths > 20000

Size

Total Lines 242
Code Lines 92

Duplication

Lines 9
Ratio 3.72 %

Code Coverage

Tests 108
CRAP Score 52.6107

Importance

Changes 8
Bugs 3 Features 1
Metric Value
c 8
b 3
f 1
dl 9
loc 242
ccs 108
cts 115
cp 0.9391
rs 2
cc 52
eloc 92
nc 27666
nop 1
crap 52.6107

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

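Commonly applied refactorings include Extract Method; when a method juggles many parameters or temporary variables, Replace Temp with Query, Introduce Parameter Object, or Preserve Whole Object can also help.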

1
<?php
2
3
/**
4
 * Utilities that are used for formatting queries.
5
 *
6
 * @package    SqlParser
7
 * @subpackage Utils
8
 */
9
namespace SqlParser\Utils;
10
11
use SqlParser\Lexer;
12
use SqlParser\Parser;
13
use SqlParser\Token;
14
use SqlParser\TokensList;
15
16
/**
17
 * Utilities that are used for formatting queries.
18
 *
19
 * @category   Misc
20
 * @package    SqlParser
21
 * @subpackage Utils
22
 * @author     Dan Ungureanu <[email protected]>
23
 * @license    http://opensource.org/licenses/GPL-2.0 GNU Public License
24
 */
25
class Formatter
{

    /**
     * The formatting options.
     *
     * Set by the constructor to the default options merged with the options
     * supplied by the caller (caller-supplied values win).
     *
     * @var array
     */
    public $options;

    /**
     * Clauses that must be inlined.
     *
     * These clauses usually are short and it's nicer to have them inline.
     * The keys are clause names; `formatList()` only checks whether the entry
     * for the last clause is non-empty.
     *
     * @var array
     */
    public static $INLINE_CLAUSES = array(
        'CREATE'                        => true,
        'LIMIT'                         => true,
        'PARTITION BY'                  => true,
        'PARTITION'                     => true,
        'PROCEDURE'                     => true,
        'SUBPARTITION BY'               => true,
        'VALUES'                        => true,
    );

    /**
     * Constructor.
     *
     * Merges the given options over the defaults. The default line ending
     * depends on the effective output type: "<br/>" for 'html' and "\n"
     * otherwise. An explicitly supplied `line_ending` always wins.
     *
     * @param array $options The formatting options.
     */
    public function __construct(array $options = array())
    {
        // The effective type has to be resolved BEFORE the defaults are built:
        // `$this->options` is not populated yet at that point, so the default
        // line ending cannot be derived from it.
        $defaultType = php_sapi_name() === 'cli' ? 'cli' : 'text';
        $type = array_key_exists('type', $options) ? $options['type'] : $defaultType;

        // The specified formatting options are merged with the default values.
        $this->options = array_merge(
            array(

                // The format of the result: 'text', 'cli' or 'html'.
                'type' => $defaultType,

                // The line ending used.
                // By default, for HTML this is "<br/>" and "\n" otherwise.
                'line_ending' => $type === 'html' ? '<br/>' : "\n",

                // The string used for one level of indentation.
                'indentation' => '  ',

                // Whether comments should be removed or not.
                'remove_comments' => false,

                // Whether each clause should be on a new line.
                'clause_newline' => true,

                // Whether each part should be on a new line.
                // Parts are delimited by brackets and commas.
                'parts_newline' => true,

                // Whether each part of each clause should be indented.
                'indent_parts' => true,

                // The styles used for formatting. The first entry whose `type`
                // equals the token's type and whose `flags` are all set on the
                // token is applied by `toString()`:
                //   - `html`     attribute string placed inside the <span> tag
                //   - `cli`      escape sequence emitted before the token
                //   - `function` callable applied to the token text ('' = none)
                'formats' => array(
                    array(
                        'type'      => Token::TYPE_KEYWORD,
                        'flags'     => Token::FLAG_KEYWORD_RESERVED,
                        'html'      => 'class="sql-reserved"',
                        'cli'       => "\e[35m",
                        'function'  => 'strtoupper',
                    ),
                    array(
                        'type'      => Token::TYPE_KEYWORD,
                        'flags'     => 0,
                        'html'      => 'class="sql-keyword"',
                        'cli'       => "\e[95m",
                        'function'  => 'strtoupper',
                    ),
                    array(
                        'type'      => Token::TYPE_COMMENT,
                        'flags'     => 0,
                        'html'      => 'class="sql-comment"',
                        'cli'       => "\e[37m",
                        'function'  => '',
                    ),
                    array(
                        'type'      => Token::TYPE_BOOL,
                        'flags'     => 0,
                        'html'      => 'class="sql-atom"',
                        'cli'       => "\e[36m",
                        'function'  => 'strtoupper',
                    ),
                    array(
                        'type'      => Token::TYPE_NUMBER,
                        'flags'     => 0,
                        'html'      => 'class="sql-number"',
                        'cli'       => "\e[92m",
                        'function'  => 'strtolower',
                    ),
                    array(
                        'type'      => Token::TYPE_STRING,
                        'flags'     => 0,
                        'html'      => 'class="sql-string"',
                        'cli'       => "\e[91m",
                        'function'  => '',
                    ),
                    array(
                        'type'      => Token::TYPE_SYMBOL,
                        'flags'     => 0,
                        'html'      => 'class="sql-variable"',
                        'cli'       => "\e[36m",
                        'function'  => '',
                    ),
                )
            ),
            $options
        );

        // `parts_newline` requires `clause_newline`. A logical AND keeps the
        // option a real boolean (a bitwise `&=` would turn it into an int).
        $this->options['parts_newline'] = $this->options['parts_newline']
            && $this->options['clause_newline'];
    }

    /**
     * Formats the given list of tokens.
     *
     * The list is walked with two pointers (`$prev` and `$curr`); every
     * iteration emits `$prev`, because the whitespace that follows a token
     * depends on the token that comes next. As a consequence the last
     * non-whitespace, non-comment token of the list is never emitted.
     * NOTE(review): presumably the lexer always terminates the list with a
     * delimiter token, which makes this harmless; confirm against the Lexer.
     *
     * The list's `idx` property is used as the loop cursor and is therefore
     * modified by this method.
     *
     * @param TokensList $list The list of tokens.
     *
     * @return string
     */
    public function formatList($list)
    {

        /**
         * The query to be returned.
         *
         * @var string $ret
         */
        $ret = '';

        /**
         * The indentation level.
         *
         * @var int $indent
         */
        $indent = 0;

        /**
         * Whether the line ended.
         *
         * @var bool $lineEnded
         */
        $lineEnded = false;

        /**
         * Whether current group is short (no linebreaks)
         *
         * @var bool $shortGroup
         */
        $shortGroup = false;

        /**
         * The name of the last clause.
         *
         * @var string $lastClause
         */
        $lastClause = '';

        /**
         * A stack that keeps track of the indentation level every time a new
         * block is found.
         *
         * @var array $blocksIndentation
         */
        $blocksIndentation = array();

        /**
         * A stack that keeps track of the line endings every time a new block
         * is found.
         *
         * @var array $blocksLineEndings
         */
        $blocksLineEndings = array();

        /**
         * Whether clause's options were formatted.
         *
         * @var bool $formattedOptions
         */
        $formattedOptions = false;

        /**
         * Previously parsed token.
         *
         * @var Token $prev
         */
        $prev = null;

        /**
         * Comments are being formatted separately to maintain the whitespaces
         * before and after them.
         *
         * @var string $comment
         */
        $comment = '';

        // In order to be able to format the queries correctly, the next token
        // must be taken into consideration. The loop below uses two pointers,
        // `$prev` and `$curr` which store two consecutive tokens.
        // Actually, at every iteration the previous token is being used.
        for ($list->idx = 0; $list->idx < $list->count; ++$list->idx) {
            /**
             * Token parsed at this moment.
             *
             * @var Token $curr
             */
            $curr = $list->tokens[$list->idx];

            if ($curr->type === Token::TYPE_WHITESPACE) {
                // Whitespaces are skipped because the formatter adds its own.
                continue;
            } elseif ($curr->type === Token::TYPE_COMMENT) {
                // Whether the comments should be parsed.
                if (!empty($this->options['remove_comments'])) {
                    continue;
                }

                // Neighbouring tokens may not exist when the comment sits at
                // the very beginning or end of the list, so each access is
                // guarded instead of indexing blindly.
                $before = isset($list->tokens[$list->idx - 1])
                    ? $list->tokens[$list->idx - 1] : null;
                $after = isset($list->tokens[$list->idx + 1])
                    ? $list->tokens[$list->idx + 1] : null;
                $afterNext = isset($list->tokens[$list->idx + 2])
                    ? $list->tokens[$list->idx + 2] : null;

                if (($before !== null) && ($before->type === Token::TYPE_WHITESPACE)) {
                    // The whitespaces before and after are preserved for
                    // formatting reasons.
                    $comment .= $before->token;
                }
                $comment .= $this->toString($curr);
                if (($after !== null)
                    && ($after->type === Token::TYPE_WHITESPACE)
                    && (($afterNext === null) || ($afterNext->type !== Token::TYPE_COMMENT))
                ) {
                    // Adding the next whitespace only if there is no comment
                    // that follows it immediately, which would cause the
                    // whitespace to be added twice.
                    $comment .= $after->token;
                }

                // Everything was handled here, no need to continue.
                continue;
            }

            // Checking if pointers were initialized.
            if ($prev !== null) {
                // Checking if a new clause started.
                if (static::isClause($prev) !== false) {
                    $lastClause = $prev->value;
                    $formattedOptions = false;
                }

                // The options of a clause should stay on the same line and
                // everything that follows goes on a new, indented line. The
                // break happens at the first token that is not a keyword or
                // that is a keyword flagged as a function.
                if (($this->options['parts_newline'])
                    && (!$formattedOptions)
                    && (empty(self::$INLINE_CLAUSES[$lastClause]))
                    && (($curr->type !== Token::TYPE_KEYWORD)
                    || ($curr->flags & Token::FLAG_KEYWORD_FUNCTION))
                ) {
                    $formattedOptions = true;
                    $lineEnded = true;
                    ++$indent;
                }

                // Checking if this clause ended.
                if ($tmp = static::isClause($curr)) {
                    // Statements (2) always start on a new line; clauses (1)
                    // only when `clause_newline` is enabled.
                    if (($tmp == 2) || ($this->options['clause_newline'])) {
                        $lineEnded = true;
                        if ($this->options['parts_newline']) {
                            --$indent;
                        }
                    }
                }

                // Indenting BEGIN ... END blocks.
                if (($prev->type === Token::TYPE_KEYWORD) && ($prev->value === 'BEGIN')) {
                    $lineEnded = true;
                    array_push($blocksIndentation, $indent);
                    ++$indent;
                } elseif (($curr->type === Token::TYPE_KEYWORD) && ($curr->value === 'END')) {
                    $lineEnded = true;
                    // The stack may be empty (an END without a matching BEGIN);
                    // the cast keeps `$indent` an integer in that case.
                    $indent = (int) array_pop($blocksIndentation);
                }

                // Formatting fragments delimited by comma.
                if (($prev->type === Token::TYPE_OPERATOR) && ($prev->value === ',')) {
                    // Fragments delimited by a comma are broken into multiple
                    // pieces only if the clause is not inlined or this fragment
                    // is between brackets that are on new line.
                    if (((empty(self::$INLINE_CLAUSES[$lastClause]))
                        && ! $shortGroup
                        && ($this->options['parts_newline']))
                        || (end($blocksLineEndings) === true)
                    ) {
                        $lineEnded = true;
                    }
                }

                // Handling brackets.
                // Brackets are indented only if the length of the fragment between
                // them is longer than 30 characters.
                if (($prev->type === Token::TYPE_OPERATOR) && ($prev->value === '(')) {
                    array_push($blocksIndentation, $indent);
                    $shortGroup = true;
                    if (static::getGroupLength($list) > 30) {
                        ++$indent;
                        $lineEnded = true;
                        $shortGroup = false;
                    }
                    array_push($blocksLineEndings, $lineEnded);
                } elseif (($curr->type === Token::TYPE_OPERATOR) && ($curr->value === ')')) {
                    // Both stacks may be empty on unbalanced brackets.
                    $indent = (int) array_pop($blocksIndentation);
                    // The pop must always happen, hence it is not placed on the
                    // right-hand side of a short-circuiting operator.
                    $groupLineEnded = array_pop($blocksLineEndings);
                    $lineEnded = $lineEnded || $groupLineEnded;
                    $shortGroup = false;
                }

                // Delimiter must be placed on the same line with the last
                // clause.
                if ($curr->type === Token::TYPE_DELIMITER) {
                    $lineEnded = false;
                }

                // Adding the token.
                $ret .= $this->toString($prev);

                // Finishing the line.
                if ($lineEnded) {
                    if ($indent < 0) {
                        // TODO: Make sure this never occurs and delete it.
                        $indent = 0;
                    }

                    if ($curr->type !== Token::TYPE_COMMENT) {
                        $ret .= $this->options['line_ending']
                            . str_repeat($this->options['indentation'], $indent);
                    }
                    $lineEnded = false;
                } else {
                    // If the line ended there is no point in adding whitespaces.
                    // Also, some tokens do not have spaces before or after them.

                    // No space after . (
                    $noSpaceAfterPrev = ($prev->type === Token::TYPE_OPERATOR)
                        && (($prev->value === '.') || ($prev->value === '('));

                    // No space before . , ( )
                    $noSpaceBeforeCurr = ($curr->type === Token::TYPE_OPERATOR)
                        && in_array($curr->value, array('.', ',', '(', ')'), true);

                    // No space before a delimiter shorter than 2 characters.
                    // The cast guards against a delimiter without a value.
                    $shortDelimiter = ($curr->type === Token::TYPE_DELIMITER)
                        && (mb_strlen((string) $curr->value, 'UTF-8') < 2);

                    // A space always follows the `DELIMITER` keyword.
                    if (!($noSpaceAfterPrev || $noSpaceBeforeCurr || $shortDelimiter)
                        || ($prev->value === 'DELIMITER')
                    ) {
                        $ret .= ' ';
                    }
                }
            }

            // Comments collected since the previous token are flushed after it.
            if (!empty($comment)) {
                $ret .= $comment;
                $comment = '';
            }

            // Iteration finished, consider current token as previous.
            $prev = $curr;
        }

        if ($this->options['type'] === 'cli') {
            // Resetting the terminal attributes changed by the colour codes.
            return $ret . "\e[0m";
        }

        return $ret;
    }

    /**
     * Renders a single token according to the configured output type.
     *
     * The first entry of the `formats` option that matches the token's type
     * and flags is applied: its `function` transforms the text, then the text
     * is wrapped in a `<span>` ('html') or prefixed with the entry's escape
     * sequence ('cli'). In 'html' mode the text is always HTML-escaped so that
     * characters such as `<`, `>` and `&` coming from the query cannot break
     * or inject markup.
     *
     * @param Token $token The token to be printed.
     *
     * @return string
     */
    public function toString($token)
    {
        $text = $token->token;
        $type = $this->options['type'];

        foreach ($this->options['formats'] as $format) {
            if (($token->type === $format['type'])
                && (($token->flags & $format['flags']) === $format['flags'])
            ) {
                // Running transformation function.
                if (!empty($format['function'])) {
                    $func = $format['function'];
                    $text = $func($text);
                }

                // Formatting HTML.
                if ($type === 'html') {
                    return '<span ' . $format['html'] . '>' . $this->escapeHtml($text) . '</span>';
                } elseif ($type === 'cli') {
                    return $format['cli'] . $text;
                }

                // Only the first matching format is applied.
                break;
            }
        }

        if ($type === 'cli') {
            // Default foreground colour for tokens without a dedicated style.
            return "\e[39m" . $text;
        } elseif ($type === 'html') {
            return $this->escapeHtml($text);
        }
        return $text;
    }

    /**
     * Escapes a piece of token text for inclusion in HTML element content.
     *
     * Quotes are left untouched because the text is never placed inside an
     * attribute; invalid byte sequences are substituted instead of making the
     * whole string empty.
     *
     * @param string $text The text to be escaped.
     *
     * @return string
     */
    private function escapeHtml($text)
    {
        return htmlspecialchars((string) $text, ENT_NOQUOTES | ENT_SUBSTITUTE, 'UTF-8');
    }

    /**
     * Formats a query.
     *
     * Convenience wrapper that lexes the query and formats the resulting
     * list of tokens with a new formatter instance.
     *
     * @param string $query   The query to be formatted
     * @param array  $options The formatting options.
     *
     * @return string          The formatted string.
     */
    public static function format($query, array $options = array())
    {
        $lexer = new Lexer($query);
        $formatter = new Formatter($options);
        return $formatter->formatList($lexer->list);
    }

    /**
     * Computes the length of a group.
     *
     * A group is delimited by a pair of brackets. The scan starts at the
     * list's current position (`$list->idx`) and stops at the closing bracket
     * that matches the already consumed opening one, or at the end of the
     * list. The list itself is not modified.
     *
     * @param TokensList $list The list of tokens.
     *
     * @return int The summed length of the `value` of every scanned token,
     *             excluding the final closing bracket.
     */
    public static function getGroupLength($list)
    {

        /**
         * The number of opening brackets found.
         * This counter starts at one because by the time this function called,
         * the list already advanced one position and the opening bracket was
         * already parsed.
         *
         * @var int $count
         */
        $count = 1;

        /**
         * The length of this group.
         *
         * @var int $length
         */
        $length = 0;

        for ($idx = $list->idx; $idx < $list->count; ++$idx) {
            // Counting the brackets.
            if ($list->tokens[$idx]->type === Token::TYPE_OPERATOR) {
                if ($list->tokens[$idx]->value === '(') {
                    ++$count;
                } elseif ($list->tokens[$idx]->value === ')') {
                    --$count;
                    if ($count == 0) {
                        break;
                    }
                }
            }

            // Keeping track of this group's length.
            $length += mb_strlen($list->tokens[$idx]->value, 'UTF-8');
        }

        return $length;
    }

    /**
     * Checks if a token is a statement or a clause inside a statement.
     *
     * @param Token $token The token to be checked.
     *
     * @return int|bool `2` if the token starts a statement (a keyword listed
     *                  in `Parser::$STATEMENT_PARSERS`, or the untyped
     *                  `DELIMITER` token), `1` if it is a keyword listed in
     *                  `Parser::$KEYWORD_PARSERS`, `false` otherwise.
     */
    public static function isClause($token)
    {
        if ((($token->type === Token::TYPE_NONE) && (strtoupper($token->token) === 'DELIMITER'))
            || (($token->type === Token::TYPE_KEYWORD) && (isset(Parser::$STATEMENT_PARSERS[$token->value])))
        ) {
            return 2;
        } elseif (($token->type === Token::TYPE_KEYWORD) && (isset(Parser::$KEYWORD_PARSERS[$token->value]))) {
            return 1;
        }
        return false;
    }
}
549