SqlFormatter::highlightReservedWord()   A
last analyzed

Complexity

Conditions 2
Paths 2

Size

Total Lines 8

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 2
nc 2
nop 1
dl 0
loc 8
rs 10
c 0
b 0
f 0
1
<?php namespace EvolutionCMS\Support\Formatter;
2
3
use PDO;
4
use DateTime;
5
6
/**
7
 * SQL Formatter is a collection of utilities for debugging SQL queries.
8
 * It includes methods for formatting, syntax highlighting, removing comments, etc.
9
 *
10
 * @package    SqlFormatter
11
 * @author     Jeremy Dorn <[email protected]>
12
 * @author     Florin Patan <[email protected]>
13
 * @copyright  2013 Jeremy Dorn
14
 * @license    http://opensource.org/licenses/MIT
15
 * @link       http://github.com/jdorn/sql-formatter
16
 * @version    1.2.18
17
 *
18
 * @see: https://github.com/recca0120/laravel-tracy
19
 */
20
class SqlFormatter
21
{
22
    /**
23
     * KEYWORDS1.
24
     *
25
     * @var string
26
     */
27
    const KEYWORDS1 = 'SELECT|(?:ON\s+DUPLICATE\s+KEY)?UPDATE|INSERT(?:\s+INTO)?|REPLACE(?:\s+INTO)?|DELETE|CALL|UNION|FROM|WHERE|HAVING|GROUP\s+BY|ORDER\s+BY|LIMIT|OFFSET|SET|VALUES|LEFT\s+JOIN|INNER\s+JOIN|TRUNCATE';
28
29
    /**
30
     * KEYWORDS2.
31
     *
32
     * @var string
33
     */
34
    const KEYWORDS2 = 'ALL|DISTINCT|DISTINCTROW|IGNORE|AS|USING|ON|AND|OR|IN|IS|NOT|NULL|[RI]?LIKE|REGEXP|TRUE|FALSE';
35
36
    // Constants for token types
37
    const TOKEN_TYPE_WHITESPACE = 0;
38
    const TOKEN_TYPE_WORD = 1;
39
    const TOKEN_TYPE_QUOTE = 2;
40
    const TOKEN_TYPE_BACKTICK_QUOTE = 3;
41
    const TOKEN_TYPE_RESERVED = 4;
42
    const TOKEN_TYPE_RESERVED_TOPLEVEL = 5;
43
    const TOKEN_TYPE_RESERVED_NEWLINE = 6;
44
    const TOKEN_TYPE_BOUNDARY = 7;
45
    const TOKEN_TYPE_COMMENT = 8;
46
    const TOKEN_TYPE_BLOCK_COMMENT = 9;
47
    const TOKEN_TYPE_NUMBER = 10;
48
    const TOKEN_TYPE_ERROR = 11;
49
    const TOKEN_TYPE_VARIABLE = 12;
50
51
    // Constants for different components of a token
52
    const TOKEN_TYPE = 0;
53
    const TOKEN_VALUE = 1;
54
55
    // Reserved words (for syntax highlighting)
56
    protected static $reserved = array(
57
        'ACCESSIBLE',
58
        'ACTION',
59
        'AGAINST',
60
        'AGGREGATE',
61
        'ALGORITHM',
62
        'ALL',
63
        'ALTER',
64
        'ANALYSE',
65
        'ANALYZE',
66
        'AS',
67
        'ASC',
68
        'AUTOCOMMIT',
69
        'AUTO_INCREMENT',
70
        'BACKUP',
71
        'BEGIN',
72
        'BETWEEN',
73
        'BINLOG',
74
        'BOTH',
75
        'CASCADE',
76
        'CASE',
77
        'CHANGE',
78
        'CHANGED',
79
        'CHARACTER SET',
80
        'CHARSET',
81
        'CHECK',
82
        'CHECKSUM',
83
        'COLLATE',
84
        'COLLATION',
85
        'COLUMN',
86
        'COLUMNS',
87
        'COMMENT',
88
        'COMMIT',
89
        'COMMITTED',
90
        'COMPRESSED',
91
        'CONCURRENT',
92
        'CONSTRAINT',
93
        'CONTAINS',
94
        'CONVERT',
95
        'CREATE',
96
        'CROSS',
97
        'CURRENT_TIMESTAMP',
98
        'DATABASE',
99
        'DATABASES',
100
        'DAY',
101
        'DAY_HOUR',
102
        'DAY_MINUTE',
103
        'DAY_SECOND',
104
        'DEFAULT',
105
        'DEFINER',
106
        'DELAYED',
107
        'DELETE',
108
        'DESC',
109
        'DESCRIBE',
110
        'DETERMINISTIC',
111
        'DISTINCT',
112
        'DISTINCTROW',
113
        'DIV',
114
        'DO',
115
        'DUMPFILE',
116
        'DUPLICATE',
117
        'DYNAMIC',
118
        'ELSE',
119
        'ENCLOSED',
120
        'END',
121
        'ENGINE',
122
        'ENGINE_TYPE',
123
        'ENGINES',
124
        'ESCAPE',
125
        'ESCAPED',
126
        'EVENTS',
127
        'EXEC',
128
        'EXECUTE',
129
        'EXISTS',
130
        'EXPLAIN',
131
        'EXTENDED',
132
        'FAST',
133
        'FIELDS',
134
        'FILE',
135
        'FIRST',
136
        'FIXED',
137
        'FLUSH',
138
        'FOR',
139
        'FORCE',
140
        'FOREIGN',
141
        'FULL',
142
        'FULLTEXT',
143
        'FUNCTION',
144
        'GLOBAL',
145
        'GRANT',
146
        'GRANTS',
147
        'GROUP_CONCAT',
148
        'HEAP',
149
        'HIGH_PRIORITY',
150
        'HOSTS',
151
        'HOUR',
152
        'HOUR_MINUTE',
153
        'HOUR_SECOND',
154
        'IDENTIFIED',
155
        'IF',
156
        'IFNULL',
157
        'IGNORE',
158
        'IN',
159
        'INDEX',
160
        'INDEXES',
161
        'INFILE',
162
        'INSERT',
163
        'INSERT_ID',
164
        'INSERT_METHOD',
165
        'INTERVAL',
166
        'INTO',
167
        'INVOKER',
168
        'IS',
169
        'ISOLATION',
170
        'KEY',
171
        'KEYS',
172
        'KILL',
173
        'LAST_INSERT_ID',
174
        'LEADING',
175
        'LEVEL',
176
        'LIKE',
177
        'LINEAR',
178
        'LINES',
179
        'LOAD',
180
        'LOCAL',
181
        'LOCK',
182
        'LOCKS',
183
        'LOGS',
184
        'LOW_PRIORITY',
185
        'MARIA',
186
        'MASTER',
187
        'MASTER_CONNECT_RETRY',
188
        'MASTER_HOST',
189
        'MASTER_LOG_FILE',
190
        'MATCH',
191
        'MAX_CONNECTIONS_PER_HOUR',
192
        'MAX_QUERIES_PER_HOUR',
193
        'MAX_ROWS',
194
        'MAX_UPDATES_PER_HOUR',
195
        'MAX_USER_CONNECTIONS',
196
        'MEDIUM',
197
        'MERGE',
198
        'MINUTE',
199
        'MINUTE_SECOND',
200
        'MIN_ROWS',
201
        'MODE',
202
        'MODIFY',
203
        'MONTH',
204
        'MRG_MYISAM',
205
        'MYISAM',
206
        'NAMES',
207
        'NATURAL',
208
        'NOT',
209
        'NOW()',
210
        'NULL',
211
        'OFFSET',
212
        'ON',
213
        'OPEN',
214
        'OPTIMIZE',
215
        'OPTION',
216
        'OPTIONALLY',
217
        'ON UPDATE',
218
        'ON DELETE',
219
        'OUTFILE',
220
        'PACK_KEYS',
221
        'PAGE',
222
        'PARTIAL',
223
        'PARTITION',
224
        'PARTITIONS',
225
        'PASSWORD',
226
        'PRIMARY',
227
        'PRIVILEGES',
228
        'PROCEDURE',
229
        'PROCESS',
230
        'PROCESSLIST',
231
        'PURGE',
232
        'QUICK',
233
        'RANGE',
234
        'RAID0',
235
        'RAID_CHUNKS',
236
        'RAID_CHUNKSIZE',
237
        'RAID_TYPE',
238
        'READ',
239
        'READ_ONLY',
240
        'READ_WRITE',
241
        'REFERENCES',
242
        'REGEXP',
243
        'RELOAD',
244
        'RENAME',
245
        'REPAIR',
246
        'REPEATABLE',
247
        'REPLACE',
248
        'REPLICATION',
249
        'RESET',
250
        'RESTORE',
251
        'RESTRICT',
252
        'RETURN',
253
        'RETURNS',
254
        'REVOKE',
255
        'RLIKE',
256
        'ROLLBACK',
257
        'ROW',
258
        'ROWS',
259
        'ROW_FORMAT',
260
        'SECOND',
261
        'SECURITY',
262
        'SEPARATOR',
263
        'SERIALIZABLE',
264
        'SESSION',
265
        'SHARE',
266
        'SHOW',
267
        'SHUTDOWN',
268
        'SLAVE',
269
        'SONAME',
270
        'SOUNDS',
271
        'SQL',
272
        'SQL_AUTO_IS_NULL',
273
        'SQL_BIG_RESULT',
274
        'SQL_BIG_SELECTS',
275
        'SQL_BIG_TABLES',
276
        'SQL_BUFFER_RESULT',
277
        'SQL_CALC_FOUND_ROWS',
278
        'SQL_LOG_BIN',
279
        'SQL_LOG_OFF',
280
        'SQL_LOG_UPDATE',
281
        'SQL_LOW_PRIORITY_UPDATES',
282
        'SQL_MAX_JOIN_SIZE',
283
        'SQL_QUOTE_SHOW_CREATE',
284
        'SQL_SAFE_UPDATES',
285
        'SQL_SELECT_LIMIT',
286
        'SQL_SLAVE_SKIP_COUNTER',
287
        'SQL_SMALL_RESULT',
288
        'SQL_WARNINGS',
289
        'SQL_CACHE',
290
        'SQL_NO_CACHE',
291
        'START',
292
        'STARTING',
293
        'STATUS',
294
        'STOP',
295
        'STORAGE',
296
        'STRAIGHT_JOIN',
297
        'STRING',
298
        'STRIPED',
299
        'SUPER',
300
        'TABLE',
301
        'TABLES',
302
        'TEMPORARY',
303
        'TERMINATED',
304
        'THEN',
305
        'TO',
306
        'TRAILING',
307
        'TRANSACTIONAL',
308
        'TRUE',
309
        'TRUNCATE',
310
        'TYPE',
311
        'TYPES',
312
        'UNCOMMITTED',
313
        'UNIQUE',
314
        'UNLOCK',
315
        'UNSIGNED',
316
        'USAGE',
317
        'USE',
318
        'USING',
319
        'VARIABLES',
320
        'VIEW',
321
        'WHEN',
322
        'WITH',
323
        'WORK',
324
        'WRITE',
325
        'YEAR_MONTH'
326
    );
327
328
    // For SQL formatting
329
    // These keywords will all be on their own line
330
    /**
331
     * @var array
332
     */
333
    protected static $reserved_toplevel = array(
334
        'SELECT',
335
        'FROM',
336
        'WHERE',
337
        'SET',
338
        'ORDER BY',
339
        'GROUP BY',
340
        'LIMIT',
341
        'DROP',
342
        'VALUES',
343
        'UPDATE',
344
        'HAVING',
345
        'ADD',
346
        'AFTER',
347
        'ALTER TABLE',
348
        'DELETE FROM',
349
        'UNION ALL',
350
        'UNION',
351
        'EXCEPT',
352
        'INTERSECT'
353
    );
354
355
    /**
356
     * @var array
357
     */
358
    protected static $reserved_newline = array(
359
        'LEFT OUTER JOIN',
360
        'RIGHT OUTER JOIN',
361
        'LEFT JOIN',
362
        'RIGHT JOIN',
363
        'OUTER JOIN',
364
        'INNER JOIN',
365
        'JOIN',
366
        'XOR',
367
        'OR',
368
        'AND'
369
    );
370
371
    /**
372
     * @var array
373
     */
374
    protected static $functions = array(
375
        'ABS',
376
        'ACOS',
377
        'ADDDATE',
378
        'ADDTIME',
379
        'AES_DECRYPT',
380
        'AES_ENCRYPT',
381
        'AREA',
382
        'ASBINARY',
383
        'ASCII',
384
        'ASIN',
385
        'ASTEXT',
386
        'ATAN',
387
        'ATAN2',
388
        'AVG',
389
        'BDMPOLYFROMTEXT',
390
        'BDMPOLYFROMWKB',
391
        'BDPOLYFROMTEXT',
392
        'BDPOLYFROMWKB',
393
        'BENCHMARK',
394
        'BIN',
395
        'BIT_AND',
396
        'BIT_COUNT',
397
        'BIT_LENGTH',
398
        'BIT_OR',
399
        'BIT_XOR',
400
        'BOUNDARY',
401
        'BUFFER',
402
        'CAST',
403
        'CEIL',
404
        'CEILING',
405
        'CENTROID',
406
        'CHAR',
407
        'CHARACTER_LENGTH',
408
        'CHARSET',
409
        'CHAR_LENGTH',
410
        'COALESCE',
411
        'COERCIBILITY',
412
        'COLLATION',
413
        'COMPRESS',
414
        'CONCAT',
415
        'CONCAT_WS',
416
        'CONNECTION_ID',
417
        'CONTAINS',
418
        'CONV',
419
        'CONVERT',
420
        'CONVERT_TZ',
421
        'CONVEXHULL',
422
        'COS',
423
        'COT',
424
        'COUNT',
425
        'CRC32',
426
        'CROSSES',
427
        'CURDATE',
428
        'CURRENT_DATE',
429
        'CURRENT_TIME',
430
        'CURRENT_TIMESTAMP',
431
        'CURRENT_USER',
432
        'CURTIME',
433
        'DATABASE',
434
        'DATE',
435
        'DATEDIFF',
436
        'DATE_ADD',
437
        'DATE_DIFF',
438
        'DATE_FORMAT',
439
        'DATE_SUB',
440
        'DAY',
441
        'DAYNAME',
442
        'DAYOFMONTH',
443
        'DAYOFWEEK',
444
        'DAYOFYEAR',
445
        'DECODE',
446
        'DEFAULT',
447
        'DEGREES',
448
        'DES_DECRYPT',
449
        'DES_ENCRYPT',
450
        'DIFFERENCE',
451
        'DIMENSION',
452
        'DISJOINT',
453
        'DISTANCE',
454
        'ELT',
455
        'ENCODE',
456
        'ENCRYPT',
457
        'ENDPOINT',
458
        'ENVELOPE',
459
        'EQUALS',
460
        'EXP',
461
        'EXPORT_SET',
462
        'EXTERIORRING',
463
        'EXTRACT',
464
        'EXTRACTVALUE',
465
        'FIELD',
466
        'FIND_IN_SET',
467
        'FLOOR',
468
        'FORMAT',
469
        'FOUND_ROWS',
470
        'FROM_DAYS',
471
        'FROM_UNIXTIME',
472
        'GEOMCOLLFROMTEXT',
473
        'GEOMCOLLFROMWKB',
474
        'GEOMETRYCOLLECTION',
475
        'GEOMETRYCOLLECTIONFROMTEXT',
476
        'GEOMETRYCOLLECTIONFROMWKB',
477
        'GEOMETRYFROMTEXT',
478
        'GEOMETRYFROMWKB',
479
        'GEOMETRYN',
480
        'GEOMETRYTYPE',
481
        'GEOMFROMTEXT',
482
        'GEOMFROMWKB',
483
        'GET_FORMAT',
484
        'GET_LOCK',
485
        'GLENGTH',
486
        'GREATEST',
487
        'GROUP_CONCAT',
488
        'GROUP_UNIQUE_USERS',
489
        'HEX',
490
        'HOUR',
491
        'IF',
492
        'IFNULL',
493
        'INET_ATON',
494
        'INET_NTOA',
495
        'INSERT',
496
        'INSTR',
497
        'INTERIORRINGN',
498
        'INTERSECTION',
499
        'INTERSECTS',
500
        'INTERVAL',
501
        'ISCLOSED',
502
        'ISEMPTY',
503
        'ISNULL',
504
        'ISRING',
505
        'ISSIMPLE',
506
        'IS_FREE_LOCK',
507
        'IS_USED_LOCK',
508
        'LAST_DAY',
509
        'LAST_INSERT_ID',
510
        'LCASE',
511
        'LEAST',
512
        'LEFT',
513
        'LENGTH',
514
        'LINEFROMTEXT',
515
        'LINEFROMWKB',
516
        'LINESTRING',
517
        'LINESTRINGFROMTEXT',
518
        'LINESTRINGFROMWKB',
519
        'LN',
520
        'LOAD_FILE',
521
        'LOCALTIME',
522
        'LOCALTIMESTAMP',
523
        'LOCATE',
524
        'LOG',
525
        'LOG10',
526
        'LOG2',
527
        'LOWER',
528
        'LPAD',
529
        'LTRIM',
530
        'MAKEDATE',
531
        'MAKETIME',
532
        'MAKE_SET',
533
        'MASTER_POS_WAIT',
534
        'MAX',
535
        'MBRCONTAINS',
536
        'MBRDISJOINT',
537
        'MBREQUAL',
538
        'MBRINTERSECTS',
539
        'MBROVERLAPS',
540
        'MBRTOUCHES',
541
        'MBRWITHIN',
542
        'MD5',
543
        'MICROSECOND',
544
        'MID',
545
        'MIN',
546
        'MINUTE',
547
        'MLINEFROMTEXT',
548
        'MLINEFROMWKB',
549
        'MOD',
550
        'MONTH',
551
        'MONTHNAME',
552
        'MPOINTFROMTEXT',
553
        'MPOINTFROMWKB',
554
        'MPOLYFROMTEXT',
555
        'MPOLYFROMWKB',
556
        'MULTILINESTRING',
557
        'MULTILINESTRINGFROMTEXT',
558
        'MULTILINESTRINGFROMWKB',
559
        'MULTIPOINT',
560
        'MULTIPOINTFROMTEXT',
561
        'MULTIPOINTFROMWKB',
562
        'MULTIPOLYGON',
563
        'MULTIPOLYGONFROMTEXT',
564
        'MULTIPOLYGONFROMWKB',
565
        'NAME_CONST',
566
        'NULLIF',
567
        'NUMGEOMETRIES',
568
        'NUMINTERIORRINGS',
569
        'NUMPOINTS',
570
        'OCT',
571
        'OCTET_LENGTH',
572
        'OLD_PASSWORD',
573
        'ORD',
574
        'OVERLAPS',
575
        'PASSWORD',
576
        'PERIOD_ADD',
577
        'PERIOD_DIFF',
578
        'PI',
579
        'POINT',
580
        'POINTFROMTEXT',
581
        'POINTFROMWKB',
582
        'POINTN',
583
        'POINTONSURFACE',
584
        'POLYFROMTEXT',
585
        'POLYFROMWKB',
586
        'POLYGON',
587
        'POLYGONFROMTEXT',
588
        'POLYGONFROMWKB',
589
        'POSITION',
590
        'POW',
591
        'POWER',
592
        'QUARTER',
593
        'QUOTE',
594
        'RADIANS',
595
        'RAND',
596
        'RELATED',
597
        'RELEASE_LOCK',
598
        'REPEAT',
599
        'REPLACE',
600
        'REVERSE',
601
        'RIGHT',
602
        'ROUND',
603
        'ROW_COUNT',
604
        'RPAD',
605
        'RTRIM',
606
        'SCHEMA',
607
        'SECOND',
608
        'SEC_TO_TIME',
609
        'SESSION_USER',
610
        'SHA',
611
        'SHA1',
612
        'SIGN',
613
        'SIN',
614
        'SLEEP',
615
        'SOUNDEX',
616
        'SPACE',
617
        'SQRT',
618
        'SRID',
619
        'STARTPOINT',
620
        'STD',
621
        'STDDEV',
622
        'STDDEV_POP',
623
        'STDDEV_SAMP',
624
        'STRCMP',
625
        'STR_TO_DATE',
626
        'SUBDATE',
627
        'SUBSTR',
628
        'SUBSTRING',
629
        'SUBSTRING_INDEX',
630
        'SUBTIME',
631
        'SUM',
632
        'SYMDIFFERENCE',
633
        'SYSDATE',
634
        'SYSTEM_USER',
635
        'TAN',
636
        'TIME',
637
        'TIMEDIFF',
638
        'TIMESTAMP',
639
        'TIMESTAMPADD',
640
        'TIMESTAMPDIFF',
641
        'TIME_FORMAT',
642
        'TIME_TO_SEC',
643
        'TOUCHES',
644
        'TO_DAYS',
645
        'TRIM',
646
        'TRUNCATE',
647
        'UCASE',
648
        'UNCOMPRESS',
649
        'UNCOMPRESSED_LENGTH',
650
        'UNHEX',
651
        'UNIQUE_USERS',
652
        'UNIX_TIMESTAMP',
653
        'UPDATEXML',
654
        'UPPER',
655
        'USER',
656
        'UTC_DATE',
657
        'UTC_TIME',
658
        'UTC_TIMESTAMP',
659
        'UUID',
660
        'VARIANCE',
661
        'VAR_POP',
662
        'VAR_SAMP',
663
        'VERSION',
664
        'WEEK',
665
        'WEEKDAY',
666
        'WEEKOFYEAR',
667
        'WITHIN',
668
        'X',
669
        'Y',
670
        'YEAR',
671
        'YEARWEEK'
672
    );
673
674
    // Punctuation that can be used as a boundary between other tokens
675
    protected static $boundaries = array(
676
        ',',
677
        ';',
678
        ':',
679
        ')',
680
        '(',
681
        '.',
682
        '=',
683
        '<',
684
        '>',
685
        '+',
686
        '-',
687
        '*',
688
        '/',
689
        '!',
690
        '^',
691
        '%',
692
        '|',
693
        '&',
694
        '#'
695
    );
696
697
    // For HTML syntax highlighting
698
    // Styles applied to different token types
699
    public static $quote_attributes = 'style="color: blue;"';
700
    public static $backtick_quote_attributes = 'style="color: purple;"';
701
    public static $reserved_attributes = 'style="font-weight:bold;"';
702
    public static $boundary_attributes = '';
703
    public static $number_attributes = 'style="color: green;"';
704
    public static $word_attributes = 'style="color: #333;"';
705
    public static $error_attributes = 'style="background-color: red;"';
706
    public static $comment_attributes = 'style="color: #aaa;"';
707
    public static $variable_attributes = 'style="color: orange;"';
708
    public static $pre_attributes = 'style="color: black; background-color: white;"';
709
710
    // Boolean - whether or not the current environment is the CLI
711
    // This affects the type of syntax highlighting
712
    // If not defined, it will be determined automatically
713
    public static $cli;
714
715
    // For CLI syntax highlighting
716
    public static $cli_quote = "\x1b[34;1m";
717
    public static $cli_backtick_quote = "\x1b[35;1m";
718
    public static $cli_reserved = "\x1b[37m";
719
    public static $cli_boundary = "";
720
    public static $cli_number = "\x1b[32;1m";
721
    public static $cli_word = "";
722
    public static $cli_error = "\x1b[31;1;7m";
723
    public static $cli_comment = "\x1b[30;1m";
724
    public static $cli_functions = "\x1b[37m";
725
    public static $cli_variable = "\x1b[36;1m";
726
727
    // The tab character to use when formatting SQL
728
    public static $tab = '  ';
729
730
    // This flag tells us if queries need to be enclosed in <pre> tags
731
    public static $use_pre = true;
732
733
    // This flag tells us if SqlFormatted has been initialized
734
    protected static $init;
735
736
    // Regular expressions for tokenizing
737
    protected static $regex_boundaries;
738
    protected static $regex_reserved;
739
    protected static $regex_reserved_newline;
740
    protected static $regex_reserved_toplevel;
741
    protected static $regex_function;
742
743
    // Cache variables
744
    // Only tokens shorter than this size will be cached.  Somewhere between 10 and 20 seems to work well for most cases.
745
    public static $max_cachekey_size = 15;
746
    protected static $token_cache = array();
747
    protected static $cache_hits = 0;
748
    protected static $cache_misses = 0;
749
750
    /**
751
     * Get stats about the token cache
752
     * @return Array An array containing the keys 'hits', 'misses', 'entries', and 'size' in bytes
0 ignored issues
show
Documentation introduced by
Consider making the return type a bit more specific; maybe use array<string,integer>.

This check looks for the generic type array as a return type and suggests a more specific type. This type is inferred from the actual code.

Loading history...
753
     */
754
    public static function getCacheStats()
755
    {
756
        return array(
757
            'hits'    => self::$cache_hits,
758
            'misses'  => self::$cache_misses,
759
            'entries' => count(self::$token_cache),
760
            'size'    => strlen(serialize(self::$token_cache))
761
        );
762
    }
763
764
    /**
765
     * Stuff that only needs to be done once.  Builds regular expressions and sorts the reserved words.
766
     */
767
    protected static function init()
768
    {
769
        if (self::$init) {
770
            return;
771
        }
772
773
        // Sort reserved word list from longest word to shortest, 3x faster than usort
774
        $reservedMap = array_combine(self::$reserved, array_map('strlen', self::$reserved));
775
        arsort($reservedMap);
776
        self::$reserved = array_keys($reservedMap);
777
778
        // Set up regular expressions
779
        self::$regex_boundaries = '(' . implode('|',
780
                array_map(array(__CLASS__, 'quote_regex'), self::$boundaries)) . ')';
781
        self::$regex_reserved = '(' . implode('|', array_map(array(__CLASS__, 'quote_regex'), self::$reserved)) . ')';
782
        self::$regex_reserved_toplevel = str_replace(' ', '\\s+',
783
            '(' . implode('|', array_map(array(__CLASS__, 'quote_regex'), self::$reserved_toplevel)) . ')');
784
        self::$regex_reserved_newline = str_replace(' ', '\\s+',
785
            '(' . implode('|', array_map(array(__CLASS__, 'quote_regex'), self::$reserved_newline)) . ')');
786
787
        self::$regex_function = '(' . implode('|', array_map(array(__CLASS__, 'quote_regex'), self::$functions)) . ')';
788
789
        self::$init = true;
790
    }
791
792
    /**
793
     * Return the next token and token type in a SQL string.
794
     * Quoted strings, comments, reserved words, whitespace, and punctuation are all their own tokens.
795
     *
796
     * @param String $string The SQL string
797
     * @param array $previous The result of the previous getNextToken() call
0 ignored issues
show
Documentation introduced by
Should the type for parameter $previous not be array|null? Also, consider making the array more specific, something like array<String>, or String[].

This check looks for @param annotations where the type inferred by our type inference engine differs from the declared type.

It makes a suggestion as to what type it considers more descriptive. In addition it looks for parameters that have the generic type array and suggests a stricter type like array<String>.

Most often this is a case of a parameter that can be null in addition to its declared types.

Loading history...
798
     *
799
     * @return Array An associative array containing the type and value of the token.
0 ignored issues
show
Documentation introduced by
Consider making the return type a bit more specific; maybe use array<integer|string|null>.

This check looks for the generic type array as a return type and suggests a more specific type. This type is inferred from the actual code.

Loading history...
800
     */
801
    protected static function getNextToken($string, $previous = null)
802
    {
803
        // Whitespace
804 View Code Duplication
        if (preg_match('/^\s+/', $string, $matches)) {
805
            return array(
806
                self::TOKEN_VALUE => $matches[0],
807
                self::TOKEN_TYPE  => self::TOKEN_TYPE_WHITESPACE
808
            );
809
        }
810
811
        // Comment
812
        if ($string[0] === '#' || (isset($string[1]) && ($string[0] === '-' && $string[1] === '-') || ($string[0] === '/' && $string[1] === '*'))) {
813
            // Comment until end of line
814
            if ($string[0] === '-' || $string[0] === '#') {
815
                $last = strpos($string, "\n");
816
                $type = self::TOKEN_TYPE_COMMENT;
817
            } else {
818
// Comment until closing comment tag
819
                $last = strpos($string, "*/", 2) + 2;
820
                $type = self::TOKEN_TYPE_BLOCK_COMMENT;
821
            }
822
823
            if ($last === false) {
824
                $last = strlen($string);
825
            }
826
827
            return array(
828
                self::TOKEN_VALUE => substr($string, 0, $last),
829
                self::TOKEN_TYPE  => $type
830
            );
831
        }
832
833
        // Quoted String
834
        if ($string[0] === '"' || $string[0] === '\'' || $string[0] === '`' || $string[0] === '[') {
835
            $return = array(
836
                self::TOKEN_TYPE  => (($string[0] === '`' || $string[0] === '[') ? self::TOKEN_TYPE_BACKTICK_QUOTE : self::TOKEN_TYPE_QUOTE),
837
                self::TOKEN_VALUE => self::getQuotedString($string)
838
            );
839
840
            return $return;
841
        }
842
843
        // User-defined Variable
844
        if (($string[0] === '@' || $string[0] === ':') && isset($string[1])) {
845
            $ret = array(
846
                self::TOKEN_VALUE => null,
847
                self::TOKEN_TYPE  => self::TOKEN_TYPE_VARIABLE
848
            );
849
850
            // If the variable name is quoted
851
            if ($string[1] === '"' || $string[1] === '\'' || $string[1] === '`') {
852
                $ret[self::TOKEN_VALUE] = $string[0] . self::getQuotedString(substr($string, 1));
853
            } // Non-quoted variable name
854
            else {
855
                preg_match('/^(' . $string[0] . '[a-zA-Z0-9\._\$]+)/', $string, $matches);
856
                if (!empty($matches)) {
857
                    $ret[self::TOKEN_VALUE] = $matches[1];
858
                }
859
            }
860
861
            if ($ret[self::TOKEN_VALUE] !== null) {
862
                return $ret;
863
            }
864
        }
865
866
        // Number (decimal, binary, or hex)
867 View Code Duplication
        if (preg_match('/^([0-9]+(\.[0-9]+)?|0x[0-9a-fA-F]+|0b[01]+)($|\s|"\'`|' . self::$regex_boundaries . ')/',
868
            $string, $matches)) {
869
            return array(
870
                self::TOKEN_VALUE => $matches[1],
871
                self::TOKEN_TYPE  => self::TOKEN_TYPE_NUMBER
872
            );
873
        }
874
875
        // Boundary Character (punctuation and symbols)
876 View Code Duplication
        if (preg_match('/^(' . self::$regex_boundaries . ')/', $string, $matches)) {
877
            return array(
878
                self::TOKEN_VALUE => $matches[1],
879
                self::TOKEN_TYPE  => self::TOKEN_TYPE_BOUNDARY
880
            );
881
        }
882
883
        // A reserved word cannot be preceded by a '.'
884
        // this makes it so in "mytable.from", "from" is not considered a reserved word
885
        if (!$previous || !isset($previous[self::TOKEN_VALUE]) || $previous[self::TOKEN_VALUE] !== '.') {
886
            $upper = strtoupper($string);
887
            // Top Level Reserved Word
888 View Code Duplication
            if (preg_match('/^(' . self::$regex_reserved_toplevel . ')($|\s|' . self::$regex_boundaries . ')/', $upper,
889
                $matches)) {
890
                return array(
891
                    self::TOKEN_TYPE  => self::TOKEN_TYPE_RESERVED_TOPLEVEL,
892
                    self::TOKEN_VALUE => substr($string, 0, strlen($matches[1]))
893
                );
894
            }
895
            // Newline Reserved Word
896 View Code Duplication
            if (preg_match('/^(' . self::$regex_reserved_newline . ')($|\s|' . self::$regex_boundaries . ')/', $upper,
897
                $matches)) {
898
                return array(
899
                    self::TOKEN_TYPE  => self::TOKEN_TYPE_RESERVED_NEWLINE,
900
                    self::TOKEN_VALUE => substr($string, 0, strlen($matches[1]))
901
                );
902
            }
903
            // Other Reserved Word
904 View Code Duplication
            if (preg_match('/^(' . self::$regex_reserved . ')($|\s|' . self::$regex_boundaries . ')/', $upper,
905
                $matches)) {
906
                return array(
907
                    self::TOKEN_TYPE  => self::TOKEN_TYPE_RESERVED,
908
                    self::TOKEN_VALUE => substr($string, 0, strlen($matches[1]))
909
                );
910
            }
911
        }
912
913
        // A function must be suceeded by '('
914
        // this makes it so "count(" is considered a function, but "count" alone is not
915
        $upper = strtoupper($string);
916
        // function
917 View Code Duplication
        if (preg_match('/^(' . self::$regex_function . '[(]|\s|[)])/', $upper, $matches)) {
918
            return array(
919
                self::TOKEN_TYPE  => self::TOKEN_TYPE_RESERVED,
920
                self::TOKEN_VALUE => substr($string, 0, strlen($matches[1]) - 1)
921
            );
922
        }
923
924
        // Non reserved word
925
        preg_match('/^(.*?)($|\s|["\'`]|' . self::$regex_boundaries . ')/', $string, $matches);
926
927
        return array(
928
            self::TOKEN_VALUE => $matches[1],
929
            self::TOKEN_TYPE  => self::TOKEN_TYPE_WORD
930
        );
931
    }
932
933
    /**
934
     * @param $string
935
     * @return null
0 ignored issues
show
Documentation introduced by
Should the return type not be string|null?

This check compares the return type specified in the @return annotation of a function or method doc comment with the types returned by the function and raises an issue if they mismatch.

Loading history...
936
     */
937
    protected static function getQuotedString($string)
938
    {
939
        $ret = null;
940
941
        // This checks for the following patterns:
942
        // 1. backtick quoted string using `` to escape
943
        // 2. square bracket quoted string (SQL Server) using ]] to escape
944
        // 3. double quoted string using "" or \" to escape
945
        // 4. single quoted string using '' or \' to escape
946
        if (preg_match('/^(((`[^`]*($|`))+)|((\[[^\]]*($|\]))(\][^\]]*($|\]))*)|(("[^"\\\\]*(?:\\\\.[^"\\\\]*)*("|$))+)|((\'[^\'\\\\]*(?:\\\\.[^\'\\\\]*)*(\'|$))+))/s',
947
            $string, $matches)) {
948
            $ret = $matches[1];
949
        }
950
951
        return $ret;
952
    }
953
954
    /**
955
     * Takes a SQL string and breaks it into tokens.
956
     * Each token is an associative array with type and value.
957
     *
958
     * @param String $string The SQL string
959
     *
960
     * @return Array An array of tokens.
961
     */
962
    protected static function tokenize($string)
963
    {
964
        self::init();
965
966
        $tokens = array();
967
968
        // Used to make sure the string keeps shrinking on each iteration
969
        $old_string_len = strlen($string) + 1;
970
971
        $token = null;
972
973
        $current_length = strlen($string);
974
975
        // Keep processing the string until it is empty
976
        while ($current_length) {
977
            // If the string stopped shrinking, there was a problem
978
            if ($old_string_len <= $current_length) {
979
                $tokens[] = array(
980
                    self::TOKEN_VALUE => $string,
981
                    self::TOKEN_TYPE  => self::TOKEN_TYPE_ERROR
982
                );
983
984
                return $tokens;
985
            }
986
            $old_string_len = $current_length;
987
988
            // Determine if we can use caching
989
            if ($current_length >= self::$max_cachekey_size) {
990
                $cacheKey = substr($string, 0, self::$max_cachekey_size);
991
            } else {
992
                $cacheKey = false;
993
            }
994
995
            // See if the token is already cached
996
            if ($cacheKey !== false && isset(self::$token_cache[$cacheKey])) {
997
                // Retrieve from cache
998
                $token = self::$token_cache[$cacheKey];
999
                $token_length = strlen($token[self::TOKEN_VALUE]);
1000
                self::$cache_hits++;
1001
            } else {
1002
                // Get the next token and the token type
1003
                $token = self::getNextToken($string, $token);
1004
                $token_length = strlen($token[self::TOKEN_VALUE]);
1005
                self::$cache_misses++;
1006
1007
                // If the token is shorter than the max length, store it in cache
1008
                if ($cacheKey !== false && $token_length < self::$max_cachekey_size) {
1009
                    self::$token_cache[$cacheKey] = $token;
1010
                }
1011
            }
1012
1013
            $tokens[] = $token;
1014
1015
            // Advance the string
1016
            $string = substr($string, $token_length);
1017
1018
            $current_length -= $token_length;
1019
        }
1020
1021
        return $tokens;
1022
    }
1023
1024
    /**
1025
     * Format the whitespace in a SQL string to make it easier to read.
1026
     *
1027
     * @param String $string The SQL string
1028
     * @param boolean $highlight If true, syntax highlighting will also be performed
1029
     *
1030
     * @return String The SQL string with HTML styles and formatting wrapped in a <pre> tag
1031
     */
1032
    public static function format($string, $highlight = true)
1033
    {
1034
        // This variable will be populated with formatted html
1035
        $return = '';
1036
1037
        // Use an actual tab while formatting and then switch out with self::$tab at the end
1038
        $tab = "\t";
1039
1040
        $indent_level = 0;
1041
        $newline = false;
1042
        $inline_parentheses = false;
1043
        $increase_special_indent = false;
1044
        $increase_block_indent = false;
1045
        $indent_types = array();
1046
        $inline_count = 0;
1047
        $inline_indented = false;
1048
        $clause_limit = false;
1049
1050
        // Tokenize String
1051
        $original_tokens = self::tokenize($string);
1052
1053
        // Remove existing whitespace
1054
        $tokens = array();
1055
        foreach ($original_tokens as $i => $token) {
1056
            if ($token[self::TOKEN_TYPE] !== self::TOKEN_TYPE_WHITESPACE) {
1057
                $token['i'] = $i;
1058
                $tokens[] = $token;
1059
            }
1060
        }
1061
1062
        // Format token by token
1063
        foreach ($tokens as $i => $token) {
1064
            // Get highlighted token if doing syntax highlighting
1065
            if ($highlight) {
1066
                $highlighted = self::highlightToken($token);
1067
            } else {
1068
// If returning raw text
1069
                $highlighted = $token[self::TOKEN_VALUE];
1070
            }
1071
1072
            // If we are increasing the special indent level now
1073
            if ($increase_special_indent) {
1074
                $indent_level++;
1075
                $increase_special_indent = false;
1076
                array_unshift($indent_types, 'special');
1077
            }
1078
            // If we are increasing the block indent level now
1079
            if ($increase_block_indent) {
1080
                $indent_level++;
1081
                $increase_block_indent = false;
1082
                array_unshift($indent_types, 'block');
1083
            }
1084
1085
            // If we need a new line before the token
1086
            if ($newline) {
1087
                $return .= "\n" . str_repeat($tab, $indent_level);
1088
                $newline = false;
1089
                $added_newline = true;
1090
            } else {
1091
                $added_newline = false;
1092
            }
1093
1094
            // Display comments directly where they appear in the source
1095
            if ($token[self::TOKEN_TYPE] === self::TOKEN_TYPE_COMMENT || $token[self::TOKEN_TYPE] === self::TOKEN_TYPE_BLOCK_COMMENT) {
1096
                if ($token[self::TOKEN_TYPE] === self::TOKEN_TYPE_BLOCK_COMMENT) {
1097
                    $indent = str_repeat($tab, $indent_level);
1098
                    $return .= "\n" . $indent;
1099
                    $highlighted = str_replace("\n", "\n" . $indent, $highlighted);
1100
                }
1101
1102
                $return .= $highlighted;
1103
                $newline = true;
1104
                continue;
1105
            }
1106
1107
            if ($inline_parentheses) {
1108
                // End of inline parentheses
1109
                if ($token[self::TOKEN_VALUE] === ')') {
1110
                    $return = rtrim($return, ' ');
1111
1112
                    if ($inline_indented) {
1113
                        array_shift($indent_types);
1114
                        $indent_level--;
1115
                        $return .= "\n" . str_repeat($tab, $indent_level);
1116
                    }
1117
1118
                    $inline_parentheses = false;
1119
1120
                    $return .= $highlighted . ' ';
1121
                    continue;
1122
                }
1123
1124
                if ($token[self::TOKEN_VALUE] === ',') {
1125
                    if ($inline_count >= 30) {
1126
                        $inline_count = 0;
1127
                        $newline = true;
1128
                    }
1129
                }
1130
1131
                $inline_count += strlen($token[self::TOKEN_VALUE]);
1132
            }
1133
1134
            // Opening parentheses increase the block indent level and start a new line
1135
            if ($token[self::TOKEN_VALUE] === '(') {
1136
                // First check if this should be an inline parentheses block
1137
                // Examples are "NOW()", "COUNT(*)", "int(10)", key(`somecolumn`), DECIMAL(7,2)
1138
                // Allow up to 3 non-whitespace tokens inside inline parentheses
1139
                $length = 0;
1140
                for ($j = 1; $j <= 250; $j++) {
1141
                    // Reached end of string
1142
                    if (!isset($tokens[$i + $j])) {
1143
                        break;
1144
                    }
1145
1146
                    $next = $tokens[$i + $j];
1147
1148
                    // Reached closing parentheses, able to inline it
1149
                    if ($next[self::TOKEN_VALUE] === ')') {
1150
                        $inline_parentheses = true;
1151
                        $inline_count = 0;
1152
                        $inline_indented = false;
1153
                        break;
1154
                    }
1155
1156
                    // Reached an invalid token for inline parentheses
1157
                    if ($next[self::TOKEN_VALUE] === ';' || $next[self::TOKEN_VALUE] === '(') {
1158
                        break;
1159
                    }
1160
1161
                    // Reached an invalid token type for inline parentheses
1162
                    if ($next[self::TOKEN_TYPE] === self::TOKEN_TYPE_RESERVED_TOPLEVEL || $next[self::TOKEN_TYPE] === self::TOKEN_TYPE_RESERVED_NEWLINE || $next[self::TOKEN_TYPE] === self::TOKEN_TYPE_COMMENT || $next[self::TOKEN_TYPE] === self::TOKEN_TYPE_BLOCK_COMMENT) {
1163
                        break;
1164
                    }
1165
1166
                    $length += strlen($next[self::TOKEN_VALUE]);
1167
                }
1168
1169
                if ($inline_parentheses && $length > 30) {
1170
                    $increase_block_indent = true;
1171
                    $inline_indented = true;
1172
                    $newline = true;
1173
                }
1174
1175
                // Take out the preceding space unless there was whitespace there in the original query
1176 View Code Duplication
                if (isset($original_tokens[$token['i'] - 1]) && $original_tokens[$token['i'] - 1][self::TOKEN_TYPE] !== self::TOKEN_TYPE_WHITESPACE) {
1177
                    $return = rtrim($return, ' ');
1178
                }
1179
1180
                if (!$inline_parentheses) {
1181
                    $increase_block_indent = true;
1182
                    // Add a newline after the parentheses
1183
                    $newline = true;
1184
                }
1185
1186
            } // Closing parentheses decrease the block indent level
1187
            elseif ($token[self::TOKEN_VALUE] === ')') {
1188
                // Remove whitespace before the closing parentheses
1189
                $return = rtrim($return, ' ');
1190
1191
                $indent_level--;
1192
1193
                // Reset indent level
1194
                while ($j = array_shift($indent_types)) {
1195
                    if ($j === 'special') {
1196
                        $indent_level--;
1197
                    } else {
1198
                        break;
1199
                    }
1200
                }
1201
1202
                if ($indent_level < 0) {
1203
                    // This is an error
1204
                    $indent_level = 0;
1205
1206
                    if ($highlight) {
1207
                        $return .= "\n" . self::highlightError($token[self::TOKEN_VALUE]);
1208
                        continue;
1209
                    }
1210
                }
1211
1212
                // Add a newline before the closing parentheses (if not already added)
1213
                if (!$added_newline) {
1214
                    $return .= "\n" . str_repeat($tab, $indent_level);
1215
                }
1216
            } // Top level reserved words start a new line and increase the special indent level
1217
            elseif ($token[self::TOKEN_TYPE] === self::TOKEN_TYPE_RESERVED_TOPLEVEL) {
1218
                $increase_special_indent = true;
1219
1220
                // If the last indent type was 'special', decrease the special indent for this round
1221
                reset($indent_types);
1222
                if (current($indent_types) === 'special') {
1223
                    $indent_level--;
1224
                    array_shift($indent_types);
1225
                }
1226
1227
                // Add a newline after the top level reserved word
1228
                $newline = true;
1229
                // Add a newline before the top level reserved word (if not already added)
1230
                if (!$added_newline) {
1231
                    $return .= "\n" . str_repeat($tab, $indent_level);
1232
                } // If we already added a newline, redo the indentation since it may be different now
1233
                else {
1234
                    $return = rtrim($return, $tab) . str_repeat($tab, $indent_level);
1235
                }
1236
1237
                // If the token may have extra whitespace
1238 View Code Duplication
                if (strpos($token[self::TOKEN_VALUE], ' ') !== false || strpos($token[self::TOKEN_VALUE],
1239
                        "\n") !== false || strpos($token[self::TOKEN_VALUE], "\t") !== false
1240
                ) {
1241
                    $highlighted = preg_replace('/\s+/', ' ', $highlighted);
1242
                }
1243
                //if SQL 'LIMIT' clause, start variable to reset newline
1244
                if ($token[self::TOKEN_VALUE] === 'LIMIT' && !$inline_parentheses) {
1245
                    $clause_limit = true;
1246
                }
1247
            } // Checks if we are out of the limit clause
1248
            elseif ($clause_limit && $token[self::TOKEN_VALUE] !== "," && $token[self::TOKEN_TYPE] !== self::TOKEN_TYPE_NUMBER && $token[self::TOKEN_TYPE] !== self::TOKEN_TYPE_WHITESPACE) {
1249
                $clause_limit = false;
1250
            } // Commas start a new line (unless within inline parentheses or SQL 'LIMIT' clause)
1251
            elseif ($token[self::TOKEN_VALUE] === ',' && !$inline_parentheses) {
1252
                //If the previous TOKEN_VALUE is 'LIMIT', resets new line
1253
                if ($clause_limit === true) {
1254
                    $newline = false;
1255
                    $clause_limit = false;
1256
                } // All other cases of commas
1257
                else {
1258
                    $newline = true;
1259
                }
1260
            } // Newline reserved words start a new line
1261
            elseif ($token[self::TOKEN_TYPE] === self::TOKEN_TYPE_RESERVED_NEWLINE) {
1262
                // Add a newline before the reserved word (if not already added)
1263
                if (!$added_newline) {
1264
                    $return .= "\n" . str_repeat($tab, $indent_level);
1265
                }
1266
1267
                // If the token may have extra whitespace
1268 View Code Duplication
                if (strpos($token[self::TOKEN_VALUE], ' ') !== false || strpos($token[self::TOKEN_VALUE],
1269
                        "\n") !== false || strpos($token[self::TOKEN_VALUE], "\t") !== false
1270
                ) {
1271
                    $highlighted = preg_replace('/\s+/', ' ', $highlighted);
1272
                }
1273
            } // Multiple boundary characters in a row should not have spaces between them (not including parentheses)
1274
            elseif ($token[self::TOKEN_TYPE] === self::TOKEN_TYPE_BOUNDARY) {
1275
                if (isset($tokens[$i - 1]) && $tokens[$i - 1][self::TOKEN_TYPE] === self::TOKEN_TYPE_BOUNDARY) {
1276 View Code Duplication
                    if (isset($original_tokens[$token['i'] - 1]) && $original_tokens[$token['i'] - 1][self::TOKEN_TYPE] !== self::TOKEN_TYPE_WHITESPACE) {
1277
                        $return = rtrim($return, ' ');
1278
                    }
1279
                }
1280
            }
1281
1282
            // If the token shouldn't have a space before it
1283 View Code Duplication
            if ($token[self::TOKEN_VALUE] === '.' || $token[self::TOKEN_VALUE] === ',' || $token[self::TOKEN_VALUE] === ';') {
1284
                $return = rtrim($return, ' ');
1285
            }
1286
1287
            $return .= $highlighted . ' ';
1288
1289
            // If the token shouldn't have a space after it
1290 View Code Duplication
            if ($token[self::TOKEN_VALUE] === '(' || $token[self::TOKEN_VALUE] === '.') {
1291
                $return = rtrim($return, ' ');
1292
            }
1293
1294
            // If this is the "-" of a negative number, it shouldn't have a space after it
1295
            if ($token[self::TOKEN_VALUE] === '-' && isset($tokens[$i + 1]) && $tokens[$i + 1][self::TOKEN_TYPE] === self::TOKEN_TYPE_NUMBER && isset($tokens[$i - 1])) {
1296
                $prev = $tokens[$i - 1][self::TOKEN_TYPE];
1297
                if ($prev !== self::TOKEN_TYPE_QUOTE && $prev !== self::TOKEN_TYPE_BACKTICK_QUOTE && $prev !== self::TOKEN_TYPE_WORD && $prev !== self::TOKEN_TYPE_NUMBER) {
1298
                    $return = rtrim($return, ' ');
1299
                }
1300
            }
1301
        }
1302
1303
        // If there are unmatched parentheses
1304
        if ($highlight && array_search('block', $indent_types) !== false) {
1305
            $return .= "\n" . self::highlightError("WARNING: unclosed parentheses or section");
1306
        }
1307
1308
        // Replace tab characters with the configuration tab character
1309
        $return = trim(str_replace("\t", self::$tab, $return));
1310
1311
        if ($highlight) {
1312
            $return = self::output($return);
1313
        }
1314
1315
        return $return;
1316
    }
1317
1318
    /**
1319
     * Add syntax highlighting to a SQL string
1320
     *
1321
     * @param String $string The SQL string
1322
     *
1323
     * @return String The SQL string with HTML styles applied
1324
     */
1325
    public static function highlight($string)
1326
    {
1327
        $tokens = self::tokenize($string);
1328
1329
        $return = '';
1330
1331
        foreach ($tokens as $token) {
1332
            $return .= self::highlightToken($token);
1333
        }
1334
1335
        return self::output($return);
1336
    }
1337
1338
    /**
1339
     * Split a SQL string into multiple queries.
1340
     * Uses ";" as a query delimiter.
1341
     *
1342
     * @param String $string The SQL string
1343
     *
1344
     * @return Array An array of individual query strings without trailing semicolons
1345
     */
1346
    public static function splitQuery($string)
1347
    {
1348
        $queries = array();
1349
        $current_query = '';
1350
        $empty = true;
1351
1352
        $tokens = self::tokenize($string);
1353
1354
        foreach ($tokens as $token) {
1355
            // If this is a query separator
1356
            if ($token[self::TOKEN_VALUE] === ';') {
1357
                if (!$empty) {
1358
                    $queries[] = $current_query . ';';
1359
                }
1360
                $current_query = '';
1361
                $empty = true;
1362
                continue;
1363
            }
1364
1365
            // If this is a non-empty character
1366
            if ($token[self::TOKEN_TYPE] !== self::TOKEN_TYPE_WHITESPACE && $token[self::TOKEN_TYPE] !== self::TOKEN_TYPE_COMMENT && $token[self::TOKEN_TYPE] !== self::TOKEN_TYPE_BLOCK_COMMENT) {
1367
                $empty = false;
1368
            }
1369
1370
            $current_query .= $token[self::TOKEN_VALUE];
1371
        }
1372
1373
        if (!$empty) {
1374
            $queries[] = trim($current_query);
1375
        }
1376
1377
        return $queries;
1378
    }
1379
1380
    /**
1381
     * Remove all comments from a SQL string
1382
     *
1383
     * @param String $string The SQL string
1384
     *
1385
     * @return String The SQL string without comments
1386
     */
1387
    public static function removeComments($string)
1388
    {
1389
        $result = '';
1390
1391
        $tokens = self::tokenize($string);
1392
1393
        foreach ($tokens as $token) {
1394
            // Skip comment tokens
1395
            if ($token[self::TOKEN_TYPE] === self::TOKEN_TYPE_COMMENT || $token[self::TOKEN_TYPE] === self::TOKEN_TYPE_BLOCK_COMMENT) {
1396
                continue;
1397
            }
1398
1399
            $result .= $token[self::TOKEN_VALUE];
1400
        }
1401
        $result = self::format($result, false);
1402
1403
        return $result;
1404
    }
1405
1406
    /**
1407
     * Compress a query by collapsing white space and removing comments
1408
     *
1409
     * @param String $string The SQL string
1410
     *
1411
     * @return String The SQL string without comments
1412
     */
1413
    public static function compress($string)
1414
    {
1415
        $result = '';
1416
1417
        $tokens = self::tokenize($string);
1418
1419
        $whitespace = true;
1420
        foreach ($tokens as $token) {
1421
            // Skip comment tokens
1422
            if ($token[self::TOKEN_TYPE] === self::TOKEN_TYPE_COMMENT || $token[self::TOKEN_TYPE] === self::TOKEN_TYPE_BLOCK_COMMENT) {
1423
                continue;
1424
            } // Remove extra whitespace in reserved words (e.g "OUTER     JOIN" becomes "OUTER JOIN")
1425
            elseif ($token[self::TOKEN_TYPE] === self::TOKEN_TYPE_RESERVED || $token[self::TOKEN_TYPE] === self::TOKEN_TYPE_RESERVED_NEWLINE || $token[self::TOKEN_TYPE] === self::TOKEN_TYPE_RESERVED_TOPLEVEL) {
1426
                $token[self::TOKEN_VALUE] = preg_replace('/\s+/', ' ', $token[self::TOKEN_VALUE]);
1427
            }
1428
1429
            if ($token[self::TOKEN_TYPE] === self::TOKEN_TYPE_WHITESPACE) {
1430
                // If the last token was whitespace, don't add another one
1431
                if ($whitespace) {
1432
                    continue;
1433
                } else {
1434
                    $whitespace = true;
1435
                    // Convert all whitespace to a single space
1436
                    $token[self::TOKEN_VALUE] = ' ';
1437
                }
1438
            } else {
1439
                $whitespace = false;
1440
            }
1441
1442
            $result .= $token[self::TOKEN_VALUE];
1443
        }
1444
1445
        return rtrim($result);
1446
    }
1447
1448
    /**
1449
     * Highlights a token depending on its type.
1450
     *
1451
     * @param Array $token An associative array containing type and value.
1452
     *
1453
     * @return String HTML code of the highlighted token.
1454
     */
1455
    protected static function highlightToken($token)
1456
    {
1457
        $type = $token[self::TOKEN_TYPE];
1458
1459
        if (self::is_cli()) {
1460
            $token = $token[self::TOKEN_VALUE];
1461
        } else {
1462
            if (defined('ENT_IGNORE')) {
1463
                $token = htmlentities($token[self::TOKEN_VALUE], ENT_COMPAT | ENT_IGNORE, 'UTF-8');
1464
            } else {
1465
                $token = htmlentities($token[self::TOKEN_VALUE], ENT_COMPAT, 'UTF-8');
1466
            }
1467
        }
1468
1469
        if ($type === self::TOKEN_TYPE_BOUNDARY) {
1470
            return self::highlightBoundary($token);
1471
        } elseif ($type === self::TOKEN_TYPE_WORD) {
1472
            return self::highlightWord($token);
1473
        } elseif ($type === self::TOKEN_TYPE_BACKTICK_QUOTE) {
1474
            return self::highlightBacktickQuote($token);
1475
        } elseif ($type === self::TOKEN_TYPE_QUOTE) {
1476
            return self::highlightQuote($token);
1477
        } elseif ($type === self::TOKEN_TYPE_RESERVED) {
1478
            return self::highlightReservedWord($token);
1479
        } elseif ($type === self::TOKEN_TYPE_RESERVED_TOPLEVEL) {
1480
            return self::highlightReservedWord($token);
1481
        } elseif ($type === self::TOKEN_TYPE_RESERVED_NEWLINE) {
1482
            return self::highlightReservedWord($token);
1483
        } elseif ($type === self::TOKEN_TYPE_NUMBER) {
1484
            return self::highlightNumber($token);
1485
        } elseif ($type === self::TOKEN_TYPE_VARIABLE) {
1486
            return self::highlightVariable($token);
1487
        } elseif ($type === self::TOKEN_TYPE_COMMENT || $type === self::TOKEN_TYPE_BLOCK_COMMENT) {
1488
            return self::highlightComment($token);
1489
        }
1490
1491
        return $token;
1492
    }
1493
1494
    /**
1495
     * Highlights a quoted string
1496
     *
1497
     * @param String $value The token's value
1498
     *
1499
     * @return String HTML code of the highlighted token.
1500
     */
1501
    protected static function highlightQuote($value)
1502
    {
1503
        if (self::is_cli()) {
1504
            return self::$cli_quote . $value . "\x1b[0m";
1505
        } else {
1506
            return '<span ' . self::$quote_attributes . '>' . $value . '</span>';
1507
        }
1508
    }
1509
1510
    /**
1511
     * Highlights a backtick quoted string
1512
     *
1513
     * @param String $value The token's value
1514
     *
1515
     * @return String HTML code of the highlighted token.
1516
     */
1517
    protected static function highlightBacktickQuote($value)
1518
    {
1519
        if (self::is_cli()) {
1520
            return self::$cli_backtick_quote . $value . "\x1b[0m";
1521
        } else {
1522
            return '<span ' . self::$backtick_quote_attributes . '>' . $value . '</span>';
1523
        }
1524
    }
1525
1526
    /**
1527
     * Highlights a reserved word
1528
     *
1529
     * @param String $value The token's value
1530
     *
1531
     * @return String HTML code of the highlighted token.
1532
     */
1533
    protected static function highlightReservedWord($value)
1534
    {
1535
        if (self::is_cli()) {
1536
            return self::$cli_reserved . $value . "\x1b[0m";
1537
        } else {
1538
            return '<span ' . self::$reserved_attributes . '>' . $value . '</span>';
1539
        }
1540
    }
1541
1542
    /**
1543
     * Highlights a boundary token
1544
     *
1545
     * @param String $value The token's value
1546
     *
1547
     * @return String HTML code of the highlighted token.
1548
     */
1549
    protected static function highlightBoundary($value)
1550
    {
1551
        if ($value === '(' || $value === ')') {
1552
            return $value;
1553
        }
1554
1555
        if (self::is_cli()) {
1556
            return self::$cli_boundary . $value . "\x1b[0m";
1557
        } else {
1558
            return '<span ' . self::$boundary_attributes . '>' . $value . '</span>';
1559
        }
1560
    }
1561
1562
    /**
1563
     * Highlights a number
1564
     *
1565
     * @param String $value The token's value
1566
     *
1567
     * @return String HTML code of the highlighted token.
1568
     */
1569
    protected static function highlightNumber($value)
1570
    {
1571
        if (self::is_cli()) {
1572
            return self::$cli_number . $value . "\x1b[0m";
1573
        } else {
1574
            return '<span ' . self::$number_attributes . '>' . $value . '</span>';
1575
        }
1576
    }
1577
1578
    /**
1579
     * Highlights an error
1580
     *
1581
     * @param String $value The token's value
1582
     *
1583
     * @return String HTML code of the highlighted token.
1584
     */
1585
    protected static function highlightError($value)
1586
    {
1587
        if (self::is_cli()) {
1588
            return self::$cli_error . $value . "\x1b[0m";
1589
        } else {
1590
            return '<span ' . self::$error_attributes . '>' . $value . '</span>';
1591
        }
1592
    }
1593
1594
    /**
1595
     * Highlights a comment
1596
     *
1597
     * @param String $value The token's value
1598
     *
1599
     * @return String HTML code of the highlighted token.
1600
     */
1601
    protected static function highlightComment($value)
1602
    {
1603
        if (self::is_cli()) {
1604
            return self::$cli_comment . $value . "\x1b[0m";
1605
        } else {
1606
            return '<span ' . self::$comment_attributes . '>' . $value . '</span>';
1607
        }
1608
    }
1609
1610
    /**
1611
     * Highlights a word token
1612
     *
1613
     * @param String $value The token's value
1614
     *
1615
     * @return String HTML code of the highlighted token.
1616
     */
1617
    protected static function highlightWord($value)
1618
    {
1619
        if (self::is_cli()) {
1620
            return self::$cli_word . $value . "\x1b[0m";
1621
        } else {
1622
            return '<span ' . self::$word_attributes . '>' . $value . '</span>';
1623
        }
1624
    }
1625
1626
    /**
1627
     * Highlights a variable token
1628
     *
1629
     * @param String $value The token's value
1630
     *
1631
     * @return String HTML code of the highlighted token.
1632
     */
1633
    protected static function highlightVariable($value)
1634
    {
1635
        if (self::is_cli()) {
1636
            return self::$cli_variable . $value . "\x1b[0m";
1637
        } else {
1638
            return '<span ' . self::$variable_attributes . '>' . $value . '</span>';
1639
        }
1640
    }
1641
1642
    /**
1643
     * Helper function for building regular expressions for reserved words and boundary characters
1644
     *
1645
     * @param String $a The string to be quoted
1646
     *
1647
     * @return String The quoted string
1648
     */
1649
    private static function quote_regex($a)
1650
    {
1651
        return preg_quote($a, '/');
1652
    }
1653
1654
    /**
1655
     * Helper function for building string output
1656
     *
1657
     * @param String $string The string to be quoted
1658
     *
1659
     * @return String The quoted string
1660
     */
1661
    private static function output($string)
1662
    {
1663
        if (self::is_cli()) {
1664
            return $string . "\n";
1665
        } else {
1666
            $string = trim($string);
1667
            if (!self::$use_pre) {
1668
                return $string;
1669
            }
1670
1671
            return '<pre ' . self::$pre_attributes . '>' . $string . '</pre>';
1672
        }
1673
    }
1674
1675
    /**
1676
     * @return bool
1677
     */
1678
    private static function is_cli()
1679
    {
1680
        if (isset(self::$cli)) {
1681
            return self::$cli;
1682
        } else {
1683
            return php_sapi_name() === 'cli';
1684
        }
1685
    }
1686
1687
    /**
1688
     * Returns syntax highlighted SQL command.
1689
     *
1690
     *
1691
     * @param string $sql
1692
     * @param array $bindings
1693
     * @param \PDO $pdo
0 ignored issues
show
Documentation introduced by
Should the type for parameter $pdo not be null|PDO?

This check looks for @param annotations where the type inferred by our type inference engine differs from the declared type.

It makes a suggestion as to what type it considers more descriptive.

Most often this is a case of a parameter that can be null in addition to its declared types.

Loading history...
1694
     * @return string
1695
     */
1696
    public static function prepare($sql, array $bindings = [], PDO $pdo = null)
1697
    {
1698
        // insert new lines
1699
        $sql = " $sql ";
1700
        $sql = preg_replace('#(?<=[\\s,(])('.static::KEYWORDS1.')(?=[\\s,)])#i', "\n\$1", $sql);
1701
1702
        // reduce spaces
1703
        $sql = preg_replace('#[ \t]{2,}#', ' ', $sql);
1704
1705
        // syntax highlight
1706
        $sql = htmlspecialchars($sql, ENT_IGNORE, 'UTF-8');
1707
        $sql = preg_replace_callback('#(/\\*.+?\\*/)|(\\*\\*.+?\\*\\*)|(?<=[\\s,(])('.static::KEYWORDS1.')(?=[\\s,)])|(?<=[\\s,(=])('.static::KEYWORDS2.')(?=[\\s,)=])#is', function ($matches) {
1708
            if (! empty($matches[1])) { // comment
1709
                return '<em style="color:gray">'.$matches[1].'</em>';
1710
            } elseif (! empty($matches[2])) { // error
1711
                return '<strong style="color:red">'.$matches[2].'</strong>';
1712
            } elseif (! empty($matches[3])) { // most important keywords
1713
                return '<strong style="color:blue; text-transform: uppercase;">'.$matches[3].'</strong>';
1714
            } elseif (! empty($matches[4])) { // other keywords
1715
                return '<strong style="color:green">'.$matches[4].'</strong>';
1716
            }
1717
        }, $sql);
1718
1719
        $bindings = array_map(function ($binding) use ($pdo) {
1720
            if (is_array($binding) === true) {
1721
                $binding = implode(', ', array_map(function ($value) {
1722
                    return is_string($value) === true ? htmlspecialchars('\''.$value.'\'', ENT_NOQUOTES, 'UTF-8') : $value;
1723
                }, $binding));
1724
1725
                return htmlspecialchars('('.$binding.')', ENT_NOQUOTES, 'UTF-8');
1726
            }
1727
1728
            if (is_string($binding) === true && (preg_match('#[^\x09\x0A\x0D\x20-\x7E\xA0-\x{10FFFF}]#u', $binding) || preg_last_error())) {
1729
                return '<i title="Length '.strlen($binding).' bytes">&lt;binary&gt;</i>';
1730
            }
1731
1732
            if (is_string($binding) === true) {
1733
                $text = htmlspecialchars($pdo ? $pdo->quote($binding) : '\''.$binding.'\'', ENT_NOQUOTES, 'UTF-8');
1734
1735
                return '<span title="Length '.strlen($text).' characters">'.$text.'</span>';
1736
            }
1737
1738
            if (is_resource($binding) === true) {
1739
                $type = get_resource_type($binding);
1740
                if ($type === 'stream') {
1741
                    $info = stream_get_meta_data($binding);
1742
                }
1743
1744
                return '<i'.(isset($info['uri']) ? ' title="'.htmlspecialchars($info['uri'], ENT_NOQUOTES, 'UTF-8').'"' : null)
1745
                    .'>&lt;'.htmlspecialchars($type, ENT_NOQUOTES, 'UTF-8').' resource&gt;</i>';
1746
            }
1747
1748
            if ($binding instanceof DateTime) {
1749
                return htmlspecialchars('\''.$binding->format('Y-m-d H:i:s').'\'', ENT_NOQUOTES, 'UTF-8');
1750
            }
1751
1752
            return htmlspecialchars($binding, ENT_NOQUOTES, 'UTF-8');
1753
        }, $bindings);
1754
        $sql = str_replace(['%', '?'], ['%%', '%s'], $sql);
1755
1756
        return '<div><code>'.nl2br(trim(vsprintf($sql, $bindings))).'</code></div>';
1757
    }
1758
1759
    /**
1760
     * perform quer analysis hint.
1761
     *
1762
     * @param string $sql
1763
     * @param string $version
0 ignored issues
show
Documentation introduced by
Should the type for parameter $version not be string|null?

This check looks for @param annotations where the type inferred by our type inference engine differs from the declared type.

It makes a suggestion as to what type it considers more descriptive.

Most often this is a case of a parameter that can be null in addition to its declared types.

Loading history...
1764
     * @param float $driver
0 ignored issues
show
Documentation introduced by
Should the type for parameter $driver not be double|null?

This check looks for @param annotations where the type inferred by our type inference engine differs from the declared type.

It makes a suggestion as to what type it considers more descriptive.

Most often this is a case of a parameter that can be null in addition to its declared types.

Loading history...
1765
     * @return array
1766
     */
1767
    public static function performQueryAnalysis($sql, $version = null, $driver = null)
1768
    {
1769
        $hints = [];
1770
        if (preg_match('/^\\s*SELECT\\s*`?[a-zA-Z0-9]*`?\\.?\\*/i', $sql)) {
1771
            $hints[] = 'Use <code>SELECT *</code> only if you need all columns from table';
1772
        }
1773
        if (preg_match('/ORDER BY RAND()/i', $sql)) {
1774
            $hints[] = '<code>ORDER BY RAND()</code> is slow, try to avoid if you can.
1775
                You can <a href="http://stackoverflow.com/questions/2663710/how-does-mysqls-order-by-rand-work">read this</a>
1776
                or <a href="http://stackoverflow.com/questions/1244555/how-can-i-optimize-mysqls-order-by-rand-function">this</a>';
1777
        }
1778
        if (strpos($sql, '!=') !== false) {
1779
            $hints[] = 'The <code>!=</code> operator is not standard. Use the <code>&lt;&gt;</code> operator to test for inequality instead.';
1780
        }
1781
        if (stripos($sql, 'WHERE') === false) {
1782
            $hints[] = 'The <code>SELECT</code> statement has no <code>WHERE</code> clause and could examine many more rows than intended';
1783
        }
1784
        if (preg_match('/LIMIT\\s/i', $sql) && stripos($sql, 'ORDER BY') === false) {
1785
            $hints[] = '<code>LIMIT</code> without <code>ORDER BY</code> causes non-deterministic results, depending on the query execution plan';
1786
        }
1787
        if (preg_match('/LIKE\\s[\'"](%.*?)[\'"]/i', $sql, $matches)) {
1788
            $hints[] = 'An argument has a leading wildcard character: <code>'.$matches[1].'</code>.
1789
                The predicate with this argument is not sargable and cannot use an index if one exists.';
1790
        }
1791
        if ($version < 5.5 && $driver === 'mysql') {
1792
            if (preg_match('/\\sIN\\s*\\(\\s*SELECT/i', $sql)) {
1793
                $hints[] = '<code>IN()</code> and <code>NOT IN()</code> subqueries are poorly optimized in that MySQL version : '.$version.
1794
                    '. MySQL executes the subquery as a dependent subquery for each row in the outer query';
1795
            }
1796
        }
1797
1798
        return $hints;
1799
    }
1800
1801
    /**
1802
     * explain sql.
1803
     *
1804
     * @param PDO $pdo
1805
     * @param string $sql
1806
     * @param array $bindings
1807
     * @return array
1808
     */
1809
    public static function explain(PDO $pdo, $sql, $bindings = [])
1810
    {
1811
        $explains = [];
1812
        if (preg_match('#\s*\(?\s*SELECT\s#iA', $sql)) {
1813
            $statement = $pdo->prepare('EXPLAIN '.$sql);
1814
            $statement->execute($bindings);
1815
            $explains = $statement->fetchAll(PDO::FETCH_CLASS);
1816
        }
1817
1818
        return $explains;
1819
    }
1820
}
1821