RedshiftDialect::getEncoding()   B
last analyzed

Complexity

Conditions 7
Paths 7

Size

Total Lines 21
Code Lines 16

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
dl 0
loc 21
c 0
b 0
f 0
rs 7.551
cc 7
eloc 16
nc 7
nop 1
1
<?php
2
3
namespace Graze\DataDb\Dialect;
4
5
use Aws\Credentials\CredentialsInterface;
6
use Graze\DataDb\Formatter\SyntaxFormatterInterface;
7
use Graze\DataDb\TableNodeInterface;
8
use Graze\DataFile\Format\CsvFormatInterface;
9
use Graze\DataFile\Helper\Builder\BuilderAwareInterface;
10
use Graze\DataFile\Modify\Compress\CompressionAwareInterface;
11
use Graze\DataFile\Modify\Compress\CompressionFactory;
12
use Graze\DataFile\Modify\Compress\Gzip;
13
use Graze\DataFile\Modify\Encoding\EncodingAwareInterface;
14
use Graze\DataFile\Node\FileNodeInterface;
15
use InvalidArgumentException;
16
use League\Flysystem\AwsS3v3\AwsS3Adapter;
17
18
class RedshiftDialect extends AbstractDialect implements BuilderAwareInterface
19
{
20
    const DEFAULT_TIMEZONE = 'Europe/London';
21
22
    /**
23
     * @var string
24
     */
25
    private $timezone;
26
27
    /**
28
     * MysqlDialect constructor.
29
     *
30
     * @param SyntaxFormatterInterface|null $formatter
31
     * @param string|null                   $timezone
32
     */
33
    public function __construct(SyntaxFormatterInterface $formatter = null, $timezone = null)
34
    {
35
        $this->timezone = $timezone ?: static::DEFAULT_TIMEZONE;
36
        parent::__construct($formatter);
37
    }
38
39
    /**
40
     * @return string
41
     */
42
    public function getIdentifierQuote()
43
    {
44
        return '"';
45
    }
46
47
    /**
48
     * @param TableNodeInterface $old
49
     * @param TableNodeInterface $new
50
     *
51
     * @return array [sql, bind]
52
     */
53 View Code Duplication
    public function getCreateTableLike(TableNodeInterface $old, TableNodeInterface $new)
0 ignored issues
show
Duplication introduced by
This method seems to be duplicated in your project.

Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.

You can also find more detailed suggestions in the “Code” section of your repository.

Loading history...
54
    {
55
        return [
56
            $this->format(
57
                'CREATE TABLE {new:schema|q}.{new:table|q} (LIKE {old:schema|q}.{old:table|q})',
58
                ['old' => $old, 'new' => $new,]
59
            ),
60
            [],
61
        ];
62
    }
63
64
    /**
65
     * @param TableNodeInterface $source
66
     * @param TableNodeInterface $join
67
     * @param string             $on
68
     * @param string|null        $where
69
     *
70
     * @return array [sql, bind]
71
     */
72
    public function getDeleteTableJoinSoftDelete(
73
        TableNodeInterface $source,
74
        TableNodeInterface $join,
75
        $on,
76
        $where = null
77
    ) {
78
        return [
79
            $this->format(
80
                'UPDATE {source:schema|q}.{source:table|q}
81
                SET{softUpdated}
82
                    {source:softDeleted|q} = CONVERT_TIMEZONE(\'UTC\', \'{timezone}\', GETDATE())
83
                FROM {join:schema|q}.{join:table|q}
84
                WHERE {on}
85
                      {where}',
86
                [
87
                    'source'      => $source,
88
                    'join'        => $join,
89
                    'on'          => $on,
90
                    'softUpdated' => ($source->getSoftUpdated() ?
91
                        $this->format(
92
                            ' {source:softUpdated|q} = CONVERT_TIMEZONE(\'UTC\', \'{timezone}\', GETDATE()),',
93
                            ['source' => $source, 'timezone' => $this->timezone]
94
                        ) : ''
95
                    ),
96
                    'where'       => ($where ? sprintf('AND (%s)', $where) : ''),
97
                    'timezone'    => $this->timezone,
98
                ]
99
            ),
100
            [],
101
        ];
102
    }
103
104
    /**
105
     * @param TableNodeInterface $source
106
     * @param TableNodeInterface $join
107
     * @param string             $on
108
     * @param string|null        $where
109
     *
110
     * @return array [sql, bind]
111
     */
112 View Code Duplication
    public function getDeleteTableJoin(
0 ignored issues
show
Duplication introduced by
This method seems to be duplicated in your project.

Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.

You can also find more detailed suggestions in the “Code” section of your repository.

Loading history...
113
        TableNodeInterface $source,
114
        TableNodeInterface $join,
115
        $on,
116
        $where = null
117
    ) {
118
        return [
119
            $this->format(
120
                'DELETE FROM
121
                    {source:schema|q}.{source:table|q}
122
                USING
123
                    {join:schema|q}.{join:table|q}
124
                WHERE
125
                    {on}
126
                    {where}',
127
                [
128
                    'source' => $source,
129
                    'join'   => $join,
130
                    'on'     => $on,
131
                    'where'  => ($where ? sprintf('AND (%s)', $where) : ''),
132
                ]
133
            ),
134
            [],
135
        ];
136
    }
137
138
    /**
139
     * @param TableNodeInterface $table
140
     * @param string|null               $where
141
     *
142
     * @return array [sql, bind]
143
     */
144
    public function getDeleteFromTableSoftDelete(TableNodeInterface $table, $where = null)
145
    {
146
        return [
147
            $this->format(
148
                'UPDATE {table:schema|q}.{table:table|q}
149
                    SET{softUpdated}
150
                        {table:softDeleted|q} = CONVERT_TIMEZONE(\'UTC\', \'{timezone}\', GETDATE())
151
                    {where}',
152
                [
153
                    'table'       => $table,
154
                    'softUpdated' => ($table->getSoftUpdated() ?
155
                        $this->format(
156
                            ' {table:softUpdated|q} = CONVERT_TIMEZONE(\'UTC\', \'{timezone}\', GETDATE()),',
157
                            ['table' => $table, 'timezone' => $this->timezone]
158
                        ) :
159
                        ''),
160
                    'where'       => ($where ? 'WHERE ' . $where : ''),
161
                    'timezone'    => $this->timezone,
162
                ]
163
            ),
164
            [],
165
        ];
166
    }
167
168
    /**
169
     * @param TableNodeInterface $table
170
     * @param array              $columns
171
     * @param array              $primary
172
     * @param array              $index
173
     *
174
     * @return array [sql, bind]
175
     */
176
    public function getCreateTable(TableNodeInterface $table, array $columns, array $primary, array $index)
177
    {
178
        return [
179
            $this->format(
180
                "CREATE TABLE {table:schema|q}.{table:table|q} (\n  {columns}\n)\n{primary}\n{index}",
181
                [
182
                    'table'   => $table,
183
                    'columns' => implode(",\n  ", $columns),
184
                    'primary' => (count($primary) > 0 ? $primary[0] : ''),
185
                    'index'   => (count($index) > 0 ? implode("\n  ", $index) : ''),
186
                ]
187
            ),
188
            [],
189
        ];
190
    }
191
192
    /**
193
     * @param array $column
194
     *
195
     * @return string
196
     */
197 View Code Duplication
    public function getColumnDefinition(array $column)
0 ignored issues
show
Duplication introduced by
This method seems to be duplicated in your project.

Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.

You can also find more detailed suggestions in the “Code” section of your repository.

Loading history...
198
    {
199
        return $this->format(
200
            '{column|q} {type}{notnull}',
201
            array_merge($column, ['notnull' => $column['nullable'] ? '' : ' NOT NULL'])
202
        );
203
    }
204
205
    /**
206
     * @param array $key
207
     *
208
     * @return string
209
     */
210
    public function getPrimaryKeyDefinition(array $key)
211
    {
212
        return $this->format('DISTKEY({column|q})', $key);
213
    }
214
215
    /**
216
     * @param array $index
217
     *
218
     * @return string
219
     */
220
    public function getIndexDefinition(array $index)
221
    {
222
        return $this->format('SORTKEY({column|q})', $index);
223
    }
224
225
    /**
226
     * @param TableNodeInterface $table
227
     *
228
     * @return array [sql, bind]
229
     */
230
    public function getDescribeTable(TableNodeInterface $table)
231
    {
232
        return [
233
            'SELECT *
234
             FROM pg_table_def
235
             WHERE schemaname = ?
236
               AND tablename = ?',
237
            [
238
                $table->getSchema(),
239
                $table->getTable(),
240
            ],
241
        ];
242
    }
243
244
    /**
245
     * @param TableNodeInterface $table
246
     *
247
     * @return array [sql, bind]
248
     */
249
    public function getCreateSyntax(TableNodeInterface $table)
250
    {
251
        $sql = <<<SQL
252
SELECT
253
  ddl
254
FROM
255
(
256
 SELECT
257
  schemaname
258
  ,tablename
259
  ,seq
260
  ,ddl
261
 FROM
262
 (
263
     --DROP TABLE
264
  SELECT
265
   n.nspname AS schemaname
266
   ,c.relname AS tablename
267
   ,1 AS seq
268
   ,'--DROP TABLE "' + n.nspname + '"."' + c.relname + '";' AS ddl
269
  FROM pg_namespace AS n
270
  INNER JOIN pg_class AS c ON n.oid = c.relnamespace
271
  WHERE c.relkind = 'r'
272
    --CREATE TABLE
273
  UNION SELECT
274
   n.nspname AS schemaname
275
   ,c.relname AS tablename
276
   ,2 AS seq
277
   ,'CREATE TABLE IF NOT EXISTS "' + n.nspname + '"."' + c.relname + '"' AS ddl
278
  FROM pg_namespace AS n
279
  INNER JOIN pg_class AS c ON n.oid = c.relnamespace
280
  WHERE c.relkind = 'r'
281
    --OPEN PAREN COLUMN LIST
282
        UNION SELECT n.nspname AS schemaname, c.relname AS tablename, 5 AS seq, '(' AS ddl
283
  FROM pg_namespace AS n
284
  INNER JOIN pg_class AS c ON n.oid = c.relnamespace
285
  WHERE c.relkind = 'r'
286
    --COLUMN LIST
287
        UNION SELECT
288
   schemaname
289
   ,tablename
290
   ,seq
291
   ,'\t' + col_delim + col_name + ' ' + col_datatype + ' ' + col_nullable + ' ' + col_default + ' ' + col_encoding AS ddl
292
  FROM
293
  (
294
      SELECT
295
    n.nspname AS schemaname
296
    ,c.relname AS tablename
297
    ,100000000 + a.attnum AS seq
298
    ,CASE WHEN a.attnum > 1 THEN ',' ELSE '' END AS col_delim
299
    ,'"' + a.attname + '"' AS col_name
300
    ,CASE WHEN STRPOS(UPPER(format_type(a.atttypid, a.atttypmod)), 'CHARACTER VARYING') > 0
301
      THEN REPLACE(UPPER(format_type(a.atttypid, a.atttypmod)), 'CHARACTER VARYING', 'VARCHAR')
302
     WHEN STRPOS(UPPER(format_type(a.atttypid, a.atttypmod)), 'CHARACTER') > 0
303
      THEN REPLACE(UPPER(format_type(a.atttypid, a.atttypmod)), 'CHARACTER', 'CHAR')
304
     ELSE UPPER(format_type(a.atttypid, a.atttypmod))
305
     END AS col_datatype
306
    ,CASE WHEN format_encoding((a.attencodingtype)::integer) = 'none'
307
     THEN ''
308
     ELSE 'ENCODE ' + format_encoding((a.attencodingtype)::integer)
309
     END AS col_encoding
310
    ,CASE WHEN a.atthasdef IS TRUE THEN 'DEFAULT ' + adef.adsrc ELSE '' END AS col_default
311
    ,CASE WHEN a.attnotnull IS TRUE THEN 'NOT NULL' ELSE '' END AS col_nullable
312
   FROM pg_namespace AS n
313
   INNER JOIN pg_class AS c ON n.oid = c.relnamespace
314
   INNER JOIN pg_attribute AS a ON c.oid = a.attrelid
315
   LEFT OUTER JOIN pg_attrdef AS adef ON a.attrelid = adef.adrelid AND a.attnum = adef.adnum
316
   WHERE c.relkind = 'r'
317
    AND a.attnum > 0
318
   ORDER BY a.attnum
319
   )
320
  --CONSTRAINT LIST
321
        UNION (SELECT
322
        n.nspname AS schemaname
323
   ,c.relname AS tablename
324
   ,200000000 + CAST(con.oid AS INT) AS seq
325
   ,'\t,' + pg_get_constraintdef(con.oid) AS ddl
326
  FROM pg_constraint AS con
327
  INNER JOIN pg_class AS c ON c.relnamespace = con.connamespace AND c.relfilenode = con.conrelid
328
  INNER JOIN pg_namespace AS n ON n.oid = c.relnamespace
329
  WHERE c.relkind = 'r' AND pg_get_constraintdef(con.oid) NOT LIKE 'FOREIGN KEY%'
330
  ORDER BY seq)
331
  --CLOSE PAREN COLUMN LIST
332
        UNION SELECT n.nspname AS schemaname, c.relname AS tablename, 299999999 AS seq, ')' AS ddl
333
  FROM pg_namespace AS n
334
  INNER JOIN pg_class AS c ON n.oid = c.relnamespace
335
  WHERE c.relkind = 'r'
336
    --DISTSTYLE
337
  UNION SELECT
338
   n.nspname AS schemaname
339
   ,c.relname AS tablename
340
   ,300000000 AS seq
341
   ,CASE WHEN c.reldiststyle = 0 THEN 'DISTSTYLE EVEN'
342
    WHEN c.reldiststyle = 1 THEN 'DISTSTYLE KEY'
343
    WHEN c.reldiststyle = 8 THEN 'DISTSTYLE ALL'
344
    ELSE '<<Error - UNKNOWN DISTSTYLE>>'
345
    END AS ddl
346
  FROM pg_namespace AS n
347
  INNER JOIN pg_class AS c ON n.oid = c.relnamespace
348
  WHERE c.relkind = 'r'
349
    --DISTKEY COLUMNS
350
  UNION SELECT
351
   n.nspname AS schemaname
352
   ,c.relname AS tablename
353
   ,400000000 + a.attnum AS seq
354
   ,'DISTKEY ("' + a.attname + '")' AS ddl
355
  FROM pg_namespace AS n
356
  INNER JOIN pg_class AS c ON n.oid = c.relnamespace
357
  INNER JOIN pg_attribute AS a ON c.oid = a.attrelid
358
  WHERE c.relkind = 'r'
359
    AND a.attisdistkey IS TRUE
360
    AND a.attnum > 0
361
    --SORTKEY COLUMNS
362
  UNION select schemaname, tablename, seq,
363
       case when min_sort <0 then 'INTERLEAVED SORTKEY (' else 'SORTKEY (' end as ddl
364
from (SELECT
365
   n.nspname AS schemaname
366
   ,c.relname AS tablename
367
   ,499999999 AS seq
368
   ,min(attsortkeyord) min_sort FROM pg_namespace AS n
369
  INNER JOIN  pg_class AS c ON n.oid = c.relnamespace
370
  INNER JOIN pg_attribute AS a ON c.oid = a.attrelid
371
  WHERE c.relkind = 'r'
372
    AND abs(a.attsortkeyord) > 0
373
    AND a.attnum > 0
374
  group by 1,2,3 )
375
  UNION (SELECT
376
   n.nspname AS schemaname
377
   ,c.relname AS tablename
378
   ,500000000 + abs(a.attsortkeyord) AS seq
379
   ,CASE WHEN abs(a.attsortkeyord) = 1
380
    THEN '\t"' + a.attname + '"'
381
    ELSE '\t, "' + a.attname + '"'
382
    END AS ddl
383
  FROM  pg_namespace AS n
384
  INNER JOIN pg_class AS c ON n.oid = c.relnamespace
385
  INNER JOIN pg_attribute AS a ON c.oid = a.attrelid
386
  WHERE c.relkind = 'r'
387
    AND abs(a.attsortkeyord) > 0
388
    AND a.attnum > 0
389
  ORDER BY abs(a.attsortkeyord))
390
  UNION SELECT
391
   n.nspname AS schemaname
392
   ,c.relname AS tablename
393
   ,599999999 AS seq
394
   ,'\t)' AS ddl
395
  FROM pg_namespace AS n
396
  INNER JOIN  pg_class AS c ON n.oid = c.relnamespace
397
  INNER JOIN  pg_attribute AS a ON c.oid = a.attrelid
398
  WHERE c.relkind = 'r'
399
    AND a.attsortkeyord > 0
400
    AND a.attnum > 0
401
    --END SEMICOLON
402
  UNION SELECT n.nspname AS schemaname, c.relname AS tablename, 600000000 AS seq, ';' AS ddl
403
  FROM  pg_namespace AS n
404
  INNER JOIN pg_class AS c ON n.oid = c.relnamespace
405
  WHERE c.relkind = 'r' )
406
  UNION (
407
      SELECT n.nspname AS schemaname,
408
       'zzzzzzzz' AS tablename,
409
       700000000 + CAST(con.oid AS INT) AS seq,
410
       'ALTER TABLE ' + c.relname + ' ADD ' + pg_get_constraintdef(con.oid)::VARCHAR(1024) + ';' AS ddl
411
    FROM pg_constraint AS con
412
      INNER JOIN pg_class AS c
413
              ON c.relnamespace = con.connamespace
414
    AND c.relfilenode = con.conrelid
415
      INNER JOIN pg_namespace AS n ON n.oid = c.relnamespace
416
    WHERE c.relkind = 'r'
417
    AND   pg_get_constraintdef (con.oid) LIKE 'FOREIGN KEY%'
418
    ORDER BY seq
419
  )
420
 ORDER BY schemaname, tablename, seq
421
 )
422
 WHERE schemaname = ?
423
   AND tablename = ?
424
 ORDER BY ddl ASC
425
SQL;
426
427
        return [$sql, [$table->getSchema(), $table->getTable()]];
428
    }
429
430
    /**
431
     * @param TableNodeInterface $table
432
     * @param FileNodeInterface  $file
433
     * @param CsvFormatInterface $format
434
     * @param bool               $truncateColumns
435
     * @param int                $maxErrors
436
     * @param string             $timeFormat
437
     * @param string             $dateFormat
438
     *
439
     * @return array [sql, bind]
440
     */
441
    public function getImportFromCsv(
442
        TableNodeInterface $table,
443
        FileNodeInterface $file,
444
        CsvFormatInterface $format,
445
        $truncateColumns = true,
446
        $maxErrors = 0,
447
        $timeFormat = 'YYYY-MM-DD HH:MI:SS',
448
        $dateFormat = 'YYYY-MM-DD'
449
    ) {
450
        $credentials = $this->getS3CredentialsFromFile($file);
451
        $bucket = $this->getS3BucketFromFile($file);
452
453 View Code Duplication
        if ($table->getColumns()) {
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated across your project.

Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.

You can also find more detailed suggestions in the “Code” section of your repository.

Loading history...
454
            $columns = implode(',', array_map(function ($column) {
455
                return $this->format('{column|q}', ['column' => $column]);
456
            }, $table->getColumns()));
457
        } else {
458
            $columns = '';
459
        }
460
461
        $bind = [
462
            sprintf('s3://%s/%s', $bucket, $file->getPath()),
463
            sprintf(
464
                'aws_access_key_id=%s;aws_secret_access_key=%s',
465
                $credentials->getAccessKeyId(),
466
                $credentials->getSecretKey()
467
            ),
468
            $format->getDelimiter(),
469
            $format->getNullValue(),
470
            $maxErrors,
471
            $timeFormat,
472
            $dateFormat,
473
        ];
474
475
        if ($format->hasEscape()) {
476
            $csvFormat = ($format->hasQuote() ? 'REMOVEQUOTES' : '') .
477
                " ESCAPE";
478
        } else {
479
            $csvFormat = "CSV QUOTE AS ?";
480
            $bind[] = $format->getQuote();
481
        }
482
483
        $truncateColumns = $truncateColumns ? "TRUNCATECOLUMNS" : '';
484
485
        if ($format->getDataStart() > 1) {
486
            $ignoreHeaders = "IGNOREHEADERS AS ?";
487
            $bind[] = $format->getDataStart() - 1;
488
        } else {
489
            $ignoreHeaders = '';
490
        }
491
492
        $compression = $this->getCompression($file);
493
        $fileEncoding = $this->getEncoding($file);
494
        $encoding = '';
495
        if ($fileEncoding) {
0 ignored issues
show
Bug Best Practice introduced by
The expression $fileEncoding of type string|null is loosely compared to true; this is ambiguous if the string can be empty. You might want to explicitly use !== null instead.

In PHP, under loose comparison (like ==, or !=, or switch conditions), values of different types might be equal.

For string values, the empty string '' is a special case, in particular the following results might be unexpected:

''   == false // true
''   == null  // true
'ab' == false // false
'ab' == null  // false

// It is often better to use strict comparison
'' === false // false
'' === null  // false
Loading history...
496
            $encoding = 'ENCODING AS ?';
497
            $bind[] = $fileEncoding;
498
        }
499
500
        $query = <<<SQL
501
COPY {table:schema|q}.{table:table|q}
502
{columns}
503
FROM ?
504
WITH CREDENTIALS AS ?
505
FORMAT
506
DELIMITER AS ?
507
NULL AS ?
508
COMPUPDATE ON
509
ACCEPTANYDATE
510
IGNOREBLANKLINES
511
MAXERROR AS ?
512
TIMEFORMAT AS ?
513
DATEFORMAT AS ?
514
{csvFormat}
515
{truncateColumns}
516
{ignoreHeaders}
517
{compression}
518
{encoding}
519
SQL;
520
521
        return [
522
            $this->format(
523
                $query,
524
                compact('table', 'columns', 'csvFormat', 'truncateColumns', 'ignoreHeaders', 'compression', 'encoding')
525
            ),
526
            $bind,
527
        ];
528
    }
529
530
    /**
531
     * @param TableNodeInterface $table
532
     * @param FileNodeInterface  $file
533
     * @param int                $maxErrors
534
     * @param string             $timeFormat
535
     * @param string             $dateFormat
536
     *
537
     * @return array [sql, bind]
538
     */
539
    public function getImportFromJson(
540
        TableNodeInterface $table,
541
        FileNodeInterface $file,
542
        $maxErrors = 0,
543
        $timeFormat = 'YYYY-MM-DD HH:MI:SS',
544
        $dateFormat = 'YYYY-MM-DD'
545
    ) {
546
        $credentials = $this->getS3CredentialsFromFile($file);
547
        $bucket = $this->getS3BucketFromFile($file);
548
549 View Code Duplication
        if ($table->getColumns()) {
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated across your project.

Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.

You can also find more detailed suggestions in the “Code” section of your repository.

Loading history...
550
            $columns = implode(',', array_map(function ($column) {
551
                return $this->format('{column|q}', ['column' => $column]);
552
            }, $table->getColumns()));
553
        } else {
554
            $columns = '';
555
        }
556
557
        $bind = [
558
            sprintf('s3://%s/%s', $bucket, $file->getPath()),
559
            sprintf(
560
                'aws_access_key_id=%s;aws_secret_access_key=%s',
561
                $credentials->getAccessKeyId(),
562
                $credentials->getSecretKey()
563
            ),
564
            $maxErrors,
565
            $timeFormat,
566
            $dateFormat,
567
        ];
568
569
        $compression = $this->getCompression($file);
570
        $fileEncoding = $this->getEncoding($file);
571
        $encoding = '';
572
        if ($fileEncoding) {
0 ignored issues
show
Bug Best Practice introduced by
The expression $fileEncoding of type string|null is loosely compared to true; this is ambiguous if the string can be empty. You might want to explicitly use !== null instead.

In PHP, under loose comparison (like ==, or !=, or switch conditions), values of different types might be equal.

For string values, the empty string '' is a special case, in particular the following results might be unexpected:

''   == false // true
''   == null  // true
'ab' == false // false
'ab' == null  // false

// It is often better to use strict comparison
'' === false // false
'' === null  // false
Loading history...
573
            $encoding = 'ENCODING AS ?';
574
            $bind[] = $fileEncoding;
575
        }
576
577
        $query = <<<SQL
578
COPY {table:schema|q}.{table:table|q}
579
{columns}
580
FROM ?
581
WITH CREDENTIALS AS ?
582
FORMAT
583
JSON AS 'auto'
584
COMPUPDATE ON
585
ACCEPTANYDATE
586
MAXERROR AS ?
587
TIMEFORMAT AS ?
588
DATEFORMAT AS ?
589
{compression}
590
{encoding}
591
SQL;
592
593
        return [
594
            $this->format(
595
                $query,
596
                compact('table', 'columns', 'compression', 'encoding', 'test')
597
            ),
598
            $bind,
599
        ];
600
    }
601
602
    /**
603
     * @param string             $sql
604
     * @param FileNodeInterface  $file
605
     * @param CsvFormatInterface $format
606
     *
607
     * @return array [sql, bind]
608
     */
609
    public function getExportToCsv($sql, FileNodeInterface $file, CsvFormatInterface $format)
610
    {
611
        $credentials = $this->getS3CredentialsFromFile($file);
612
        $bucket = $this->getS3BucketFromFile($file);
613
614
        $bind = [
615
            $sql,
616
            sprintf('s3://%s/%s', $bucket, $file->getPath()),
617
            sprintf(
618
                'aws_access_key_id=%s;aws_secret_access_key=%s',
619
                $credentials->getAccessKeyId(),
620
                $credentials->getSecretKey()
621
            ),
622
            $format->getDelimiter(),
623
            $format->getNullValue(),
624
        ];
625
626
        $query = <<<CSV
627
UNLOAD
628
(?)
629
TO ?
630
CREDENTIALS ?
631
DELIMITER ?
632
NULL AS ?
633
{addQuotes}
634
{escape}
635
{compression}
636
{encoding}
637
CSV;
638
639
        $addQuotes = ($format->hasQuote() ? 'ADDQUOTES' : '');
640
        $escape = ($format->hasEscape() ? 'ESCAPE' : '');
641
        $compression = $this->getCompression($file);
642
        $fileEncoding = $this->getEncoding($file);
643
        $encoding = '';
644
        if ($fileEncoding) {
0 ignored issues
show
Bug Best Practice introduced by
The expression $fileEncoding of type string|null is loosely compared to true; this is ambiguous if the string can be empty. You might want to explicitly use !== null instead.

In PHP, under loose comparison (like ==, or !=, or switch conditions), values of different types might be equal.

For string values, the empty string '' is a special case, in particular the following results might be unexpected:

''   == false // true
''   == null  // true
'ab' == false // false
'ab' == null  // false

// It is often better to use strict comparison
'' === false // false
'' === null  // false
Loading history...
645
            $encoding = 'ENCODING AS ?';
646
            $bind[] = $fileEncoding;
647
        }
648
649
        return [
650
            $this->format(
651
                $query,
652
                compact('escapedSql', 'addQuotes', 'escape', 'compression', 'encoding')
653
            ),
654
            $bind,
655
        ];
656
    }
657
658
    /**
659
     * @param object $entity
660
     *
661
     * @return string
662
     */
663
    private function getCompression($entity)
664
    {
665
        if ($entity instanceof CompressionAwareInterface) {
666
            switch (strtolower($entity->getCompression())) {
667
                case Gzip::NAME:
668
                    return 'GZIP';
669
                case 'bzip2':
670
                    return 'BZIP2';
671
                case 'lzop':
672
                    return 'LZOP';
673
                case CompressionFactory::TYPE_NONE:
674
                case CompressionFactory::TYPE_UNKNOWN:
675
                    return '';
676
                default:
677
                    throw new InvalidArgumentException("Redshift is unable to handle a {$entity->getCompression()} compressed file");
678
            }
679
        }
680
681
        return '';
682
    }
683
684
    /**
685
     * @param object $entity
686
     *
687
     * @return string|null
688
     */
689
    private function getEncoding($entity)
690
    {
691
        if ($entity instanceof EncodingAwareInterface) {
692
            switch (strtolower($entity->getEncoding())) {
693
                case 'utf-8':
694
                    return 'UTF8';
695
                case 'utf-16le':
696
                    return 'UTF16LE';
697
                case 'utf-16be':
698
                    return 'UTF16BE';
699
                case 'utf-16':
700
                    return 'UTF16';
701
                case '':
702
                    return null;
703
                default:
704
                    throw new InvalidArgumentException("Redshift is unable to handle a {$entity->getEncoding()} encoded file");
705
            }
706
        }
707
708
        return null;
709
    }
710
711
    /**
712
     * @param FileNodeInterface $file
713
     *
714
     * @return CredentialsInterface
715
     */
716
    private function getS3CredentialsFromFile(FileNodeInterface $file)
717
    {
718
        $adapter = $this->getS3Adapter($file);
719
        /** @var CredentialsInterface $credentials */
720
        return $adapter->getClient()->getCredentials()->wait(true);
721
    }
722
723
    /**
724
     * @param FileNodeInterface $file
725
     *
726
     * @return string
727
     */
728
    private function getS3BucketFromFile(FileNodeInterface $file)
729
    {
730
        $adapter = $this->getS3Adapter($file);
731
        return $adapter->getBucket();
732
    }
733
734
    /**
735
     * @param FileNodeInterface $file
736
     *
737
     * @return AwsS3Adapter
738
     */
739
    private function getS3Adapter(FileNodeInterface $file)
740
    {
741
        $adapter = $file->getFilesystem()->getAdapter();
742
        if (!$adapter instanceof AwsS3Adapter) {
743
            throw new InvalidArgumentException("The supplied file: $file is not a S3 location");
744
        }
745
        return $adapter;
746
    }
747
}
748