Passed
Push — master ( 578412...e442d2 )
by Teye
05:30
created

HasInputFormat::tsvFormat()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 16
Code Lines 7

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 3
CRAP Score 1

Importance

Changes 1
Bugs 0 Features 0
Metric Value
eloc 7
c 1
b 0
f 0
dl 0
loc 16
ccs 3
cts 3
cp 1
rs 10
cc 1
nc 1
nop 5
crap 1
1
<?php
2
declare(strict_types=1);
3
4
namespace Level23\Druid\Concerns;
5
6
use Level23\Druid\InputFormats\FlattenSpec;
7
use Level23\Druid\InputFormats\CsvInputFormat;
8
use Level23\Druid\InputFormats\TsvInputFormat;
9
use Level23\Druid\InputFormats\OrcInputFormat;
10
use Level23\Druid\InputFormats\JsonInputFormat;
11
use Level23\Druid\InputFormats\ParquetInputFormat;
12
use Level23\Druid\InputFormats\ProtobufInputFormat;
13
use Level23\Druid\InputFormats\InputFormatInterface;
14
15
trait HasInputFormat
{
    /**
     * The currently selected input format.
     *
     * Defaults to null; every *Format() method in this trait replaces it with a freshly built format object.
     */
    protected ?InputFormatInterface $inputFormat = null;

    /**
     * Select JSON as the input format.
     *
     * @param FlattenSpec|null        $flattenSpec Flattening configuration for nested JSON data.
     * @param array<string,bool>|null $features    Jackson JSON parser features, keyed by feature name, which are
     *                                             applied while the input JSON is parsed.
     *
     * @return $this
     *
     * @see https://github.com/FasterXML/jackson-core/wiki/JsonParser-Features
     */
    public function jsonFormat(?FlattenSpec $flattenSpec = null, ?array $features = null): self
    {
        $format = new JsonInputFormat($flattenSpec, $features);

        $this->inputFormat = $format;

        return $this;
    }

    /**
     * Select CSV as the input format.
     *
     * @param string[]|null $columns               Column names of the data, listed in the same order as the columns
     *                                             appear in the data.
     * @param string|null   $listDelimiter         Custom delimiter for multi-value dimensions.
     * @param bool|null     $findColumnsFromHeader When set, column names are read from the header row and $columns
     *                                             is ignored. $skipHeaderRows is applied first: with
     *                                             $skipHeaderRows = 2 the first two lines are skipped and the
     *                                             column names are taken from the third line.
     * @param int           $skipHeaderRows        Number of rows at the start of the data which the task skips.
     *
     * @return $this
     */
    public function csvFormat(
        ?array $columns = null,
        ?string $listDelimiter = null,
        ?bool $findColumnsFromHeader = null,
        int $skipHeaderRows = 0
    ): self {
        $format = new CsvInputFormat(
            $columns,
            $listDelimiter,
            $findColumnsFromHeader,
            $skipHeaderRows
        );

        $this->inputFormat = $format;

        return $this;
    }

    /**
     * Select TSV as the input format.
     *
     * @param array<string>|null $columns               Column names of the data, listed in the same order as the
     *                                                  columns appear in the data.
     * @param string|null        $delimiter             Custom delimiter for data values.
     * @param string|null        $listDelimiter         Custom delimiter for multi-value dimensions.
     * @param bool|null          $findColumnsFromHeader When set, column names are read from the header row and
     *                                                  $columns is ignored. $skipHeaderRows is applied first: with
     *                                                  $skipHeaderRows = 2 the first two lines are skipped and the
     *                                                  column names are taken from the third line.
     * @param int                $skipHeaderRows        Number of rows at the start of the data which the task
     *                                                  skips.
     *
     * @return $this
     */
    public function tsvFormat(
        ?array $columns = null,
        ?string $delimiter = null,
        ?string $listDelimiter = null,
        ?bool $findColumnsFromHeader = null,
        int $skipHeaderRows = 0
    ): self {
        $format = new TsvInputFormat($columns, $delimiter, $listDelimiter, $findColumnsFromHeader, $skipHeaderRows);

        $this->inputFormat = $format;

        return $this;
    }

    /**
     * Select ORC as the input format.
     *
     * The Druid ORC extension (druid-orc-extensions) has to be loaded to use this format.
     *
     * @param FlattenSpec|null $flattenSpec    Flattening configuration for nested ORC data.
     * @param bool|null        $binaryAsString Whether a binary ORC column which is not logically marked as a string
     *                                         should be treated as a UTF-8 encoded string. Default is false.
     *
     * @return $this
     */
    public function orcFormat(?FlattenSpec $flattenSpec = null, ?bool $binaryAsString = null): self
    {
        $format = new OrcInputFormat($flattenSpec, $binaryAsString);

        $this->inputFormat = $format;

        return $this;
    }

    /**
     * Select Parquet as the input format.
     *
     * The Druid Parquet extension (druid-parquet-extensions) has to be loaded to use this format.
     *
     * @param FlattenSpec|null $flattenSpec    Flatten spec used to extract nested values from a Parquet file. Only
     *                                         'path' expressions are supported ('jq' is unavailable).
     * @param bool|null        $binaryAsString Whether a bytes Parquet column which is not logically marked as a
     *                                         string or enum type should be treated as a UTF-8 encoded string.
     *
     * @return $this
     */
    public function parquetFormat(?FlattenSpec $flattenSpec = null, ?bool $binaryAsString = null): self
    {
        $format = new ParquetInputFormat($flattenSpec, $binaryAsString);

        $this->inputFormat = $format;

        return $this;
    }

    /**
     * Select Protobuf as the input format.
     *
     * The druid-protobuf-extensions extension has to be included to use this format.
     *
     * Example $protoBytesDecoder value:
     * ```
     * [
     *     "type" => "file",
     *     "descriptor" => "file:///tmp/metrics.desc",
     *     "protoMessageType" => "Metrics"
     * ]
     * ```
     *
     * @param array<string,string> $protoBytesDecoder Describes how bytes are decoded into a Protobuf record; see
     *                                                the example above.
     * @param FlattenSpec|null     $flattenSpec       Flatten spec used to extract nested values from a Protobuf
     *                                                record. Only 'path' expressions are supported ('jq' is
     *                                                unavailable).
     *
     * @return $this
     *
     * @see https://druid.apache.org/docs/latest/ingestion/data-formats.html#protobuf
     */
    public function protobufFormat(array $protoBytesDecoder, ?FlattenSpec $flattenSpec = null): self
    {
        $format = new ProtobufInputFormat($protoBytesDecoder, $flattenSpec);

        $this->inputFormat = $format;

        return $this;
    }
}