1
|
|
|
<?php |
2
|
|
|
declare(strict_types=1); |
3
|
|
|
|
4
|
|
|
namespace Level23\Druid\TuningConfig; |
5
|
|
|
|
6
|
|
|
class TuningConfig implements TuningConfigInterface
{
    /**
     * The tuning config settings collected so far, keyed by property name.
     *
     * @var array
     */
    protected $properties = [];

    /**
     * Build a tuning config from a list of key/value pairs.
     *
     * For every key a setter named "set" followed by the key is looked up. When such a
     * setter can be called it receives the value; otherwise the value is stored as-is
     * under the given key.
     *
     * @param array $properties
     */
    public function __construct(array $properties = [])
    {
        foreach ($properties as $name => $setting) {
            $setter = [$this, 'set' . $name];

            if (is_callable($setter)) {
                // NOTE(review): call_user_func is kept on purpose instead of a direct method call.
                // Per the PHP manual, strict_types does not apply to calls made from internal
                // functions, so switching to a direct call could change argument coercion.
                call_user_func($setter, $setting);
            } else {
                // No matching setter: keep the raw value under its original key.
                $this->properties[$name] = $setting;
            }
        }
    }

    /**
     * Return all tuning config properties which have been set, keyed by property name.
     *
     * @return array
     */
    public function toArray(): array
    {
        return $this->properties;
    }

    /**
     * Set the task type. This property is required.
     *
     * @param string $type
     *
     * @return $this
     */
    public function setType(string $type)
    {
        $this->properties['type'] = $type;

        return $this;
    }

    /**
     * Set the number of rows in each segment. Used in sharding.
     *
     * Default: 5000000
     *
     * @param int $maxRowsPerSegment
     *
     * @return $this
     */
    public function setMaxRowsPerSegment(int $maxRowsPerSegment)
    {
        $this->properties['maxRowsPerSegment'] = $maxRowsPerSegment;

        return $this;
    }

    /**
     * Set the number of rows to keep in memory. Used in determining when intermediate persists to disk
     * should occur. Normally the user does not need to set this, but depending on the nature of the data,
     * if rows are short in terms of bytes, the user may not want to store a million rows in memory and
     * this value should be set.
     *
     * Default: 1000000
     *
     * @param int $maxRowsInMemory
     *
     * @return $this
     */
    public function setMaxRowsInMemory(int $maxRowsInMemory)
    {
        $this->properties['maxRowsInMemory'] = $maxRowsInMemory;

        return $this;
    }

    /**
     * Set the number of bytes to aggregate in heap memory before persisting. Used in determining when
     * intermediate persists to disk should occur. Normally this is computed internally and the user does
     * not need to set it. It is based on a rough estimate of memory usage and not on actual usage.
     * The maximum heap memory usage for indexing is maxBytesInMemory * (2 + maxPendingPersists).
     *
     * Default: 1/6 of max JVM memory
     *
     * @param int $maxBytesInMemory
     *
     * @return $this
     */
    public function setMaxBytesInMemory(int $maxBytesInMemory)
    {
        $this->properties['maxBytesInMemory'] = $maxBytesInMemory;

        return $this;
    }

    /**
     * Set the total number of rows in segments waiting to be pushed. Used in determining when
     * intermediate pushing should occur.
     *
     * Default: 20000000
     *
     * @param int $maxTotalRows
     *
     * @return $this
     */
    public function setMaxTotalRows(int $maxTotalRows)
    {
        $this->properties['maxTotalRows'] = $maxTotalRows;

        return $this;
    }

    /**
     * Directly specify the number of shards to create. If this is specified and 'intervals' is specified
     * in the granularitySpec, the index task can skip the determine intervals/partitions pass through the
     * data. numShards cannot be specified if maxRowsPerSegment is set.
     *
     * Default: null
     *
     * @param int $numShards
     *
     * @return $this
     */
    public function setNumShards(int $numShards)
    {
        $this->properties['numShards'] = $numShards;

        return $this;
    }

    /**
     * Set the split hint spec, which gives a hint to control the amount of data that each first phase
     * task reads. This hint could be ignored depending on the implementation of the input source.
     * See Split hint spec for more details.
     *
     * @see https://druid.apache.org/docs/0.20.2/ingestion/native-batch.html#split-hint-spec
     *
     * @param array $splitHintSpec
     *
     * @return $this
     */
    public function setSplitHintSpec(array $splitHintSpec)
    {
        $this->properties['splitHintSpec'] = $splitHintSpec;

        return $this;
    }

    /**
     * Set how data is partitioned in each timeChunk, see PartitionsSpec.
     *
     * @see https://druid.apache.org/docs/0.20.2/ingestion/native-batch.html#partitionsspec
     *
     * @param array $partitionsSpec
     *
     * @return $this
     */
    public function setPartitionsSpec(array $partitionsSpec)
    {
        $this->properties['partitionsSpec'] = $partitionsSpec;

        return $this;
    }

    /**
     * Set the segment storage format options to be used at indexing time.
     *
     * @see https://druid.apache.org/docs/latest/ingestion/native_tasks.html#indexspec
     *
     * @param array $indexSpec
     *
     * @return $this
     */
    public function setIndexSpec(array $indexSpec)
    {
        $this->properties['indexSpec'] = $indexSpec;

        return $this;
    }

    /**
     * Set the segment storage format options to be used at indexing time for intermediate persisted
     * temporary segments. This can be used to disable dimension/metric compression on intermediate
     * segments to reduce the memory required for final merging. However, disabling compression on
     * intermediate segments might increase page cache use while they are used before getting merged
     * into the final published segment. See IndexSpec for possible values.
     *
     * @see https://druid.apache.org/docs/0.20.2/ingestion/index.html#indexspec
     *
     * @param array $indexSpecForIntermediatePersists
     *
     * @return $this
     */
    public function setIndexSpecForIntermediatePersists(array $indexSpecForIntermediatePersists)
    {
        $this->properties['indexSpecForIntermediatePersists'] = $indexSpecForIntermediatePersists;

        return $this;
    }

    /**
     * Set the maximum number of persists that can be pending but not started. If this limit would be
     * exceeded by a new intermediate persist, ingestion will block until the currently-running persist
     * finishes. Maximum heap memory usage for indexing scales with
     * maxRowsInMemory * (2 + maxPendingPersists).
     *
     * Default: 0 (meaning one persist can be running concurrently with ingestion, and none can be queued up)
     *
     * @param int $maxPendingPersists
     *
     * @return $this
     */
    public function setMaxPendingPersists(int $maxPendingPersists)
    {
        $this->properties['maxPendingPersists'] = $maxPendingPersists;

        return $this;
    }

    /**
     * Set whether parse exceptions are reported. If true, exceptions encountered during parsing will be
     * thrown and will halt ingestion; if false, unparseable rows and fields will be skipped.
     *
     * Default: false
     *
     * @param bool $reportParseExceptions
     *
     * @return $this
     */
    public function setReportParseExceptions(bool $reportParseExceptions)
    {
        $this->properties['reportParseExceptions'] = $reportParseExceptions;

        return $this;
    }

    /**
     * Set whether perfect rollup is forced. The perfect rollup optimizes the total size of generated
     * segments and querying time, while indexing time will be increased. If this is set to true,
     * intervals in granularitySpec must be set and hashed or single_dim must be used for partitionsSpec.
     * This flag cannot be used with appendToExisting of IOConfig.
     *
     * @param bool $forceGuaranteedRollup
     *
     * @return $this
     */
    public function setForceGuaranteedRollup(bool $forceGuaranteedRollup)
    {
        $this->properties['forceGuaranteedRollup'] = $forceGuaranteedRollup;

        return $this;
    }

    /**
     * Set the number of milliseconds to wait for pushing segments. It must be >= 0, where 0 means to
     * wait forever.
     *
     * Default: 0
     *
     * @param int $pushTimeout
     *
     * @return $this
     */
    public function setPushTimeout(int $pushTimeout)
    {
        $this->properties['pushTimeout'] = $pushTimeout;

        return $this;
    }

    /**
     * Set the segment write-out medium to use when creating segments. See SegmentWriteOutMediumFactory.
     *
     * Default: Not specified, the value from druid.peon.defaultSegmentWriteOutMediumFactory.type is used
     *
     * @param string $segmentWriteOutMediumFactory
     *
     * @return $this
     */
    public function setSegmentWriteOutMediumFactory(string $segmentWriteOutMediumFactory)
    {
        $this->properties['segmentWriteOutMediumFactory'] = $segmentWriteOutMediumFactory;

        return $this;
    }

    /**
     * Set the maximum number of worker tasks which can be run in parallel at the same time. The
     * supervisor task would spawn worker tasks up to maxNumConcurrentSubTasks regardless of the current
     * available task slots. If this value is set to 1, the supervisor task processes data ingestion on
     * its own instead of spawning worker tasks. If this value is set too large, too many worker tasks
     * can be created which might block other ingestion. Check Capacity Planning for more details.
     *
     * @param int $maxNumConcurrentSubTasks
     *
     * @return $this
     */
    public function setMaxNumConcurrentSubTasks(int $maxNumConcurrentSubTasks)
    {
        $this->properties['maxNumConcurrentSubTasks'] = $maxNumConcurrentSubTasks;

        return $this;
    }

    /**
     * Set the maximum number of tasks which can be run at the same time. The supervisor task would spawn
     * worker tasks up to maxNumSubTasks regardless of the available task slots. If this value is set
     * to 1, the supervisor task processes data ingestion on its own instead of spawning worker tasks.
     * If this value is set too large, too many worker tasks can be created which might block other
     * ingestion. Check Capacity Planning for more details.
     *
     * Default: 1
     *
     * @param int $maxNumSubTasks
     *
     * @return $this
     */
    public function setMaxNumSubTasks(int $maxNumSubTasks)
    {
        $this->properties['maxNumSubTasks'] = $maxNumSubTasks;

        return $this;
    }

    /**
     * Set the maximum number of retries on task failures.
     *
     * Default: 3
     *
     * @param int $maxRetry
     *
     * @return $this
     */
    public function setMaxRetry(int $maxRetry)
    {
        $this->properties['maxRetry'] = $maxRetry;

        return $this;
    }

    /**
     * Set the max limit for the number of segments that a single task can merge at the same time in the
     * second phase. Only used when forceGuaranteedRollup is set.
     *
     * @param int $maxNumSegmentsToMerge
     *
     * @return $this
     */
    public function setMaxNumSegmentsToMerge(int $maxNumSegmentsToMerge)
    {
        $this->properties['maxNumSegmentsToMerge'] = $maxNumSegmentsToMerge;

        return $this;
    }

    /**
     * Set the total number of tasks to merge segments in the merge phase when partitionsSpec is set to
     * hashed or single_dim.
     *
     * @param int $totalNumMergeTasks
     *
     * @return $this
     */
    public function setTotalNumMergeTasks(int $totalNumMergeTasks)
    {
        $this->properties['totalNumMergeTasks'] = $totalNumMergeTasks;

        return $this;
    }

    /**
     * Set the polling period in milliseconds to check running task statuses.
     *
     * Default: 1000
     *
     * @param int $taskStatusCheckPeriodMs
     *
     * @return $this
     */
    public function setTaskStatusCheckPeriodMs(int $taskStatusCheckPeriodMs)
    {
        $this->properties['taskStatusCheckPeriodMs'] = $taskStatusCheckPeriodMs;

        return $this;
    }

    /**
     * Set the timeout for reporting the pushed segments in worker tasks.
     *
     * Default: PT10S
     *
     * @param string $chatHandlerTimeout
     *
     * @return $this
     */
    public function setChatHandlerTimeout(string $chatHandlerTimeout)
    {
        $this->properties['chatHandlerTimeout'] = $chatHandlerTimeout;

        return $this;
    }

    /**
     * Set the number of retries for reporting the pushed segments in worker tasks.
     *
     * Default: 5
     *
     * @param int $chatHandlerNumRetries
     *
     * @return $this
     */
    public function setChatHandlerNumRetries(int $chatHandlerNumRetries)
    {
        $this->properties['chatHandlerNumRetries'] = $chatHandlerNumRetries;

        return $this;
    }
}