<?php

declare(strict_types=1);

namespace Level23\Druid\TuningConfig;

/**
 * Collects the properties of a Druid ingestion "tuningConfig" section.
 *
 * Every setter stores its value under the matching key in the internal properties array and
 * returns the object itself so calls can be chained. toArray() returns the collected properties.
 */
class TuningConfig implements TuningConfigInterface
{
    /**
     * The collected tuning config properties, indexed by property name.
     *
     * @var array
     */
    protected $properties = [];

    /**
     * TuningConfig constructor.
     *
     * For every given key a setter named "set" + key is looked up on this object. When such a
     * setter is callable, the value is passed through it. When it is not, the value is stored
     * as-is under the given key, so properties without a dedicated setter are kept as well.
     *
     * PHP method names are case-insensitive, so the key "type" resolves to setType().
     *
     * @param array $properties Property values indexed by property name.
     */
    public function __construct(array $properties = [])
    {
        foreach ($properties as $key => $value) {
            $method = 'set' . $key;

            $callable = [$this, $method];
            if (!is_callable($callable)) {
                // No setter for this key: keep the raw value under the key as given.
                $this->properties[$key] = $value;
                continue;
            }

            // NOTE(review): the setter is invoked through call_user_func(). A direct method call
            // would subject $value to this file's strict_types checks; confirm that no caller
            // relies on scalar coercion before changing this.
            call_user_func($callable, $value);
        }
    }

    /**
     * Return the collected tuning config properties.
     *
     * @return array
     */
    public function toArray(): array
    {
        return $this->properties;
    }

    /**
     * The task type
     *
     * @param string $type
     *
     * @return $this
     * @required
     */
    public function setType(string $type)
    {
        $this->properties['type'] = $type;

        return $this;
    }

    /**
     * Used in sharding. Determines how many rows are in each segment.
     * Default: 5000000
     *
     * @param int $maxRowsPerSegment
     *
     * @return $this
     */
    public function setMaxRowsPerSegment(int $maxRowsPerSegment)
    {
        $this->properties['maxRowsPerSegment'] = $maxRowsPerSegment;

        return $this;
    }

    /**
     * Used in determining when intermediate persists to disk should occur. Normally user does not need to set this,
     * but depending on the nature of data, if rows are short in terms of bytes, user may not want to store a million
     * rows in memory and this value should be set.
     *
     * Default: 1000000
     *
     * @param int $maxRowsInMemory
     *
     * @return $this
     */
    public function setMaxRowsInMemory(int $maxRowsInMemory)
    {
        $this->properties['maxRowsInMemory'] = $maxRowsInMemory;

        return $this;
    }

    /**
     * Used in determining when intermediate persists to disk should occur. Normally this is computed internally and
     * user does not need to set it. This value represents number of bytes to aggregate in heap memory before
     * persisting. This is based on a rough estimate of memory usage and not actual usage. The maximum heap memory
     * usage for indexing is maxBytesInMemory * (2 + maxPendingPersists)
     *
     * Default: 1/6 of max JVM memory
     *
     * @param int $maxBytesInMemory
     *
     * @return $this
     */
    public function setMaxBytesInMemory(int $maxBytesInMemory)
    {
        $this->properties['maxBytesInMemory'] = $maxBytesInMemory;

        return $this;
    }

    /**
     * Total number of rows in segments waiting for being pushed. Used in determining when intermediate pushing should
     * occur.
     *
     * Default: 20000000
     *
     * @param int $maxTotalRows
     *
     * @return $this
     */
    public function setMaxTotalRows(int $maxTotalRows)
    {
        $this->properties['maxTotalRows'] = $maxTotalRows;

        return $this;
    }

    /**
     * Directly specify the number of shards to create. If this is specified and 'intervals' is specified in the
     * granularitySpec, the index task can skip the determine intervals/partitions pass through the data. numShards
     * cannot be specified if maxRowsPerSegment is set.
     *
     * Default: null
     *
     * @param int $numShards
     *
     * @return $this
     */
    public function setNumShards(int $numShards)
    {
        $this->properties['numShards'] = $numShards;

        return $this;
    }

    /**
     * Used to give a hint to control the amount of data that each first phase task reads.
     * This hint could be ignored depending on the implementation of the input source. See Split hint spec for more
     * details.
     *
     * @see https://druid.apache.org/docs/0.20.2/ingestion/native-batch.html#split-hint-spec
     *
     * @param array $splitHintSpec
     *
     * @return $this
     */
    public function setSplitHintSpec(array $splitHintSpec)
    {
        $this->properties['splitHintSpec'] = $splitHintSpec;

        return $this;
    }

    /**
     * Defines how to partition data in each timeChunk, see PartitionsSpec
     *
     * @see https://druid.apache.org/docs/0.20.2/ingestion/native-batch.html#partitionsspec
     *
     * @param array $partitionsSpec
     *
     * @return $this
     */
    public function setPartitionsSpec(array $partitionsSpec)
    {
        $this->properties['partitionsSpec'] = $partitionsSpec;

        return $this;
    }

    /**
     * Defines segment storage format options to be used at indexing time
     *
     * @see https://druid.apache.org/docs/latest/ingestion/native_tasks.html#indexspec
     *
     * @param array $indexSpec
     *
     * @return $this
     */
    public function setIndexSpec(array $indexSpec)
    {
        $this->properties['indexSpec'] = $indexSpec;

        return $this;
    }

    /**
     * Defines segment storage format options to be used at indexing time for intermediate persisted temporary segments.
     * This can be used to disable dimension/metric compression on intermediate segments to reduce memory required for
     * final merging. However, disabling compression on intermediate segments might increase page cache use while they
     * are used before getting merged into final segment published, see IndexSpec for possible values.
     *
     * @see https://druid.apache.org/docs/0.20.2/ingestion/index.html#indexspec
     *
     * @param array $indexSpecForIntermediatePersists
     *
     * @return $this
     */
    public function setIndexSpecForIntermediatePersists(array $indexSpecForIntermediatePersists)
    {
        $this->properties['indexSpecForIntermediatePersists'] = $indexSpecForIntermediatePersists;

        return $this;
    }

    /**
     * Maximum number of persists that can be pending but not started. If this limit would be exceeded by a new
     * intermediate persist, ingestion will block until the currently-running persist finishes. Maximum heap memory
     * usage for indexing scales with maxRowsInMemory * (2 + maxPendingPersists).
     *
     * Default: 0 (meaning one persist can be running concurrently with ingestion, and none can be queued up)
     *
     * @param int $maxPendingPersists
     *
     * @return $this
     */
    public function setMaxPendingPersists(int $maxPendingPersists)
    {
        $this->properties['maxPendingPersists'] = $maxPendingPersists;

        return $this;
    }

    /**
     * If true, exceptions encountered during parsing will be thrown and will halt ingestion; if false, unparseable
     * rows and fields will be skipped.
     *
     * Default: false
     *
     * @param bool $reportParseExceptions
     *
     * @return $this
     */
    public function setReportParseExceptions(bool $reportParseExceptions)
    {
        $this->properties['reportParseExceptions'] = $reportParseExceptions;

        return $this;
    }

    /**
     * Forces guaranteeing the perfect rollup. The perfect rollup optimizes the total size of generated segments and
     * querying time while indexing time will be increased. If this is set to true, intervals in granularitySpec must
     * be set and hashed or single_dim must be used for partitionsSpec. This flag cannot be used with appendToExisting
     * of IOConfig.
     *
     * @param bool $forceGuaranteedRollup
     *
     * @return $this
     */
    public function setForceGuaranteedRollup(bool $forceGuaranteedRollup)
    {
        $this->properties['forceGuaranteedRollup'] = $forceGuaranteedRollup;

        return $this;
    }

    /**
     * Milliseconds to wait for pushing segments. It must be >= 0, where 0 means to wait forever.
     *
     * Default: 0
     *
     * @param int $pushTimeout
     *
     * @return $this
     */
    public function setPushTimeout(int $pushTimeout)
    {
        $this->properties['pushTimeout'] = $pushTimeout;

        return $this;
    }

    /**
     * Segment write-out medium to use when creating segments. See SegmentWriteOutMediumFactory.
     *
     * Default: Not specified, the value from druid.peon.defaultSegmentWriteOutMediumFactory.type is used
     *
     * @param string $segmentWriteOutMediumFactory
     *
     * @return $this
     */
    public function setSegmentWriteOutMediumFactory(string $segmentWriteOutMediumFactory)
    {
        $this->properties['segmentWriteOutMediumFactory'] = $segmentWriteOutMediumFactory;

        return $this;
    }

    /**
     * Maximum number of worker tasks which can be run in parallel at the same time. The supervisor task would spawn
     * worker tasks up to maxNumConcurrentSubTasks regardless of the current available task slots.
     * If this value is set to 1, the supervisor task processes data ingestion on its own instead of
     * spawning worker tasks. If this value is set to too large, too many worker tasks can be created which might
     * block other ingestion. Check Capacity Planning for more details.
     *
     * @param int $maxNumConcurrentSubTasks
     *
     * @return $this
     */
    public function setMaxNumConcurrentSubTasks(int $maxNumConcurrentSubTasks)
    {
        $this->properties['maxNumConcurrentSubTasks'] = $maxNumConcurrentSubTasks;

        return $this;
    }

    /**
     * Maximum number of tasks which can be run at the same time. The supervisor task would spawn worker tasks up to
     * maxNumSubTasks regardless of the available task slots. If this value is set to 1, the supervisor task processes
     * data ingestion on its own instead of spawning worker tasks. If this value is set to too large, too many worker
     * tasks can be created which might block other ingestion. Check Capacity Planning for more details.
     *
     * Default: 1
     *
     * @param int $maxNumSubTasks
     *
     * @return $this
     */
    public function setMaxNumSubTasks(int $maxNumSubTasks)
    {
        $this->properties['maxNumSubTasks'] = $maxNumSubTasks;

        return $this;
    }

    /**
     * Maximum number of retries on task failures.
     *
     * Default: 3
     *
     * @param int $maxRetry
     *
     * @return $this
     */
    public function setMaxRetry(int $maxRetry)
    {
        $this->properties['maxRetry'] = $maxRetry;

        return $this;
    }

    /**
     * Max limit for the number of segments that a single task can merge at the same time in the second phase.
     * Used only when forceGuaranteedRollup is set.
     *
     * @param int $maxNumSegmentsToMerge
     *
     * @return $this
     */
    public function setMaxNumSegmentsToMerge(int $maxNumSegmentsToMerge)
    {
        $this->properties['maxNumSegmentsToMerge'] = $maxNumSegmentsToMerge;

        return $this;
    }

    /**
     * Total number of tasks to merge segments in the merge phase when partitionsSpec is set to hashed or single_dim.
     *
     * @param int $totalNumMergeTasks
     *
     * @return $this
     */
    public function setTotalNumMergeTasks(int $totalNumMergeTasks)
    {
        $this->properties['totalNumMergeTasks'] = $totalNumMergeTasks;

        return $this;
    }

    /**
     * Polling period in milliseconds to check running task statuses.
     *
     * Default: 1000
     *
     * @param int $taskStatusCheckPeriodMs
     *
     * @return $this
     */
    public function setTaskStatusCheckPeriodMs(int $taskStatusCheckPeriodMs)
    {
        $this->properties['taskStatusCheckPeriodMs'] = $taskStatusCheckPeriodMs;

        return $this;
    }

    /**
     * Timeout for reporting the pushed segments in worker tasks.
     *
     * Default: PT10S
     *
     * @param string $chatHandlerTimeout
     *
     * @return $this
     */
    public function setChatHandlerTimeout(string $chatHandlerTimeout)
    {
        $this->properties['chatHandlerTimeout'] = $chatHandlerTimeout;

        return $this;
    }

    /**
     * Retries for reporting the pushed segments in worker tasks.
     *
     * Default: 5
     *
     * @param int $chatHandlerNumRetries
     *
     * @return $this
     */
    public function setChatHandlerNumRetries(int $chatHandlerNumRetries)
    {
        $this->properties['chatHandlerNumRetries'] = $chatHandlerNumRetries;

        return $this;
    }
}