setGroupConcatMaxLen()   A
last analyzed

Complexity

Conditions 1
Paths 1

Size

Total Lines 3
Code Lines 1

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 1
c 0
b 0
f 0
dl 0
loc 3
rs 10
cc 1
nc 1
nop 0
1
<?php
2
3
namespace App\Console\Commands;
4
5
use App\Models\Predb;
6
use App\Models\Release;
7
use Blacklight\ManticoreSearch;
8
use Exception;
9
use Illuminate\Console\Command;
10
use Illuminate\Support\Arr;
11
use Illuminate\Support\Facades\DB;
12
13
class NntmuxPopulateSearchIndexes extends Command
14
{
15
    /**
16
     * The name and signature of the console command.
17
     *
18
     * @var string
19
     */
20
    protected $signature = 'nntmux:populate
21
                                       {--manticore : Use ManticoreSearch}
22
                                       {--elastic : Use ElasticSearch}
23
                                       {--releases : Populates the releases index}
24
                                       {--predb : Populates the predb index}
25
                                       {--count=50000 : Sets the chunk size}
26
                                       {--parallel=4 : Number of parallel processes}
27
                                       {--batch-size=5000 : Batch size for bulk operations}
28
                                       {--disable-keys : Disable database keys during population}
29
                                       {--optimize : Optimize ManticoreSearch indexes}';
30
31
    /**
32
     * The console command description.
33
     *
34
     * @var string
35
     */
36
    protected $description = 'Populate Manticore/Elasticsearch indexes with either releases or predb';
37
38
    private const SUPPORTED_ENGINES = ['manticore', 'elastic'];
39
40
    private const SUPPORTED_INDEXES = ['releases', 'predb'];
41
42
    private const GROUP_CONCAT_MAX_LEN = 16384;
43
44
    private const DEFAULT_CHUNK_SIZE = 50000;
45
46
    private const DEFAULT_PARALLEL_PROCESSES = 4;
47
48
    private const DEFAULT_BATCH_SIZE = 5000;
49
50
    /**
     * Execute the console command.
     *
     * With --optimize the command only runs the optimize routine. Otherwise it
     * resolves the requested engine and index and hands over to populateIndex().
     * Any Exception raised along the way is reported and converted into a
     * failure exit code.
     *
     * @return int Command::SUCCESS or Command::FAILURE
     */
    public function handle(): int
    {
        try {
            if ($this->option('optimize')) {
                return $this->handleOptimize();
            }

            $selectedEngine = $this->getSelectedEngine();
            $selectedIndex = $this->getSelectedIndex();

            if ($selectedEngine && $selectedIndex) {
                return $this->populateIndex($selectedEngine, $selectedIndex);
            }

            // Either the engine flag or the index flag (or both) is missing.
            $this->error('You must specify both an engine (--manticore or --elastic) and an index (--releases or --predb).');
            $this->info('Use --help to see all available options.');

            return Command::FAILURE;
        } catch (Exception $exception) {
            $this->error('An error occurred: '.$exception->getMessage());

            // The stack trace is only printed when the output is in verbose mode.
            if ($this->output->isVerbose()) {
                $this->error($exception->getTraceAsString());
            }

            return Command::FAILURE;
        }
    }
82
83
    /**
     * Get the selected search engine from options.
     *
     * Engines are checked in the order they appear in SUPPORTED_ENGINES, so when
     * several engine flags are passed the first listed one wins.
     *
     * @return string|null The engine name, or null when no engine flag is set
     */
    private function getSelectedEngine(): ?string
    {
        return Arr::first(
            self::SUPPORTED_ENGINES,
            function (string $candidate): bool {
                return (bool) $this->option($candidate);
            }
        );
    }
96
97
    /**
     * Get the selected index from options.
     *
     * Indexes are checked in the order they appear in SUPPORTED_INDEXES, so when
     * several index flags are passed the first listed one wins.
     *
     * @return string|null The index name, or null when no index flag is set
     */
    private function getSelectedIndex(): ?string
    {
        return Arr::first(
            self::SUPPORTED_INDEXES,
            function (string $candidate): bool {
                return (bool) $this->option($candidate);
            }
        );
    }
110
111
    /**
     * Handle the optimize command.
     *
     * Calls ManticoreSearch::optimizeRTIndex() and reports the outcome on the
     * console.
     *
     * @return int Command::SUCCESS when the call returns, Command::FAILURE when it throws
     */
    private function handleOptimize(): int
    {
        $this->info('Optimizing ManticoreSearch indexes...');

        try {
            $manticore = new ManticoreSearch;
            $manticore->optimizeRTIndex();
            $this->info('Optimization completed successfully!');

            $exitCode = Command::SUCCESS;
        } catch (Exception $failure) {
            $this->error('Optimization failed: '.$failure->getMessage());

            $exitCode = Command::FAILURE;
        }

        return $exitCode;
    }
129
130
    /**
     * Populate the specified index with the specified engine.
     *
     * The work is dispatched to a method of this class named
     * "<engine><Index>" (for example manticoreReleases). The elapsed time is
     * printed only when that method reports success.
     *
     * @param  string  $engine  Engine name as returned by getSelectedEngine()
     * @param  string  $index  Index name as returned by getSelectedIndex()
     * @return int Exit code of the handler, or Command::FAILURE when no handler exists
     */
    private function populateIndex(string $engine, string $index): int
    {
        $handler = $engine.ucfirst($index);

        if (! method_exists($this, $handler)) {
            $this->error("Method {$handler} not implemented.");

            return Command::FAILURE;
        }

        $this->info("Starting {$engine} {$index} population...");

        $startedAt = microtime(true);
        $exitCode = $this->{$handler}();
        $elapsedSeconds = round(microtime(true) - $startedAt, 2);

        if ($exitCode === Command::SUCCESS) {
            $this->info("Population completed in {$elapsedSeconds} seconds.");
        }

        return $exitCode;
    }
155
156
    /**
     * Populate ManticoreSearch releases index.
     *
     * Truncates the releases_rt index, then streams every release (with the
     * names of its release_files rows concatenated into one "filename" string)
     * through processManticoreData().
     *
     * @return int Command::SUCCESS or Command::FAILURE
     */
    private function manticoreReleases(): int
    {
        $manticore = new ManticoreSearch;
        $indexName = 'releases_rt';

        $manticore->truncateRTIndex(Arr::wrap($indexName));

        $total = Release::count();
        if (! $total) {
            $this->warn('Releases table is empty. Nothing to do.');

            return Command::SUCCESS;
        }

        // The same columns are selected and grouped by, so GROUP_CONCAT collapses
        // the joined release_files rows into a single row per release.
        $releaseColumns = [
            'releases.id',
            'releases.name',
            'releases.searchname',
            'releases.fromname',
            'releases.categories_id',
        ];

        $query = Release::query()
            ->orderByDesc('releases.id')
            ->leftJoin('release_files', 'releases.id', '=', 'release_files.releases_id')
            ->select($releaseColumns)
            ->selectRaw('IFNULL(GROUP_CONCAT(release_files.name SEPARATOR " "),"") AS filename')
            ->groupBy($releaseColumns);

        return $this->processManticoreData(
            $indexName,
            $total,
            $query,
            function ($item) {
                // Use ?? rather than ?: for the text fields: ?: tests truthiness
                // and would replace a legitimate value of '0' with an empty string.
                return [
                    'id' => (string) $item->id,
                    'name' => (string) ($item->name ?? ''),
                    'searchname' => (string) ($item->searchname ?? ''),
                    'fromname' => (string) ($item->fromname ?? ''),
                    // Any empty category (null, 0, '') is indexed as '0'.
                    'categories_id' => (string) ($item->categories_id ?: '0'),
                    'filename' => (string) ($item->filename ?? ''),
                ];
            }
        );
    }
205
206
    /**
     * Populate ManticoreSearch predb index.
     *
     * Truncates the predb_rt index, then streams every predb row, ordered by
     * id, through processManticoreData().
     *
     * @return int Command::SUCCESS or Command::FAILURE
     */
    private function manticorePredb(): int
    {
        $indexName = 'predb_rt';

        $manticore = new ManticoreSearch;
        $manticore->truncateRTIndex([$indexName]);

        $total = Predb::count();
        if (! $total) {
            $this->warn('PreDB table is empty. Nothing to do.');

            return Command::SUCCESS;
        }

        $query = Predb::query()
            ->select(['id', 'title', 'filename', 'source'])
            ->orderBy('id');

        // Maps one predb row to the document sent to the index; null text
        // columns become empty strings.
        $toDocument = static function ($row) {
            return [
                'id' => $row->id,
                'title' => (string) ($row->title ?? ''),
                'filename' => (string) ($row->filename ?? ''),
                'source' => (string) ($row->source ?? ''),
            ];
        };

        return $this->processManticoreData($indexName, $total, $query, $toDocument);
    }
241
242
    /**
     * Process data for ManticoreSearch with optimizations.
     *
     * Reads the query in chunks of --count rows, converts each row with
     * $transformer and writes the resulting documents to the index in batches
     * of --batch-size via processBatch(). A progress bar sized to $total is
     * advanced once per row.
     *
     * Rows whose transformation throws are counted as errors and skipped. A
     * batch write that still fails after processBatch()'s retries aborts the
     * run with Command::FAILURE, matching processElasticData(). Previously such
     * a failure was swallowed as a single item error while the unsent batch
     * kept growing and was re-sent for every following row.
     *
     * @param  string  $indexName  Name of the ManticoreSearch table to write to
     * @param  int  $total  Number of rows expected, used for the progress bar
     * @param  mixed  $query  Query object exposing chunk($size, $callback)
     * @param  callable  $transformer  Maps one row to the document array to index
     * @return int Command::SUCCESS or Command::FAILURE
     */
    private function processManticoreData(string $indexName, int $total, $query, callable $transformer): int
    {
        $manticore = new ManticoreSearch;
        $chunkSize = $this->getChunkSize();
        $batchSize = $this->getBatchSize();

        $this->optimizeDatabase();
        $this->setGroupConcatMaxLen();

        $this->info(sprintf(
            "Populating ManticoreSearch index '%s' with %s rows using chunks of %s and batch size of %s.",
            $indexName,
            number_format($total),
            number_format($chunkSize),
            number_format($batchSize)
        ));

        $bar = $this->output->createProgressBar($total);
        $bar->setFormat('verbose');
        $bar->start();

        $processedCount = 0;
        $errorCount = 0;
        // Documents waiting to be written; carried across chunk boundaries.
        $batchData = [];

        try {
            $query->chunk($chunkSize, function ($items) use ($manticore, $indexName, $transformer, $bar, &$processedCount, &$errorCount, $batchSize, &$batchData) {
                foreach ($items as $item) {
                    // Only the per-row transformation is guarded here, so a bad
                    // row is skipped without affecting the rest of the batch.
                    try {
                        $batchData[] = $transformer($item);
                        $processedCount++;
                    } catch (Exception $e) {
                        $errorCount++;
                        if ($this->output->isVerbose()) {
                            $this->error("Error processing item {$item->id}: {$e->getMessage()}");
                        }
                    }

                    // Flush outside the per-row try: a write failure is a batch
                    // problem, not an item problem, and must stop the run.
                    if (count($batchData) >= $batchSize) {
                        $this->processBatch($manticore, $indexName, $batchData);
                        $batchData = [];
                    }

                    $bar->advance();
                }
            });

            // Process remaining items
            if (! empty($batchData)) {
                $this->processBatch($manticore, $indexName, $batchData);
            }

            $bar->finish();
            $this->newLine();

            if ($errorCount > 0) {
                $this->warn("Completed with {$errorCount} errors out of {$processedCount} processed items.");
            } else {
                $this->info('ManticoreSearch population completed successfully!');
            }

            return Command::SUCCESS;
        } catch (Exception $e) {
            $bar->finish();
            $this->newLine();
            $this->error("Failed to populate ManticoreSearch: {$e->getMessage()}");

            return Command::FAILURE;
        } finally {
            // Runs on both the success and the failure path.
            $this->restoreDatabase();
        }
    }
318
319
    /**
     * Populate ElasticSearch releases index.
     *
     * Streams every release (with the names of its release_files rows
     * concatenated into one "filename" string) through processElasticData()
     * into the "releases" index.
     *
     * @return int Command::SUCCESS or Command::FAILURE
     */
    private function elasticReleases(): int
    {
        $total = Release::count();
        if (! $total) {
            $this->warn('Releases table is empty. Nothing to do.');

            return Command::SUCCESS;
        }

        // Selected and grouped by the same columns so each release yields one row.
        $releaseColumns = [
            'releases.id',
            'releases.name',
            'releases.searchname',
            'releases.fromname',
            'releases.categories_id',
            'releases.postdate',
        ];

        $query = Release::query()
            ->orderByDesc('releases.id')
            ->leftJoin('release_files', 'releases.id', '=', 'release_files.releases_id')
            ->select($releaseColumns)
            ->selectRaw('IFNULL(GROUP_CONCAT(release_files.name SEPARATOR " "),"") AS filename')
            ->groupBy($releaseColumns);

        $toDocument = static function ($release) {
            // "plainsearchname" is the search name with dots and dashes turned into spaces.
            $plainSearchName = str_replace(['.', '-'], ' ', $release->searchname ?? '');

            return [
                'id' => $release->id,
                'name' => $release->name,
                'searchname' => $release->searchname,
                'plainsearchname' => $plainSearchName,
                'fromname' => $release->fromname,
                'categories_id' => $release->categories_id,
                'filename' => $release->filename ?? '',
                'postdate' => $release->postdate,
            ];
        };

        return $this->processElasticData('releases', $total, $query, $toDocument);
    }
372
373
    /**
     * Populate ElasticSearch predb index.
     *
     * Streams every predb row, ordered by id, through processElasticData()
     * into the "predb" index.
     *
     * @return int Command::SUCCESS or Command::FAILURE
     */
    private function elasticPredb(): int
    {
        $total = Predb::count();
        if (! $total) {
            $this->warn('PreDB table is empty. Nothing to do.');

            return Command::SUCCESS;
        }

        $query = Predb::query()
            ->select(['id', 'title', 'filename', 'source'])
            ->orderBy('id');

        // Column values are passed through unchanged.
        $toDocument = static function ($row) {
            return [
                'id' => $row->id,
                'title' => $row->title,
                'filename' => $row->filename,
                'source' => $row->source,
            ];
        };

        return $this->processElasticData('predb', $total, $query, $toDocument);
    }
403
404
    /**
     * Process data for ElasticSearch with optimizations.
     *
     * Reads the query in chunks of --count rows, splits each chunk into
     * sub-batches of --batch-size, and sends every sub-batch as one bulk
     * request made of alternating "index" action and document entries. Rows
     * whose transformation throws are counted as errors and left out of the
     * request; an exception from the bulk request itself ends the run with
     * Command::FAILURE.
     *
     * @param  string  $indexName  Name of the ElasticSearch index to write to
     * @param  int  $total  Number of rows expected, used for the progress bar
     * @param  mixed  $query  Query object exposing chunk($size, $callback)
     * @param  callable  $transformer  Maps one row to the document array to index
     * @return int Command::SUCCESS or Command::FAILURE
     */
    private function processElasticData(string $indexName, int $total, $query, callable $transformer): int
    {
        $rowsPerChunk = $this->getChunkSize();
        $rowsPerBatch = $this->getBatchSize();

        $this->optimizeDatabase();
        $this->setGroupConcatMaxLen();

        $this->info(sprintf(
            "Populating ElasticSearch index '%s' with %s rows using chunks of %s and batch size of %s.",
            $indexName,
            number_format($total),
            number_format($rowsPerChunk),
            number_format($rowsPerBatch)
        ));

        $progress = $this->output->createProgressBar($total);
        $progress->setFormat('verbose');
        $progress->start();

        $processedCount = 0;
        $errorCount = 0;

        try {
            $query->chunk($rowsPerChunk, function ($rows) use ($indexName, $transformer, $progress, &$processedCount, &$errorCount, $rowsPerBatch) {
                foreach ($rows->chunk($rowsPerBatch) as $batchRows) {
                    $payload = ['body' => []];

                    foreach ($batchRows as $row) {
                        try {
                            // Transform first so a failing row adds nothing to the payload.
                            $document = $transformer($row);
                            $action = [
                                'index' => [
                                    '_index' => $indexName,
                                    '_id' => $row->id,
                                ],
                            ];

                            array_push($payload['body'], $action, $document);

                            $processedCount++;
                        } catch (Exception $rowFailure) {
                            $errorCount++;
                            if ($this->output->isVerbose()) {
                                $this->error(sprintf('Error processing item %s: %s', $row->id, $rowFailure->getMessage()));
                            }
                        }

                        $progress->advance();
                    }

                    // Skip the request when every row of the sub-batch failed to transform.
                    if ($payload['body'] !== []) {
                        $this->processElasticBatch($payload, $errorCount);
                    }
                }
            });

            $progress->finish();
            $this->newLine();

            if ($errorCount > 0) {
                $this->warn("Completed with {$errorCount} errors out of {$processedCount} processed items.");
            } else {
                $this->info('ElasticSearch population completed successfully!');
            }

            return Command::SUCCESS;
        } catch (Exception $failure) {
            $progress->finish();
            $this->newLine();
            $this->error('Failed to populate ElasticSearch: '.$failure->getMessage());

            return Command::FAILURE;
        } finally {
            // Runs on both the success and the failure path.
            $this->restoreDatabase();
        }
    }
486
487
    /**
     * Process ManticoreSearch batch with retry logic.
     *
     * Sends the documents with replaceDocuments(). The call is attempted up to
     * three times with a 100 ms pause between attempts; the exception from the
     * last failed attempt is rethrown.
     *
     * @param  ManticoreSearch  $manticore  Wrapper exposing the client as ->manticoreSearch
     * @param  string  $indexName  Name of the table to write to
     * @param  array  $data  Documents to write
     *
     * @throws Exception When the third attempt fails
     */
    private function processBatch(ManticoreSearch $manticore, string $indexName, array $data): void
    {
        $maxAttempts = 3;

        for ($attempt = 1; ; $attempt++) {
            try {
                $manticore->manticoreSearch->table($indexName)->replaceDocuments($data);

                return;
            } catch (Exception $failure) {
                if ($attempt >= $maxAttempts) {
                    throw $failure;
                }

                usleep(100000); // 100ms delay before retry
            }
        }
    }
508
509
    /**
     * Process ElasticSearch batch with retry logic.
     *
     * Sends one bulk request. When the response flags errors, every item that
     * carries an "index" error increments $errorCount (and is printed in
     * verbose mode). A request that throws is attempted up to three times with
     * a 100 ms pause between attempts; the exception from the last failed
     * attempt is rethrown.
     *
     * @param  array  $data  Bulk request parameters (a "body" list of action/document entries)
     * @param  int  $errorCount  Running error counter, updated in place
     *
     * @throws Exception When the third attempt fails
     */
    private function processElasticBatch(array $data, int &$errorCount): void
    {
        $maxAttempts = 3;

        for ($attempt = 1; ; $attempt++) {
            try {
                $response = \Elasticsearch::bulk($data);

                // Only walk the items when the response reports at least one error.
                if (! empty($response['errors'])) {
                    foreach ($response['items'] as $result) {
                        if (! isset($result['index']['error'])) {
                            continue;
                        }

                        $errorCount++;
                        if ($this->output->isVerbose()) {
                            $this->error('ElasticSearch error: '.json_encode($result['index']['error']));
                        }
                    }
                }

                return;
            } catch (Exception $failure) {
                if ($attempt >= $maxAttempts) {
                    throw $failure;
                }

                usleep(100000); // 100ms delay before retry
            }
        }
    }
542
543
    /**
     * Optimize database settings for bulk operations.
     *
     * Does nothing unless --disable-keys is given. Otherwise it turns off
     * foreign key checks, unique checks and autocommit for the current
     * connection and raises three per-session buffer sizes.
     *
     * Each statement is applied on its own, so one setting the server rejects
     * only produces a warning and does not prevent the remaining ones from
     * being applied.
     */
    private function optimizeDatabase(): void
    {
        if (! $this->option('disable-keys')) {
            return;
        }

        $this->info('Disabling database keys for faster bulk operations...');

        // innodb_buffer_pool_size is intentionally absent: it is a GLOBAL-only
        // variable, so "SET SESSION innodb_buffer_pool_size" is always rejected
        // by the server, which previously aborted every statement listed after it.
        $statements = [
            // Disable foreign key checks
            'SET FOREIGN_KEY_CHECKS = 0',
            'SET UNIQUE_CHECKS = 0',
            'SET AUTOCOMMIT = 0',
            // Increase buffer sizes
            'SET SESSION bulk_insert_buffer_size = 268435456', // 256MB
            'SET SESSION read_buffer_size = 2097152', // 2MB
            'SET SESSION sort_buffer_size = 16777216', // 16MB
        ];

        foreach ($statements as $statement) {
            try {
                DB::statement($statement);
            } catch (Exception $e) {
                // Best effort: the population still works without the tuning.
                $this->warn("Could not optimize database settings: {$e->getMessage()}");
            }
        }
    }
568
569
    /**
     * Restore database settings after bulk operations.
     *
     * Does nothing unless --disable-keys is given. Otherwise it re-enables
     * foreign key checks, unique checks and autocommit and issues a COMMIT.
     * The first statement that fails stops the sequence and is reported as a
     * warning.
     */
    private function restoreDatabase(): void
    {
        if (! $this->option('disable-keys')) {
            return;
        }

        $this->info('Restoring database settings...');

        $statements = [
            'SET FOREIGN_KEY_CHECKS = 1',
            'SET UNIQUE_CHECKS = 1',
            'SET AUTOCOMMIT = 1',
            'COMMIT',
        ];

        try {
            foreach ($statements as $statement) {
                DB::statement($statement);
            }
        } catch (Exception $failure) {
            $this->warn('Could not restore database settings: '.$failure->getMessage());
        }
    }
587
588
    /**
     * Get the chunk size from options.
     *
     * @return int The --count value cast to int, or DEFAULT_CHUNK_SIZE when
     *             that value is not greater than zero
     */
    private function getChunkSize(): int
    {
        $requested = (int) $this->option('count');

        if ($requested <= 0) {
            return self::DEFAULT_CHUNK_SIZE;
        }

        return $requested;
    }
597
598
    /**
     * Get the batch size from options.
     *
     * @return int The --batch-size value cast to int, or DEFAULT_BATCH_SIZE
     *             when that value is not greater than zero
     */
    private function getBatchSize(): int
    {
        $requested = (int) $this->option('batch-size');

        if ($requested <= 0) {
            return self::DEFAULT_BATCH_SIZE;
        }

        return $requested;
    }
607
608
    /**
     * Set the GROUP_CONCAT max length for the session.
     *
     * Applies GROUP_CONCAT_MAX_LEN to the session variable group_concat_max_len
     * through a bound parameter.
     */
    private function setGroupConcatMaxLen(): void
    {
        $bindings = [self::GROUP_CONCAT_MAX_LEN];

        DB::statement('SET SESSION group_concat_max_len = ?', $bindings);
    }
615
}
616