Passed
Push — master ( dc98af...23ee52 )
by Darko
07:01
created

NntmuxPopulateSearchIndexes   F

Complexity

Total Complexity 70

Size/Duplication

Total Lines 603
Duplicated Lines 0 %

Importance

Changes 7
Bugs 3 Features 0
Metric Value
wmc 70
eloc 312
c 7
b 3
f 0
dl 0
loc 603
rs 2.8

18 Methods

Rating   Name   Duplication   Size   Complexity  
B processElasticData() 0 77 8
A getSelectedIndex() 0 9 3
A populateIndex() 0 21 3
A getChunkSize() 0 5 2
A restoreDatabase() 0 12 3
A getBatchSize() 0 5 2
A optimizeDatabase() 0 19 3
A getSelectedEngine() 0 9 3
A processBatch() 0 15 4
A setGroupConcatMaxLen() 0 3 1
A elasticPredb() 0 23 2
A elasticReleases() 0 46 2
A manticorePredb() 0 29 2
A handleOptimize() 0 13 2
A handle() 0 27 6
B processManticoreData() 0 71 8
B manticoreReleases() 0 46 7
B processElasticBatch() 0 27 9

How to fix   Complexity   

Complex Class

Complex classes like NntmuxPopulateSearchIndexes often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

While breaking up the class, it is a good idea to analyze how other classes use NntmuxPopulateSearchIndexes, and based on these observations, apply Extract Interface, too.

1
<?php
2
3
namespace App\Console\Commands;
4
5
use App\Models\Predb;
6
use App\Models\Release;
7
use Blacklight\ManticoreSearch;
8
use Exception;
9
use Illuminate\Console\Command;
10
use Illuminate\Support\Arr;
11
use Illuminate\Support\Facades\DB;
12
13
class NntmuxPopulateSearchIndexes extends Command
14
{
15
    /**
16
     * The name and signature of the console command.
17
     *
18
     * @var string
19
     */
20
    protected $signature = 'nntmux:populate
21
                                       {--manticore : Use ManticoreSearch}
22
                                       {--elastic : Use ElasticSearch}
23
                                       {--releases : Populates the releases index}
24
                                       {--predb : Populates the predb index}
25
                                       {--count=50000 : Sets the chunk size}
26
                                       {--parallel=4 : Number of parallel processes}
27
                                       {--batch-size=5000 : Batch size for bulk operations}
28
                                       {--disable-keys : Disable database keys during population}
29
                                       {--optimize : Optimize ManticoreSearch indexes}';
30
31
    /**
32
     * The console command description.
33
     *
34
     * @var string
35
     */
36
    protected $description = 'Populate Manticore/Elasticsearch indexes with either releases or predb';
37
38
    private const SUPPORTED_ENGINES = ['manticore', 'elastic'];
39
40
    private const SUPPORTED_INDEXES = ['releases', 'predb'];
41
42
    private const GROUP_CONCAT_MAX_LEN = 16384;
43
44
    private const DEFAULT_CHUNK_SIZE = 50000;
45
46
    private const DEFAULT_PARALLEL_PROCESSES = 4;
47
48
    private const DEFAULT_BATCH_SIZE = 5000;
49
50
    /**
     * Execute the console command.
     *
     * With --optimize the command only optimizes the ManticoreSearch indexes.
     * Otherwise it requires one engine flag and one index flag and delegates
     * to populateIndex().
     *
     * @return int Command::SUCCESS or Command::FAILURE
     */
    public function handle(): int
    {
        try {
            if ($this->option('optimize')) {
                return $this->handleOptimize();
            }

            $selectedEngine = $this->getSelectedEngine();
            $selectedIndex = $this->getSelectedIndex();

            // Happy path first: both an engine and an index were requested.
            if ($selectedEngine && $selectedIndex) {
                return $this->populateIndex($selectedEngine, $selectedIndex);
            }

            $this->error('You must specify both an engine (--manticore or --elastic) and an index (--releases or --predb).');
            $this->info('Use --help to see all available options.');

            return Command::FAILURE;
        } catch (Exception $failure) {
            $this->error("An error occurred: {$failure->getMessage()}");

            // The stack trace is only printed when the command runs verbosely.
            if ($this->output->isVerbose()) {
                $this->error($failure->getTraceAsString());
            }

            return Command::FAILURE;
        }
    }
82
83
    /**
     * Get the selected search engine from options
     *
     * @return string|null The first entry of SUPPORTED_ENGINES whose flag is set, or null when none is.
     */
    private function getSelectedEngine(): ?string
    {
        foreach (self::SUPPORTED_ENGINES as $candidate) {
            if (! $this->option($candidate)) {
                continue;
            }

            return $candidate;
        }

        return null;
    }
96
97
    /**
     * Get the selected index from options
     *
     * @return string|null The first entry of SUPPORTED_INDEXES whose flag is set, or null when none is.
     */
    private function getSelectedIndex(): ?string
    {
        foreach (self::SUPPORTED_INDEXES as $candidate) {
            if (! $this->option($candidate)) {
                continue;
            }

            return $candidate;
        }

        return null;
    }
110
111
    /**
     * Handle the optimize command
     *
     * Calls ManticoreSearch::optimizeRTIndex() and reports the outcome.
     *
     * @return int Command::SUCCESS, or Command::FAILURE when the call throws
     */
    private function handleOptimize(): int
    {
        $this->info('Optimizing ManticoreSearch indexes...');

        try {
            $manticore = new ManticoreSearch;
            $manticore->optimizeRTIndex();

            $this->info('Optimization completed successfully!');

            return Command::SUCCESS;
        } catch (Exception $failure) {
            $this->error("Optimization failed: {$failure->getMessage()}");

            return Command::FAILURE;
        }
    }
129
130
    /**
     * Populate the specified index with the specified engine
     *
     * The handler is resolved by name as "<engine><Index>", e.g. "manticore"
     * and "releases" resolve to manticoreReleases().
     *
     * @param  string  $engine  Engine name used as the method-name prefix
     * @param  string  $index  Index name used (capitalised) as the method-name suffix
     * @return int Status returned by the handler, or Command::FAILURE when no handler exists
     */
    private function populateIndex(string $engine, string $index): int
    {
        $handler = $engine.ucfirst($index);

        if (! method_exists($this, $handler)) {
            $this->error("Method {$handler} not implemented.");

            return Command::FAILURE;
        }

        $this->info("Starting {$engine} {$index} population...");

        $startedAt = microtime(true);
        $status = $this->$handler();
        $elapsed = round(microtime(true) - $startedAt, 2);

        // The timing line is only printed for successful runs.
        if ($status === Command::SUCCESS) {
            $this->info("Population completed in {$elapsed} seconds.");
        }

        return $status;
    }
155
156
    /**
     * Populate ManticoreSearch releases index
     *
     * Truncates the `releases_rt` index, then re-indexes every release together
     * with the space-separated names of its joined release_files rows.
     *
     * @return int Command::SUCCESS or Command::FAILURE
     */
    private function manticoreReleases(): int
    {
        $manticore = new ManticoreSearch;
        $indexName = 'releases_rt';

        $manticore->truncateRTIndex([$indexName]);

        // processManticoreData() declares an int parameter, so make the type explicit.
        $total = (int) Release::count();
        if ($total === 0) {
            $this->warn('Releases table is empty. Nothing to do.');

            return Command::SUCCESS;
        }

        // Every selected column must also be grouped because of the GROUP_CONCAT aggregate.
        $columns = [
            'releases.id',
            'releases.name',
            'releases.searchname',
            'releases.fromname',
            'releases.categories_id',
        ];

        $query = Release::query()
            ->orderByDesc('releases.id')
            ->leftJoin('release_files', 'releases.id', '=', 'release_files.releases_id')
            ->select($columns)
            ->selectRaw('IFNULL(GROUP_CONCAT(release_files.name SEPARATOR " "),"") AS filename')
            ->groupBy($columns);

        return $this->processManticoreData(
            $indexName,
            $total,
            $query,
            static function ($item): array {
                return [
                    'id' => (string) $item->id,
                    // `??` instead of `?:` so that a legitimate value such as "0"
                    // is indexed as-is rather than being replaced by an empty string.
                    'name' => (string) ($item->name ?? ''),
                    'searchname' => (string) ($item->searchname ?? ''),
                    'fromname' => (string) ($item->fromname ?? ''),
                    // Null and 0 both map to '0' here.
                    'categories_id' => (string) ($item->categories_id ?: '0'),
                    'filename' => (string) ($item->filename ?? ''),
                    'dummy' => '1',
                ];
            }
        );
    }
206
207
    /**
     * Populate ManticoreSearch predb index
     *
     * Truncates the `predb_rt` index, then re-indexes the id, title, filename
     * and source of every Predb row in ascending id order.
     *
     * @return int Command::SUCCESS or Command::FAILURE
     */
    private function manticorePredb(): int
    {
        $indexName = 'predb_rt';

        $client = new ManticoreSearch;
        $client->truncateRTIndex([$indexName]);

        $total = Predb::count();
        if (! $total) {
            $this->warn('PreDB table is empty. Nothing to do.');

            return Command::SUCCESS;
        }

        $rows = Predb::query()
            ->select(['id', 'title', 'filename', 'source'])
            ->orderBy('id');

        // Maps one Predb row to the document stored in the index.
        $toDocument = static function ($row) {
            return [
                'id' => $row->id,
                'title' => (string) ($row->title ?? ''),
                'filename' => (string) ($row->filename ?? ''),
                'source' => (string) ($row->source ?? ''),
                'dummy' => 1,
            ];
        };

        return $this->processManticoreData($indexName, $total, $rows, $toDocument);
    }
243
244
    /**
     * Process data for ManticoreSearch with optimizations
     *
     * Reads the query in chunks, converts each row with the transformer and
     * sends the resulting documents to ManticoreSearch in batches.
     *
     * Error handling:
     *  - a row whose transformer throws is counted as one error and skipped;
     *  - a full batch that still fails after processBatch()'s retries is
     *    dropped, and every document in it is counted as an error;
     *  - a failure while flushing the final partial batch aborts the run with
     *    Command::FAILURE.
     *
     * @param  string  $indexName  Name of the ManticoreSearch table to write to
     * @param  int  $total  Expected number of rows, used for the progress bar and the summary line
     * @param  mixed  $query  Query object exposing chunk(int, callable)
     * @param  callable  $transformer  Receives one row and returns the document array to index
     * @return int Command::SUCCESS or Command::FAILURE
     */
    private function processManticoreData(string $indexName, int $total, $query, callable $transformer): int
    {
        $manticore = new ManticoreSearch;
        $chunkSize = $this->getChunkSize();
        $batchSize = $this->getBatchSize();

        $this->optimizeDatabase();
        $this->setGroupConcatMaxLen();

        $this->info(sprintf(
            "Populating ManticoreSearch index '%s' with %s rows using chunks of %s and batch size of %s.",
            $indexName,
            number_format($total),
            number_format($chunkSize),
            number_format($batchSize)
        ));

        $bar = $this->output->createProgressBar($total);
        $bar->setFormat('verbose');
        $bar->start();

        $processedCount = 0;
        $errorCount = 0;
        $batchData = [];

        try {
            $query->chunk($chunkSize, function ($items) use ($manticore, $indexName, $transformer, $bar, &$processedCount, &$errorCount, $batchSize, &$batchData) {
                foreach ($items as $item) {
                    try {
                        $batchData[] = $transformer($item);
                        $processedCount++;
                    } catch (Exception $e) {
                        $errorCount++;
                        if ($this->output->isVerbose()) {
                            $this->error("Error processing item {$item->id}: {$e->getMessage()}");
                        }
                    }

                    if (count($batchData) >= $batchSize) {
                        // Detach the buffer before sending it. If the send fails, the
                        // buffer must not keep growing and be re-sent on every later row.
                        $pending = $batchData;
                        $batchData = [];

                        try {
                            $this->processBatch($manticore, $indexName, $pending);
                        } catch (Exception $e) {
                            $failed = count($pending);
                            $errorCount += $failed;
                            if ($this->output->isVerbose()) {
                                $this->error("Error indexing batch of {$failed} items: {$e->getMessage()}");
                            }
                        }
                    }

                    $bar->advance();
                }
            });

            // Process remaining items
            if (! empty($batchData)) {
                $this->processBatch($manticore, $indexName, $batchData);
            }

            $bar->finish();
            $this->newLine();

            if ($errorCount > 0) {
                $this->warn("Completed with {$errorCount} errors out of {$processedCount} processed items.");
            } else {
                $this->info('ManticoreSearch population completed successfully!');
            }

            return Command::SUCCESS;
        } catch (Exception $e) {
            $bar->finish();
            $this->newLine();
            $this->error("Failed to populate ManticoreSearch: {$e->getMessage()}");

            return Command::FAILURE;
        } finally {
            // Runs on success and on failure, so the settings changed by
            // optimizeDatabase() are always restored.
            $this->restoreDatabase();
        }
    }
320
321
    /**
     * Populate ElasticSearch releases index
     *
     * Indexes every release into the `releases` index together with the
     * space-separated names of its joined release_files rows. Each document
     * also gets a `plainsearchname` in which dots and dashes are replaced by spaces.
     *
     * @return int Command::SUCCESS or Command::FAILURE
     */
    private function elasticReleases(): int
    {
        $total = Release::count();
        if (! $total) {
            $this->warn('Releases table is empty. Nothing to do.');

            return Command::SUCCESS;
        }

        // The same column list feeds both SELECT and GROUP BY.
        $columns = [
            'releases.id',
            'releases.name',
            'releases.searchname',
            'releases.fromname',
            'releases.categories_id',
            'releases.postdate',
        ];

        $rows = Release::query()
            ->orderByDesc('releases.id')
            ->leftJoin('release_files', 'releases.id', '=', 'release_files.releases_id')
            ->select($columns)
            ->selectRaw('IFNULL(GROUP_CONCAT(release_files.name SEPARATOR " "),"") AS filename')
            ->groupBy($columns);

        // Maps one release row to the document stored in the index.
        $toDocument = static function ($row) {
            return [
                'id' => $row->id,
                'name' => $row->name,
                'searchname' => $row->searchname,
                'plainsearchname' => str_replace(['.', '-'], ' ', $row->searchname ?? ''),
                'fromname' => $row->fromname,
                'categories_id' => $row->categories_id,
                'filename' => $row->filename ?? '',
                'postdate' => $row->postdate,
            ];
        };

        return $this->processElasticData('releases', $total, $rows, $toDocument);
    }
374
375
    /**
     * Populate ElasticSearch predb index
     *
     * Indexes the id, title, filename and source of every Predb row into the
     * `predb` index, in ascending id order.
     *
     * @return int Command::SUCCESS or Command::FAILURE
     */
    private function elasticPredb(): int
    {
        $total = Predb::count();
        if (! $total) {
            $this->warn('PreDB table is empty. Nothing to do.');

            return Command::SUCCESS;
        }

        $rows = Predb::query()
            ->select(['id', 'title', 'filename', 'source'])
            ->orderBy('id');

        // Maps one Predb row to the document stored in the index.
        $toDocument = static function ($row) {
            return [
                'id' => $row->id,
                'title' => $row->title,
                'filename' => $row->filename,
                'source' => $row->source,
            ];
        };

        return $this->processElasticData('predb', $total, $rows, $toDocument);
    }
405
406
    /**
     * Process data for ElasticSearch with optimizations
     *
     * Reads the query in chunks, splits every chunk into bulk-sized slices,
     * converts each row with the transformer and submits one bulk request per
     * slice. A row whose transformer throws is counted as an error and skipped.
     * An exception from the bulk submission aborts the run.
     *
     * @param  string  $indexName  Target index name written into every bulk action
     * @param  int  $total  Expected number of rows, used for the progress bar and the summary line
     * @param  mixed  $query  Query object exposing chunk(int, callable)
     * @param  callable  $transformer  Receives one row and returns the document array to index
     * @return int Command::SUCCESS or Command::FAILURE
     */
    private function processElasticData(string $indexName, int $total, $query, callable $transformer): int
    {
        $rowsPerChunk = $this->getChunkSize();
        $docsPerBulk = $this->getBatchSize();

        $this->optimizeDatabase();
        $this->setGroupConcatMaxLen();

        $summary = sprintf(
            "Populating ElasticSearch index '%s' with %s rows using chunks of %s and batch size of %s.",
            $indexName,
            number_format($total),
            number_format($rowsPerChunk),
            number_format($docsPerBulk)
        );
        $this->info($summary);

        $progress = $this->output->createProgressBar($total);
        $progress->setFormat('verbose');
        $progress->start();

        $indexed = 0;
        $failed = 0;

        $handleChunk = function ($rows) use ($indexName, $transformer, $progress, &$indexed, &$failed, $docsPerBulk) {
            foreach ($rows->chunk($docsPerBulk) as $slice) {
                // Bulk body: an action line followed by the document, for each row.
                $body = [];

                foreach ($slice as $row) {
                    try {
                        $document = $transformer($row);

                        $body[] = [
                            'index' => [
                                '_index' => $indexName,
                                '_id' => $row->id,
                            ],
                        ];
                        $body[] = $document;

                        $indexed++;
                    } catch (Exception $rowError) {
                        $failed++;
                        if ($this->output->isVerbose()) {
                            $this->error("Error processing item {$row->id}: {$rowError->getMessage()}");
                        }
                    }

                    $progress->advance();
                }

                // Nothing is sent when every row of the slice failed to transform.
                if ($body !== []) {
                    $this->processElasticBatch(['body' => $body], $failed);
                }
            }
        };

        try {
            $query->chunk($rowsPerChunk, $handleChunk);

            $progress->finish();
            $this->newLine();

            if ($failed > 0) {
                $this->warn("Completed with {$failed} errors out of {$indexed} processed items.");
            } else {
                $this->info('ElasticSearch population completed successfully!');
            }

            return Command::SUCCESS;
        } catch (Exception $failure) {
            $progress->finish();
            $this->newLine();
            $this->error("Failed to populate ElasticSearch: {$failure->getMessage()}");

            return Command::FAILURE;
        } finally {
            // Always undo whatever optimizeDatabase() changed.
            $this->restoreDatabase();
        }
    }
488
489
    /**
     * Process ManticoreSearch batch with retry logic
     *
     * Sends the documents with replaceDocuments(), making up to three attempts
     * with a 100ms pause between them.
     *
     * @param  ManticoreSearch  $manticore  Wrapper whose manticoreSearch client is used
     * @param  string  $indexName  Table to write to
     * @param  array  $data  Documents to replace
     *
     * @throws Exception The exception of the third failed attempt
     */
    private function processBatch(ManticoreSearch $manticore, string $indexName, array $data): void
    {
        $maxAttempts = 3;

        for ($try = 1; ; $try++) {
            try {
                $manticore->manticoreSearch->table($indexName)->replaceDocuments($data);

                return;
            } catch (Exception $failure) {
                // Out of attempts: hand the last error to the caller.
                if ($try >= $maxAttempts) {
                    throw $failure;
                }

                usleep(100000); // 100ms delay before retry
            }
        }
    }
510
511
    /**
     * Process ElasticSearch batch with retry logic
     *
     * Submits the bulk request, making up to three attempts with a 100ms pause
     * between them. When the response flags errors, every item carrying an
     * `index.error` entry increments the error counter.
     *
     * @param  array  $data  Bulk request parameters passed to Elasticsearch::bulk()
     * @param  int  $errorCount  Running error counter, updated in place
     *
     * @throws Exception The exception of the third failed attempt
     */
    private function processElasticBatch(array $data, int &$errorCount): void
    {
        $maxAttempts = 3;

        for ($try = 1; ; $try++) {
            try {
                $response = \Elasticsearch::bulk($data);

                // Check for errors in bulk response
                $hasErrors = isset($response['errors']) && $response['errors'];
                if ($hasErrors) {
                    foreach ($response['items'] as $entry) {
                        if (! isset($entry['index']['error'])) {
                            continue;
                        }

                        $errorCount++;
                        if ($this->output->isVerbose()) {
                            $this->error('ElasticSearch error: '.json_encode($entry['index']['error']));
                        }
                    }
                }

                return;
            } catch (Exception $failure) {
                // Out of attempts: hand the last error to the caller.
                if ($try >= $maxAttempts) {
                    throw $failure;
                }

                usleep(100000); // 100ms delay before retry
            }
        }
    }
544
545
    /**
     * Optimize database settings for bulk operations
     *
     * Does nothing unless --disable-keys is set. Each statement is executed
     * independently, so a setting the server rejects produces a warning
     * without preventing the remaining settings from being applied.
     *
     * innodb_buffer_pool_size is intentionally not set here: it is a
     * global-only server variable, so `SET SESSION` always fails for it.
     */
    private function optimizeDatabase(): void
    {
        if (! $this->option('disable-keys')) {
            return;
        }

        $this->info('Disabling database keys for faster bulk operations...');

        $statements = [
            // Disable foreign key checks
            'SET FOREIGN_KEY_CHECKS = 0',
            'SET UNIQUE_CHECKS = 0',
            'SET AUTOCOMMIT = 0',
            // Increase buffer sizes
            'SET SESSION bulk_insert_buffer_size = 268435456', // 256MB
            'SET SESSION read_buffer_size = 2097152', // 2MB
            'SET SESSION sort_buffer_size = 16777216', // 16MB
        ];

        foreach ($statements as $statement) {
            try {
                DB::statement($statement);
            } catch (Exception $e) {
                $this->warn("Could not optimize database settings ({$statement}): {$e->getMessage()}");
            }
        }
    }
570
571
    /**
     * Restore database settings after bulk operations
     *
     * Does nothing unless --disable-keys is set. Re-enables the foreign key
     * and unique checks, switches autocommit back on and issues a COMMIT.
     * The first failing statement stops the sequence and is reported as a warning.
     */
    private function restoreDatabase(): void
    {
        if (! $this->option('disable-keys')) {
            return;
        }

        $this->info('Restoring database settings...');

        $statements = [
            'SET FOREIGN_KEY_CHECKS = 1',
            'SET UNIQUE_CHECKS = 1',
            'SET AUTOCOMMIT = 1',
            'COMMIT',
        ];

        try {
            foreach ($statements as $statement) {
                DB::statement($statement);
            }
        } catch (Exception $failure) {
            $this->warn("Could not restore database settings: {$failure->getMessage()}");
        }
    }
589
590
    /**
     * Get the chunk size from options
     *
     * @return int The --count value as an integer, or DEFAULT_CHUNK_SIZE when it is not positive
     */
    private function getChunkSize(): int
    {
        $requested = (int) $this->option('count');

        if ($requested <= 0) {
            return self::DEFAULT_CHUNK_SIZE;
        }

        return $requested;
    }
599
600
    /**
     * Get the batch size from options
     *
     * @return int The --batch-size value as an integer, or DEFAULT_BATCH_SIZE when it is not positive
     */
    private function getBatchSize(): int
    {
        $requested = (int) $this->option('batch-size');

        if ($requested <= 0) {
            return self::DEFAULT_BATCH_SIZE;
        }

        return $requested;
    }
609
610
    /**
     * Set the GROUP_CONCAT max length for the session
     *
     * Applies GROUP_CONCAT_MAX_LEN through a bound parameter.
     */
    private function setGroupConcatMaxLen(): void
    {
        $bindings = [self::GROUP_CONCAT_MAX_LEN];

        DB::statement('SET SESSION group_concat_max_len = ?', $bindings);
    }
617
}
618