Passed
Push — master ( 270f36...5637d3 )
by Darko
07:40
created

NntmuxPopulateSearchIndexes::manticorePredb()   A

Complexity

Conditions 2
Paths 2

Size

Total Lines 29
Code Lines 21

Duplication

Lines 0
Ratio 0 %

Importance

Changes 2
Bugs 1 Features 0
Metric Value
eloc 21
c 2
b 1
f 0
dl 0
loc 29
rs 9.584
cc 2
nc 2
nop 0
1
<?php
2
3
namespace App\Console\Commands;
4
5
use App\Models\Predb;
6
use App\Models\Release;
7
use Blacklight\ManticoreSearch;
8
use Exception;
9
use Illuminate\Console\Command;
10
use Illuminate\Support\Arr;
11
use Illuminate\Support\Facades\DB;
12
13
class NntmuxPopulateSearchIndexes extends Command
14
{
15
    /**
     * Option names that select a search engine; getSelectedEngine() checks
     * them in this order and returns the first one that is set.
     */
    private const SUPPORTED_ENGINES = ['manticore', 'elastic'];

    /**
     * Option names that select an index; getSelectedIndex() checks them in
     * this order and returns the first one that is set.
     */
    private const SUPPORTED_INDEXES = ['releases', 'predb'];

    /**
     * Value assigned to the session variable group_concat_max_len before rows
     * are read (see setGroupConcatMaxLen()).
     */
    private const GROUP_CONCAT_MAX_LEN = 16384;

    /**
     * Chunk size used when --count is missing or not a positive integer.
     */
    private const DEFAULT_CHUNK_SIZE = 20000;

    /**
     * Command name followed by the definitions of every supported option.
     *
     * @var string
     */
    protected $signature = 'nntmux:populate
                                       {--manticore : Use ManticoreSearch}
                                       {--elastic : Use ElasticSearch}
                                       {--releases : Populates the releases index}
                                       {--predb : Populates the predb index}
                                       {--count=20000 : Sets the chunk size}
                                       {--optimize : Optimize ManticoreSearch indexes}';

    /**
     * Human-readable summary of what the command does.
     *
     * @var string
     */
    protected $description = 'Populate Manticore/Elasticsearch indexes with either releases or predb';
42
43
    /**
     * Run the command.
     *
     * --optimize takes precedence and only optimizes the ManticoreSearch
     * indexes. Otherwise both an engine option and an index option are
     * required, and the matching populate routine is executed.
     *
     * @return int Command::SUCCESS or Command::FAILURE.
     */
    public function handle(): int
    {
        try {
            if ($this->option('optimize')) {
                return $this->handleOptimize();
            }

            $selectedEngine = $this->getSelectedEngine();
            $selectedIndex = $this->getSelectedIndex();

            // Happy path first: both selections are present.
            if ($selectedEngine && $selectedIndex) {
                return $this->populateIndex($selectedEngine, $selectedIndex);
            }

            $this->error('You must specify both an engine (--manticore or --elastic) and an index (--releases or --predb).');
            $this->info('Use --help to see all available options.');

            return Command::FAILURE;
        } catch (Exception $exception) {
            $this->error("An error occurred: {$exception->getMessage()}");

            // The stack trace is only printed when the command runs verbosely.
            if ($this->output->isVerbose()) {
                $this->error($exception->getTraceAsString());
            }

            return Command::FAILURE;
        }
    }
75
76
    /**
     * Resolve which search engine was requested on the command line.
     *
     * @return string|null The first entry of SUPPORTED_ENGINES whose option is set, or null when none is.
     */
    private function getSelectedEngine(): ?string
    {
        return Arr::first(
            self::SUPPORTED_ENGINES,
            fn (string $candidate) => $this->option($candidate)
        );
    }
89
90
    /**
     * Resolve which index was requested on the command line.
     *
     * @return string|null The first entry of SUPPORTED_INDEXES whose option is set, or null when none is.
     */
    private function getSelectedIndex(): ?string
    {
        return Arr::first(
            self::SUPPORTED_INDEXES,
            fn (string $candidate) => $this->option($candidate)
        );
    }
103
104
    /**
     * Optimize the ManticoreSearch real-time indexes (the --optimize option).
     *
     * @return int Command::SUCCESS, or Command::FAILURE when the optimization throws.
     */
    private function handleOptimize(): int
    {
        $this->info('Optimizing ManticoreSearch indexes...');

        try {
            $manticore = new ManticoreSearch;
            $manticore->optimizeRTIndex();

            $this->info('Optimization completed successfully!');
        } catch (Exception $exception) {
            $this->error('Optimization failed: '.$exception->getMessage());

            return Command::FAILURE;
        }

        return Command::SUCCESS;
    }
122
123
    /**
     * Dispatch to the populate routine for the given engine/index pair.
     *
     * The routine name is the engine followed by the capitalized index name,
     * e.g. "manticore" + "releases" => manticoreReleases().
     *
     * @param  string  $engine  Selected engine option name.
     * @param  string  $index  Selected index option name.
     * @return int The routine's exit code, or Command::FAILURE when no such routine exists.
     */
    private function populateIndex(string $engine, string $index): int
    {
        $handler = $engine.ucfirst($index);

        if (method_exists($this, $handler)) {
            $this->info("Starting {$engine} {$index} population...");

            $startedAt = microtime(true);
            $status = $this->{$handler}();
            $elapsed = round(microtime(true) - $startedAt, 2);

            // The timing line is only shown for successful runs.
            if ($status === Command::SUCCESS) {
                $this->info("Population completed in {$elapsed} seconds.");
            }

            return $status;
        }

        $this->error("Method {$handler} not implemented.");

        return Command::FAILURE;
    }
148
149
    /**
     * Rebuild the ManticoreSearch `releases_rt` index from the releases table.
     *
     * The index is truncated first, even when the table turns out to be
     * empty. Each release is indexed together with the space-separated names
     * of its release_files rows.
     *
     * @return int Command::SUCCESS or Command::FAILURE.
     */
    private function manticoreReleases(): int
    {
        $indexName = 'releases_rt';

        (new ManticoreSearch)->truncateRTIndex([$indexName]);

        $total = Release::count();
        if (! $total) {
            $this->warn('Releases table is empty. Nothing to do.');

            return Command::SUCCESS;
        }

        // The same columns are selected and grouped on, so list them once.
        $columns = [
            'releases.id',
            'releases.name',
            'releases.searchname',
            'releases.fromname',
            'releases.categories_id',
        ];

        $query = Release::query()
            ->leftJoin('release_files', 'releases.id', '=', 'release_files.releases_id')
            ->select($columns)
            ->selectRaw('IFNULL(GROUP_CONCAT(release_files.name SEPARATOR " "),"") AS filename')
            ->groupBy($columns)
            ->orderByDesc('releases.id');

        // Null columns are normalised to empty strings / zero before indexing.
        $toDocument = static fn ($item): array => [
            'id' => $item->id,
            'name' => (string) ($item->name ?? ''),
            'searchname' => (string) ($item->searchname ?? ''),
            'fromname' => (string) ($item->fromname ?? ''),
            'categories_id' => (int) ($item->categories_id ?? 0),
            'filename' => (string) ($item->filename ?? ''),
            'dummy' => 1,
        ];

        return $this->processManticoreData($indexName, $total, $query, $toDocument);
    }
202
203
    /**
     * Rebuild the ManticoreSearch `predb_rt` index from the predb table.
     *
     * The index is truncated first, even when the table turns out to be empty.
     *
     * @return int Command::SUCCESS or Command::FAILURE.
     */
    private function manticorePredb(): int
    {
        $indexName = 'predb_rt';

        (new ManticoreSearch)->truncateRTIndex([$indexName]);

        $total = Predb::count();
        if (! $total) {
            $this->warn('PreDB table is empty. Nothing to do.');

            return Command::SUCCESS;
        }

        $query = Predb::query()
            ->select(['id', 'title', 'filename', 'source'])
            ->orderBy('id');

        // Null columns are normalised to empty strings before indexing.
        $toDocument = static fn ($item): array => [
            'id' => $item->id,
            'title' => (string) ($item->title ?? ''),
            'filename' => (string) ($item->filename ?? ''),
            'source' => (string) ($item->source ?? ''),
            'dummy' => 1,
        ];

        return $this->processManticoreData($indexName, $total, $query, $toDocument);
    }
239
240
    /**
     * Stream rows from the database into a ManticoreSearch index.
     *
     * Rows are read in chunks of getChunkSize() rows, each row is converted to
     * a document by $transformer, and every chunk is written with a single
     * replaceDocuments() call. A row whose transformation throws is counted as
     * an error and skipped; any other exception aborts the whole run.
     *
     * @param  string  $indexName  Name of the Manticore table to write to.
     * @param  int  $total  Expected row count; used for the intro message and the progress bar only.
     * @param  \Illuminate\Database\Eloquent\Builder  $query  Builder whose chunk() method yields the rows.
     * @param  callable  $transformer  Receives one row and returns the document array to index.
     * @return int Command::SUCCESS when the run finishes (even with per-row errors), Command::FAILURE when it aborts.
     */
    private function processManticoreData(string $indexName, int $total, $query, callable $transformer): int
    {
        $manticore = new ManticoreSearch;
        $chunkSize = $this->getChunkSize();

        $this->setGroupConcatMaxLen();

        $this->info(sprintf(
            "Populating ManticoreSearch index '%s' with %s rows using chunks of %s.",
            $indexName,
            number_format($total),
            number_format($chunkSize)
        ));

        $bar = $this->output->createProgressBar($total);
        $bar->setFormat('verbose');
        $bar->start();

        $processedCount = 0;
        $errorCount = 0;

        try {
            $query->chunk($chunkSize, function ($items) use ($manticore, $indexName, $transformer, $bar, &$processedCount, &$errorCount) {
                $data = [];

                foreach ($items as $item) {
                    // Count every attempted row, so the summary's
                    // "errors out of processed" figure includes the failures.
                    $processedCount++;

                    try {
                        $data[] = $transformer($item);
                    } catch (Exception $e) {
                        $errorCount++;
                        if ($this->output->isVerbose()) {
                            $this->error("Error processing item {$item->id}: {$e->getMessage()}");
                        }
                    }

                    $bar->advance();
                }

                // Skip the write when every row of the chunk failed to transform.
                if (! empty($data)) {
                    $manticore->manticoreSearch->table($indexName)->replaceDocuments($data);
                }
            });

            $bar->finish();
            $this->newLine();

            if ($errorCount > 0) {
                $this->warn("Completed with {$errorCount} errors out of {$processedCount} processed items.");
            } else {
                $this->info('ManticoreSearch population completed successfully!');
            }

            return Command::SUCCESS;
        } catch (Exception $e) {
            $bar->finish();
            $this->newLine();
            $this->error("Failed to populate ManticoreSearch: {$e->getMessage()}");

            return Command::FAILURE;
        }
    }
305
306
    /**
     * Populate the ElasticSearch `releases` index from the releases table.
     *
     * Each release is indexed together with the space-separated names of its
     * release_files rows and a "plainsearchname" variant of the search name in
     * which dots and dashes are replaced by spaces.
     *
     * @return int Command::SUCCESS or Command::FAILURE.
     */
    private function elasticReleases(): int
    {
        $total = Release::count();
        if (! $total) {
            $this->warn('Releases table is empty. Nothing to do.');

            return Command::SUCCESS;
        }

        // The same columns are selected and grouped on, so list them once.
        $columns = [
            'releases.id',
            'releases.name',
            'releases.searchname',
            'releases.fromname',
            'releases.categories_id',
            'releases.postdate',
        ];

        $query = Release::query()
            ->leftJoin('release_files', 'releases.id', '=', 'release_files.releases_id')
            ->select($columns)
            ->selectRaw('IFNULL(GROUP_CONCAT(release_files.name SEPARATOR " "),"") AS filename')
            ->groupBy($columns)
            ->orderByDesc('releases.id');

        $toDocument = static fn ($item): array => [
            'id' => $item->id,
            'name' => $item->name,
            'searchname' => $item->searchname,
            'plainsearchname' => str_replace(['.', '-'], ' ', $item->searchname ?? ''),
            'fromname' => $item->fromname,
            'categories_id' => $item->categories_id,
            'filename' => $item->filename ?? '',
            'postdate' => $item->postdate,
        ];

        return $this->processElasticData('releases', $total, $query, $toDocument);
    }
359
360
    /**
     * Populate the ElasticSearch `predb` index from the predb table.
     *
     * @return int Command::SUCCESS or Command::FAILURE.
     */
    private function elasticPredb(): int
    {
        $total = Predb::count();
        if (! $total) {
            $this->warn('PreDB table is empty. Nothing to do.');

            return Command::SUCCESS;
        }

        $query = Predb::query()
            ->select(['id', 'title', 'filename', 'source'])
            ->orderBy('id');

        // Column values are passed through unchanged.
        $toDocument = static fn ($item): array => [
            'id' => $item->id,
            'title' => $item->title,
            'filename' => $item->filename,
            'source' => $item->source,
        ];

        return $this->processElasticData('predb', $total, $query, $toDocument);
    }
390
391
    /**
     * Stream rows from the database into an ElasticSearch index.
     *
     * Rows are read in chunks of getChunkSize() rows and sent to ElasticSearch
     * in bulk requests of at most 1000 documents. A row whose transformation
     * throws, or whose bulk response entry carries an error, is counted as an
     * error; any other exception aborts the whole run.
     *
     * @param  string  $indexName  Name of the ElasticSearch index to write to.
     * @param  int  $total  Expected row count; used for the intro message and the progress bar only.
     * @param  \Illuminate\Database\Eloquent\Builder  $query  Builder whose chunk() method yields the rows.
     * @param  callable  $transformer  Receives one row and returns the document array to index.
     * @return int Command::SUCCESS when the run finishes (even with per-row errors), Command::FAILURE when it aborts.
     */
    private function processElasticData(string $indexName, int $total, $query, callable $transformer): int
    {
        $chunkSize = $this->getChunkSize();

        $this->setGroupConcatMaxLen();

        $this->info(sprintf(
            "Populating ElasticSearch index '%s' with %s rows using chunks of %s.",
            $indexName,
            number_format($total),
            number_format($chunkSize)
        ));

        $bar = $this->output->createProgressBar($total);
        $bar->setFormat('verbose');
        $bar->start();

        $processedCount = 0;
        $errorCount = 0;
        $batchSize = min($chunkSize, 1000); // ElasticSearch performs better with smaller bulk sizes

        try {
            $query->chunk($chunkSize, function ($items) use ($indexName, $transformer, $bar, &$processedCount, &$errorCount, $batchSize) {
                // Process in smaller batches for ElasticSearch
                foreach ($items->chunk($batchSize) as $batch) {
                    $data = ['body' => []];

                    foreach ($batch as $item) {
                        // Count every attempted row, so the summary's
                        // "errors out of processed" figure includes the failures.
                        $processedCount++;

                        try {
                            $document = $transformer($item);

                            // A bulk body alternates an action line with the document it applies to.
                            $data['body'][] = [
                                'index' => [
                                    '_index' => $indexName,
                                    '_id' => $item->id,
                                ],
                            ];
                            $data['body'][] = $document;
                        } catch (Exception $e) {
                            $errorCount++;
                            if ($this->output->isVerbose()) {
                                $this->error("Error processing item {$item->id}: {$e->getMessage()}");
                            }
                        }

                        $bar->advance();
                    }

                    // Nothing to send when every row of the batch failed to transform.
                    if (empty($data['body'])) {
                        continue;
                    }

                    $response = \Elasticsearch::bulk($data);

                    // Check for errors in bulk response
                    if (isset($response['errors']) && $response['errors']) {
                        // Distinct name so the row variable above is not shadowed;
                        // a missing "items" list is treated as empty.
                        foreach ($response['items'] ?? [] as $result) {
                            if (isset($result['index']['error'])) {
                                $errorCount++;
                                if ($this->output->isVerbose()) {
                                    $this->error('ElasticSearch error: '.json_encode($result['index']['error']));
                                }
                            }
                        }
                    }
                }
            });

            $bar->finish();
            $this->newLine();

            if ($errorCount > 0) {
                $this->warn("Completed with {$errorCount} errors out of {$processedCount} processed items.");
            } else {
                $this->info('ElasticSearch population completed successfully!');
            }

            return Command::SUCCESS;
        } catch (Exception $e) {
            $bar->finish();
            $this->newLine();
            $this->error("Failed to populate ElasticSearch: {$e->getMessage()}");

            return Command::FAILURE;
        }
    }
481
482
    /**
     * Read the chunk size from the --count option.
     *
     * @return int The option cast to int when positive, otherwise DEFAULT_CHUNK_SIZE.
     */
    private function getChunkSize(): int
    {
        $requested = (int) $this->option('count');

        if ($requested > 0) {
            return $requested;
        }

        return self::DEFAULT_CHUNK_SIZE;
    }
491
492
    /**
     * Set group_concat_max_len to GROUP_CONCAT_MAX_LEN for the current
     * database session.
     */
    private function setGroupConcatMaxLen(): void
    {
        $bindings = [self::GROUP_CONCAT_MAX_LEN];

        DB::statement('SET SESSION group_concat_max_len = ?', $bindings);
    }
499
}
500