Issues (374)

Console/Commands/NntmuxPopulateSearchIndexes.php (6 issues)

Labels
Severity
1
<?php
2
3
namespace App\Console\Commands;
4
5
use App\Models\Predb;
6
use App\Models\Release;
7
use Blacklight\ManticoreSearch;
8
use Exception;
9
use Illuminate\Console\Command;
10
use Illuminate\Support\Arr;
11
use Illuminate\Support\Facades\DB;
12
13
class NntmuxPopulateSearchIndexes extends Command
14
{
15
    /**
16
     * The name and signature of the console command.
17
     *
18
     * @var string
19
     */
20
    protected $signature = 'nntmux:populate
21
                                       {--manticore : Use ManticoreSearch}
22
                                       {--elastic : Use ElasticSearch}
23
                                       {--releases : Populates the releases index}
24
                                       {--predb : Populates the predb index}
25
                                       {--count=20000 : Sets the chunk size}
26
                                       {--optimize : Optimize ManticoreSearch indexes}';
27
28
    /**
29
     * The console command description.
30
     *
31
     * @var string
32
     */
33
    protected $description = 'Populate Manticore/Elasticsearch indexes with either releases or predb';
34
35
    private const SUPPORTED_ENGINES = ['manticore', 'elastic'];
36
37
    private const SUPPORTED_INDEXES = ['releases', 'predb'];
38
39
    private const GROUP_CONCAT_MAX_LEN = 16384;
40
41
    private const DEFAULT_CHUNK_SIZE = 20000;
42
43
    /**
     * Entry point of the console command.
     *
     * The --optimize flag takes precedence over everything else. Otherwise
     * both an engine flag and an index flag must be present before the
     * population is dispatched.
     *
     * @return int Command::SUCCESS or Command::FAILURE
     */
    public function handle(): int
    {
        try {
            if ($this->option('optimize')) {
                return $this->handleOptimize();
            }

            $selectedEngine = $this->getSelectedEngine();
            $selectedIndex = $this->getSelectedIndex();

            if ($selectedEngine && $selectedIndex) {
                return $this->populateIndex($selectedEngine, $selectedIndex);
            }

            $this->error('You must specify both an engine (--manticore or --elastic) and an index (--releases or --predb).');
            $this->info('Use --help to see all available options.');

            return Command::FAILURE;
        } catch (Exception $exception) {
            $this->error('An error occurred: '.$exception->getMessage());

            // The stack trace is only printed when the command runs with -v or higher.
            if ($this->output->isVerbose()) {
                $this->error($exception->getTraceAsString());
            }

            return Command::FAILURE;
        }
    }
75
76
    /**
     * Resolve which search engine was requested on the command line.
     *
     * Engines are checked in the order of SUPPORTED_ENGINES; the first one
     * whose flag is set wins.
     *
     * @return string|null The engine name, or null when no engine flag is set
     */
    private function getSelectedEngine(): ?string
    {
        foreach (self::SUPPORTED_ENGINES as $candidate) {
            if (! $this->option($candidate)) {
                continue;
            }

            return $candidate;
        }

        return null;
    }
89
90
    /**
     * Resolve which index was requested on the command line.
     *
     * Indexes are checked in the order of SUPPORTED_INDEXES; the first one
     * whose flag is set wins.
     *
     * @return string|null The index name, or null when no index flag is set
     */
    private function getSelectedIndex(): ?string
    {
        $chosen = null;

        foreach (self::SUPPORTED_INDEXES as $name) {
            if ($this->option($name)) {
                $chosen = $name;
                break;
            }
        }

        return $chosen;
    }
103
104
    /**
     * Run the ManticoreSearch RT index optimization.
     *
     * @return int Command::SUCCESS when optimizeRTIndex() returned without
     *             throwing, Command::FAILURE otherwise
     */
    private function handleOptimize(): int
    {
        $this->info('Optimizing ManticoreSearch indexes...');

        try {
            $manticore = new ManticoreSearch;
            $manticore->optimizeRTIndex();
            $this->info('Optimization completed successfully!');
        } catch (Exception $exception) {
            $this->error('Optimization failed: '.$exception->getMessage());

            return Command::FAILURE;
        }

        return Command::SUCCESS;
    }
122
123
    /**
     * Dispatch to the population method for the given engine/index pair.
     *
     * The target method name is the engine followed by the capitalised index
     * (for example "manticore" + "Releases"). The elapsed time is reported
     * only when the target method returns Command::SUCCESS.
     *
     * @param  string  $engine  Engine name as returned by getSelectedEngine()
     * @param  string  $index  Index name as returned by getSelectedIndex()
     * @return int The status returned by the target method, or Command::FAILURE
     *             when no such method exists
     */
    private function populateIndex(string $engine, string $index): int
    {
        $handler = $engine.ucfirst($index);

        if (method_exists($this, $handler)) {
            $this->info("Starting {$engine} {$index} population...");

            $startedAt = microtime(true);
            $status = $this->{$handler}();
            $elapsed = round(microtime(true) - $startedAt, 2);

            if ($status === Command::SUCCESS) {
                $this->info("Population completed in {$elapsed} seconds.");
            }

            return $status;
        }

        $this->error("Method {$handler} not implemented.");

        return Command::FAILURE;
    }
148
149
    /**
     * Populate ManticoreSearch releases index
     *
     * Truncates the releases_rt index first, then re-indexes every release
     * together with a space-separated list of its release_files names.
     *
     * @return int Command::SUCCESS when the table is empty, otherwise the
     *             status returned by processManticoreData()
     */
    private function manticoreReleases(): int
    {
        $indexName = 'releases_rt';

        (new ManticoreSearch)->truncateRTIndex([$indexName]);

        $total = Release::count();
        if (! $total) {
            $this->warn('Releases table is empty. Nothing to do.');

            return Command::SUCCESS;
        }

        // The same release columns are both selected and grouped on.
        $releaseColumns = [
            'releases.id',
            'releases.name',
            'releases.searchname',
            'releases.fromname',
            'releases.categories_id',
        ];

        $query = Release::query()
            ->orderByDesc('releases.id')
            ->leftJoin('release_files', 'releases.id', '=', 'release_files.releases_id')
            ->select($releaseColumns)
            ->selectRaw('IFNULL(GROUP_CONCAT(release_files.name SEPARATOR " "),"") AS filename')
            ->groupBy($releaseColumns);

        // Every field is sent as a string; falsy values collapse to '' (or '0' for the category).
        $toDocument = function ($release) {
            return [
                'id' => (string) $release->id,
                'name' => (string) ($release->name ?: ''),
                'searchname' => (string) ($release->searchname ?: ''),
                'fromname' => (string) ($release->fromname ?: ''),
                'categories_id' => (string) ($release->categories_id ?: '0'),
                'filename' => (string) ($release->filename ?: ''),
                'dummy' => '1',
            ];
        };

        return $this->processManticoreData($indexName, $total, $query, $toDocument);
    }
199
200
    /**
     * Populate ManticoreSearch predb index
     *
     * Truncates the predb_rt index first, then re-indexes the id, title,
     * filename and source of every predb row in ascending id order.
     *
     * @return int Command::SUCCESS when the table is empty, otherwise the
     *             status returned by processManticoreData()
     */
    private function manticorePredb(): int
    {
        $indexName = 'predb_rt';

        $manticore = new ManticoreSearch;
        $manticore->truncateRTIndex([$indexName]);

        $total = Predb::count();

        if ($total) {
            $query = Predb::query()
                ->select(['id', 'title', 'filename', 'source'])
                ->orderBy('id');

            // Null text columns are indexed as empty strings.
            $toDocument = function ($row) {
                return [
                    'id' => $row->id,
                    'title' => (string) ($row->title ?? ''),
                    'filename' => (string) ($row->filename ?? ''),
                    'source' => (string) ($row->source ?? ''),
                    'dummy' => 1,
                ];
            };

            return $this->processManticoreData($indexName, $total, $query, $toDocument);
        }

        $this->warn('PreDB table is empty. Nothing to do.');

        return Command::SUCCESS;
    }
236
237
    /**
     * Stream the rows of $query into a ManticoreSearch index, chunk by chunk.
     *
     * Each chunk is transformed row by row and then written with a single
     * replaceDocuments() call. A row whose transformer throws is skipped and
     * counted as an error. An exception from the chunked query or from the
     * write aborts the run.
     *
     * @param  string  $indexName  Manticore table to write to
     * @param  int  $total  Expected number of rows; only used for the progress bar and the intro message
     * @param  mixed  $query  Query object exposing chunk(); both callers in this class pass an Eloquent query
     * @param  callable  $transformer  Maps one row to the document array to index
     * @return int Command::SUCCESS when the run finished (even with per-row errors),
     *             Command::FAILURE when it was aborted by an exception
     */
    private function processManticoreData(string $indexName, int $total, $query, callable $transformer): int
    {
        $manticore = new ManticoreSearch;
        $chunkSize = $this->getChunkSize();

        $this->setGroupConcatMaxLen();

        $this->info(sprintf(
            "Populating ManticoreSearch index '%s' with %s rows using chunks of %s.",
            $indexName,
            number_format($total),
            number_format($chunkSize)
        ));

        $bar = $this->output->createProgressBar($total);
        $bar->setFormat('verbose');
        $bar->start();

        // Counts every row handed to the transformer, including the ones that
        // fail, so the summary can report "errors out of <all rows>".
        $attemptedCount = 0;
        $errorCount = 0;

        try {
            $query->chunk($chunkSize, function ($items) use ($manticore, $indexName, $transformer, $bar, &$attemptedCount, &$errorCount) {
                $data = [];

                foreach ($items as $item) {
                    $attemptedCount++;

                    try {
                        $data[] = $transformer($item);
                    } catch (Exception $e) {
                        $errorCount++;
                        if ($this->output->isVerbose()) {
                            $this->error("Error processing item {$item->id}: {$e->getMessage()}");
                        }
                    }
                    $bar->advance();
                }

                // Skip the write when every row of the chunk failed to transform.
                if (! empty($data)) {
                    $manticore->manticoreSearch->table($indexName)->replaceDocuments($data);
                }
            });

            $bar->finish();
            $this->newLine();

            if ($errorCount > 0) {
                $this->warn("Completed with {$errorCount} errors out of {$attemptedCount} processed items.");
            } else {
                $this->info('ManticoreSearch population completed successfully!');
            }

            return Command::SUCCESS;

        } catch (Exception $e) {
            $bar->finish();
            $this->newLine();
            $this->error("Failed to populate ManticoreSearch: {$e->getMessage()}");

            return Command::FAILURE;
        }
    }
302
303
    /**
     * Populate ElasticSearch releases index
     *
     * Indexes every release together with a space-separated list of its
     * release_files names and a "plainsearchname" variant in which dots and
     * dashes of the searchname are replaced by spaces.
     *
     * @return int Command::SUCCESS when the table is empty, otherwise the
     *             status returned by processElasticData()
     */
    private function elasticReleases(): int
    {
        $total = Release::count();
        if (! $total) {
            $this->warn('Releases table is empty. Nothing to do.');

            return Command::SUCCESS;
        }

        // The same release columns are both selected and grouped on.
        $releaseColumns = [
            'releases.id',
            'releases.name',
            'releases.searchname',
            'releases.fromname',
            'releases.categories_id',
            'releases.postdate',
        ];

        $query = Release::query()
            ->orderByDesc('releases.id')
            ->leftJoin('release_files', 'releases.id', '=', 'release_files.releases_id')
            ->select($releaseColumns)
            ->selectRaw('IFNULL(GROUP_CONCAT(release_files.name SEPARATOR " "),"") AS filename')
            ->groupBy($releaseColumns);

        $toDocument = function ($release) {
            $plainSearchName = str_replace(['.', '-'], ' ', $release->searchname ?? '');

            return [
                'id' => $release->id,
                'name' => $release->name,
                'searchname' => $release->searchname,
                'plainsearchname' => $plainSearchName,
                'fromname' => $release->fromname,
                'categories_id' => $release->categories_id,
                'filename' => $release->filename ?? '',
                'postdate' => $release->postdate,
            ];
        };

        return $this->processElasticData('releases', $total, $query, $toDocument);
    }
356
357
    /**
     * Populate ElasticSearch predb index
     *
     * Indexes the id, title, filename and source of every predb row in
     * ascending id order.
     *
     * @return int Command::SUCCESS when the table is empty, otherwise the
     *             status returned by processElasticData()
     */
    private function elasticPredb(): int
    {
        $total = Predb::count();

        if ($total) {
            $query = Predb::query()
                ->select(['id', 'title', 'filename', 'source'])
                ->orderBy('id');

            // Column values are passed through unchanged.
            $toDocument = function ($row) {
                return [
                    'id' => $row->id,
                    'title' => $row->title,
                    'filename' => $row->filename,
                    'source' => $row->source,
                ];
            };

            return $this->processElasticData('predb', $total, $query, $toDocument);
        }

        $this->warn('PreDB table is empty. Nothing to do.');

        return Command::SUCCESS;
    }
387
388
    /**
     * Stream the rows of $query into an ElasticSearch index via the bulk API.
     *
     * Rows are read in chunks of the configured chunk size and sent to
     * ElasticSearch in sub-batches of at most 1000 documents. A row whose
     * transformer throws is skipped and counted as an error, as is every
     * document the bulk response reports an "index" error for. An exception
     * from the chunked query or from the bulk call aborts the run.
     *
     * @param  string  $indexName  ElasticSearch index to write to
     * @param  int  $total  Expected number of rows; only used for the progress bar and the intro message
     * @param  mixed  $query  Query object exposing chunk(); both callers in this class pass an Eloquent query
     * @param  callable  $transformer  Maps one row to the document body to index
     * @return int Command::SUCCESS when the run finished (even with per-document errors),
     *             Command::FAILURE when it was aborted by an exception
     */
    private function processElasticData(string $indexName, int $total, $query, callable $transformer): int
    {
        $chunkSize = $this->getChunkSize();

        $this->setGroupConcatMaxLen();

        $this->info(sprintf(
            "Populating ElasticSearch index '%s' with %s rows using chunks of %s.",
            $indexName,
            number_format($total),
            number_format($chunkSize)
        ));

        $bar = $this->output->createProgressBar($total);
        $bar->setFormat('verbose');
        $bar->start();

        // Counts every row handed to the transformer, including the ones that
        // fail, so the summary can report "errors out of <all rows>".
        $attemptedCount = 0;
        $errorCount = 0;
        $batchSize = min($chunkSize, 1000); // ElasticSearch performs better with smaller bulk sizes

        try {
            $query->chunk($chunkSize, function ($items) use ($indexName, $transformer, $bar, &$attemptedCount, &$errorCount, $batchSize) {
                // Process in smaller batches for ElasticSearch
                foreach ($items->chunk($batchSize) as $batch) {
                    $data = ['body' => []];

                    foreach ($batch as $item) {
                        $attemptedCount++;

                        try {
                            $transformedData = $transformer($item);

                            // The bulk body alternates an action line with the document it applies to.
                            $data['body'][] = [
                                'index' => [
                                    '_index' => $indexName,
                                    '_id' => $item->id,
                                ],
                            ];
                            $data['body'][] = $transformedData;
                        } catch (Exception $e) {
                            $errorCount++;
                            if ($this->output->isVerbose()) {
                                $this->error("Error processing item {$item->id}: {$e->getMessage()}");
                            }
                        }

                        $bar->advance();
                    }

                    // Skip the bulk call when every row of the batch failed to transform.
                    if (empty($data['body'])) {
                        continue;
                    }

                    $response = \Elasticsearch::bulk($data);

                    // Check for errors in bulk response
                    if (isset($response['errors']) && $response['errors']) {
                        foreach ($response['items'] as $responseItem) {
                            if (isset($responseItem['index']['error'])) {
                                $errorCount++;
                                if ($this->output->isVerbose()) {
                                    $this->error('ElasticSearch error: '.json_encode($responseItem['index']['error']));
                                }
                            }
                        }
                    }
                }
            });

            $bar->finish();
            $this->newLine();

            if ($errorCount > 0) {
                $this->warn("Completed with {$errorCount} errors out of {$attemptedCount} processed items.");
            } else {
                $this->info('ElasticSearch population completed successfully!');
            }

            return Command::SUCCESS;

        } catch (Exception $e) {
            $bar->finish();
            $this->newLine();
            $this->error("Failed to populate ElasticSearch: {$e->getMessage()}");

            return Command::FAILURE;
        }
    }
478
479
    /**
     * Read the chunk size from the --count option.
     *
     * @return int The option cast to int when it is positive, otherwise
     *             DEFAULT_CHUNK_SIZE
     */
    private function getChunkSize(): int
    {
        $requested = (int) $this->option('count');

        if ($requested > 0) {
            return $requested;
        }

        return self::DEFAULT_CHUNK_SIZE;
    }
488
489
    /**
     * Raise group_concat_max_len to GROUP_CONCAT_MAX_LEN for the current
     * database session.
     */
    private function setGroupConcatMaxLen(): void
    {
        $bindings = [self::GROUP_CONCAT_MAX_LEN];

        DB::statement('SET SESSION group_concat_max_len = ?', $bindings);
    }
496
}
497