1 | <?php |
||||
2 | |||||
3 | namespace App\Console\Commands; |
||||
4 | |||||
5 | use App\Models\Predb; |
||||
6 | use App\Models\Release; |
||||
7 | use Blacklight\ManticoreSearch; |
||||
8 | use Exception; |
||||
9 | use Illuminate\Console\Command; |
||||
10 | use Illuminate\Support\Arr; |
||||
11 | use Illuminate\Support\Facades\DB; |
||||
12 | |||||
/**
 * Artisan command that (re)populates the ManticoreSearch or ElasticSearch
 * indexes from the `releases` or `predb` tables, or optimizes the
 * ManticoreSearch RT indexes when --optimize is given.
 */
class NntmuxPopulateSearchIndexes extends Command
{
    /**
     * The name and signature of the console command.
     *
     * @var string
     */
    protected $signature = 'nntmux:populate
                            {--manticore : Use ManticoreSearch}
                            {--elastic : Use ElasticSearch}
                            {--releases : Populates the releases index}
                            {--predb : Populates the predb index}
                            {--count=20000 : Sets the chunk size}
                            {--optimize : Optimize ManticoreSearch indexes}';

    /**
     * The console command description.
     *
     * @var string
     */
    protected $description = 'Populate Manticore/Elasticsearch indexes with either releases or predb';

    /** Option names (without leading dashes) that select a search engine. */
    private const SUPPORTED_ENGINES = ['manticore', 'elastic'];

    /** Option names (without leading dashes) that select the index to populate. */
    private const SUPPORTED_INDEXES = ['releases', 'predb'];

    /**
     * Value applied to the session variable `group_concat_max_len` before
     * populating; presumably raised so the GROUP_CONCAT of release file names
     * is less likely to be truncated.
     */
    private const GROUP_CONCAT_MAX_LEN = 16384;

    /** Chunk size used when --count is missing, non-numeric or not positive. */
    private const DEFAULT_CHUNK_SIZE = 20000;

    /**
     * Execute the console command.
     *
     * --optimize takes precedence over every other option. Otherwise both an
     * engine and an index option are required.
     *
     * @return int Command::SUCCESS or Command::FAILURE
     */
    public function handle(): int
    {
        try {
            if ($this->option('optimize')) {
                return $this->handleOptimize();
            }

            $engine = $this->getSelectedEngine();
            $index = $this->getSelectedIndex();

            if (! $engine || ! $index) {
                $this->error('You must specify both an engine (--manticore or --elastic) and an index (--releases or --predb).');
                $this->info('Use --help to see all available options.');

                return Command::FAILURE;
            }

            return $this->populateIndex($engine, $index);
        } catch (Exception $e) {
            $this->error("An error occurred: {$e->getMessage()}");

            // The stack trace is only useful when debugging; show it with -v.
            if ($this->output->isVerbose()) {
                $this->error($e->getTraceAsString());
            }

            return Command::FAILURE;
        }
    }

    /**
     * Get the selected search engine from options.
     *
     * @return string|null One of SUPPORTED_ENGINES, or null when none was given
     */
    private function getSelectedEngine(): ?string
    {
        return $this->resolveSingleOption(self::SUPPORTED_ENGINES, 'engine');
    }

    /**
     * Get the selected index from options.
     *
     * @return string|null One of SUPPORTED_INDEXES, or null when none was given
     */
    private function getSelectedIndex(): ?string
    {
        return $this->resolveSingleOption(self::SUPPORTED_INDEXES, 'index');
    }

    /**
     * Return the first enabled option among the candidates.
     *
     * When several mutually exclusive options are enabled the first one (in
     * candidate order) still wins, but a warning is printed so the choice is
     * not made silently.
     *
     * @param  string[]  $candidates  Option names to test, in priority order
     * @param  string  $label  Human-readable option kind used in the warning
     * @return string|null The winning option name, or null when none is enabled
     */
    private function resolveSingleOption(array $candidates, string $label): ?string
    {
        $selected = [];

        foreach ($candidates as $candidate) {
            if ($this->option($candidate)) {
                $selected[] = $candidate;
            }
        }

        if ($selected === []) {
            return null;
        }

        if (count($selected) > 1) {
            $this->warn(sprintf(
                'Multiple %s options given (--%s); using --%s.',
                $label,
                implode(', --', $selected),
                $selected[0]
            ));
        }

        return $selected[0];
    }

    /**
     * Handle the optimize command.
     *
     * @return int Command::SUCCESS, or Command::FAILURE when the optimization throws
     */
    private function handleOptimize(): int
    {
        $this->info('Optimizing ManticoreSearch indexes...');

        try {
            (new ManticoreSearch)->optimizeRTIndex();
            $this->info('Optimization completed successfully!');

            return Command::SUCCESS;
        } catch (Exception $e) {
            $this->error("Optimization failed: {$e->getMessage()}");

            return Command::FAILURE;
        }
    }

    /**
     * Populate the specified index with the specified engine.
     *
     * Dispatches to the private method named "<engine><Index>"
     * (e.g. manticoreReleases, elasticPredb) and reports the elapsed time
     * when that method succeeds.
     *
     * @param  string  $engine  One of SUPPORTED_ENGINES
     * @param  string  $index  One of SUPPORTED_INDEXES
     * @return int Exit code returned by the populate method, or
     *             Command::FAILURE when no such method exists
     */
    private function populateIndex(string $engine, string $index): int
    {
        $methodName = $engine.ucfirst($index);

        if (! method_exists($this, $methodName)) {
            $this->error("Method {$methodName} not implemented.");

            return Command::FAILURE;
        }

        $this->info("Starting {$engine} {$index} population...");

        $startTime = microtime(true);
        $result = $this->{$methodName}();
        $executionTime = round(microtime(true) - $startTime, 2);

        if ($result === Command::SUCCESS) {
            $this->info("Population completed in {$executionTime} seconds.");
        }

        return $result;
    }

    /**
     * Build the releases query shared by both engines.
     *
     * Each release row is returned once, together with a `filename` column
     * holding the space-separated names of its release_files rows (empty
     * string when it has none).
     *
     * @param  string[]  $extraColumns  Additional qualified columns to select and group by
     * @return \Illuminate\Database\Eloquent\Builder
     */
    private function releasesQuery(array $extraColumns = [])
    {
        $columns = array_merge([
            'releases.id',
            'releases.name',
            'releases.searchname',
            'releases.fromname',
            'releases.categories_id',
        ], $extraColumns);

        return Release::query()
            ->orderByDesc('releases.id')
            ->leftJoin('release_files', 'releases.id', '=', 'release_files.releases_id')
            ->select($columns)
            // Single-quoted SQL literals: double quotes are parsed as
            // identifiers when the ANSI_QUOTES sql_mode is enabled.
            ->selectRaw("IFNULL(GROUP_CONCAT(release_files.name SEPARATOR ' '), '') AS filename")
            ->groupBy($columns);
    }

    /**
     * Populate ManticoreSearch releases index.
     *
     * The `releases_rt` index is truncated first, even when the releases
     * table turns out to be empty.
     *
     * @return int Command::SUCCESS or Command::FAILURE
     */
    private function manticoreReleases(): int
    {
        $manticore = new ManticoreSearch;
        $indexName = 'releases_rt';

        $manticore->truncateRTIndex(Arr::wrap($indexName));

        $total = Release::count();
        if (! $total) {
            $this->warn('Releases table is empty. Nothing to do.');

            return Command::SUCCESS;
        }

        return $this->processManticoreData(
            $indexName,
            $total,
            $this->releasesQuery(),
            static function ($item) {
                return [
                    'id' => (string) $item->id,
                    'name' => (string) ($item->name ?: ''),
                    'searchname' => (string) ($item->searchname ?: ''),
                    'fromname' => (string) ($item->fromname ?: ''),
                    'categories_id' => (string) ($item->categories_id ?: '0'),
                    'filename' => (string) ($item->filename ?: ''),
                    'dummy' => '1',
                ];
            }
        );
    }

    /**
     * Populate ManticoreSearch predb index.
     *
     * The `predb_rt` index is truncated first, even when the predb table
     * turns out to be empty.
     *
     * @return int Command::SUCCESS or Command::FAILURE
     */
    private function manticorePredb(): int
    {
        $manticore = new ManticoreSearch;
        $indexName = 'predb_rt';

        $manticore->truncateRTIndex([$indexName]);

        $total = Predb::count();
        if (! $total) {
            $this->warn('PreDB table is empty. Nothing to do.');

            return Command::SUCCESS;
        }

        $query = Predb::query()
            ->select(['id', 'title', 'filename', 'source'])
            ->orderBy('id');

        return $this->processManticoreData(
            $indexName,
            $total,
            $query,
            static function ($item) {
                return [
                    'id' => $item->id,
                    'title' => (string) ($item->title ?? ''),
                    'filename' => (string) ($item->filename ?? ''),
                    'source' => (string) ($item->source ?? ''),
                    'dummy' => 1,
                ];
            }
        );
    }

    /**
     * Process data for ManticoreSearch.
     *
     * Reads the query in chunks, converts every row with the transformer and
     * sends each chunk to the index via replaceDocuments(). Rows whose
     * transformation throws are counted as errors and skipped.
     *
     * @param  string  $indexName  Name of the Manticore table to write to
     * @param  int  $total  Expected row count, used only for the progress bar
     * @param  \Illuminate\Database\Eloquent\Builder  $query  Ordered query yielding the rows
     * @param  callable  $transformer  Maps one row to the document array
     * @return int Command::SUCCESS when the whole query was walked (even with
     *             per-row errors), Command::FAILURE when an exception aborted it
     */
    private function processManticoreData(string $indexName, int $total, $query, callable $transformer): int
    {
        $manticore = new ManticoreSearch;
        $chunkSize = $this->getChunkSize();

        $this->setGroupConcatMaxLen();

        $this->info(sprintf(
            "Populating ManticoreSearch index '%s' with %s rows using chunks of %s.",
            $indexName,
            number_format($total),
            number_format($chunkSize)
        ));

        $bar = $this->output->createProgressBar($total);
        $bar->setFormat('verbose');
        $bar->start();

        $processedCount = 0;
        $errorCount = 0;

        try {
            $query->chunk($chunkSize, function ($items) use ($manticore, $indexName, $transformer, $bar, &$processedCount, &$errorCount) {
                $documents = [];

                foreach ($items as $item) {
                    try {
                        $documents[] = $transformer($item);
                        $processedCount++;
                    } catch (Exception $e) {
                        $errorCount++;
                        if ($this->output->isVerbose()) {
                            $this->error("Error processing item {$item->id}: {$e->getMessage()}");
                        }
                    }
                    $bar->advance();
                }

                if (! empty($documents)) {
                    $manticore->manticoreSearch->table($indexName)->replaceDocuments($documents);
                }
            });

            $bar->finish();
            $this->newLine();

            if ($errorCount > 0) {
                $this->warn("Completed with {$errorCount} errors out of {$processedCount} processed items.");
            } else {
                $this->info('ManticoreSearch population completed successfully!');
            }

            return Command::SUCCESS;
        } catch (Exception $e) {
            // Do not finish() the bar here: that would jump it to 100% and
            // misrepresent how far the run got before failing.
            $this->newLine();
            $this->error("Failed to populate ManticoreSearch: {$e->getMessage()}");

            return Command::FAILURE;
        }
    }

    /**
     * Populate ElasticSearch releases index.
     *
     * @return int Command::SUCCESS or Command::FAILURE
     */
    private function elasticReleases(): int
    {
        $total = Release::count();
        if (! $total) {
            $this->warn('Releases table is empty. Nothing to do.');

            return Command::SUCCESS;
        }

        return $this->processElasticData(
            'releases',
            $total,
            $this->releasesQuery(['releases.postdate']),
            static function ($item) {
                // Search name with dots and dashes replaced by spaces.
                $plainSearchName = str_replace(['.', '-'], ' ', $item->searchname ?? '');

                return [
                    'id' => $item->id,
                    'name' => $item->name,
                    'searchname' => $item->searchname,
                    'plainsearchname' => $plainSearchName,
                    'fromname' => $item->fromname,
                    'categories_id' => $item->categories_id,
                    'filename' => $item->filename ?? '',
                    'postdate' => $item->postdate,
                ];
            }
        );
    }

    /**
     * Populate ElasticSearch predb index.
     *
     * @return int Command::SUCCESS or Command::FAILURE
     */
    private function elasticPredb(): int
    {
        $total = Predb::count();
        if (! $total) {
            $this->warn('PreDB table is empty. Nothing to do.');

            return Command::SUCCESS;
        }

        $query = Predb::query()
            ->select(['id', 'title', 'filename', 'source'])
            ->orderBy('id');

        return $this->processElasticData(
            'predb',
            $total,
            $query,
            static function ($item) {
                return [
                    'id' => $item->id,
                    'title' => $item->title,
                    'filename' => $item->filename,
                    'source' => $item->source,
                ];
            }
        );
    }

    /**
     * Process data for ElasticSearch.
     *
     * Reads the query in chunks, splits each chunk into bulk batches of at
     * most 1000 rows and sends every batch through the bulk API. Rows whose
     * transformation throws, and items the bulk response reports as failed,
     * are counted as errors.
     *
     * @param  string  $indexName  Name of the ElasticSearch index to write to
     * @param  int  $total  Expected row count, used only for the progress bar
     * @param  \Illuminate\Database\Eloquent\Builder  $query  Ordered query yielding the rows
     * @param  callable  $transformer  Maps one row to the document body
     * @return int Command::SUCCESS when the whole query was walked (even with
     *             per-row errors), Command::FAILURE when an exception aborted it
     */
    private function processElasticData(string $indexName, int $total, $query, callable $transformer): int
    {
        $chunkSize = $this->getChunkSize();

        $this->setGroupConcatMaxLen();

        $this->info(sprintf(
            "Populating ElasticSearch index '%s' with %s rows using chunks of %s.",
            $indexName,
            number_format($total),
            number_format($chunkSize)
        ));

        $bar = $this->output->createProgressBar($total);
        $bar->setFormat('verbose');
        $bar->start();

        $processedCount = 0;
        $errorCount = 0;
        $batchSize = min($chunkSize, 1000); // ElasticSearch performs better with smaller bulk sizes

        try {
            $query->chunk($chunkSize, function ($items) use ($indexName, $transformer, $bar, &$processedCount, &$errorCount, $batchSize) {
                // Process in smaller batches for ElasticSearch
                foreach ($items->chunk($batchSize) as $batch) {
                    $payload = ['body' => []];

                    foreach ($batch as $item) {
                        try {
                            $document = $transformer($item);

                            // The bulk body alternates an action line and the document itself.
                            $payload['body'][] = [
                                'index' => [
                                    '_index' => $indexName,
                                    '_id' => $item->id,
                                ],
                            ];
                            $payload['body'][] = $document;

                            $processedCount++;
                        } catch (Exception $e) {
                            $errorCount++;
                            if ($this->output->isVerbose()) {
                                $this->error("Error processing item {$item->id}: {$e->getMessage()}");
                            }
                        }

                        $bar->advance();
                    }

                    if (empty($payload['body'])) {
                        continue;
                    }

                    $response = \Elasticsearch::bulk($payload);

                    // Check for errors in bulk response
                    if (isset($response['errors']) && $response['errors']) {
                        foreach ($response['items'] as $responseItem) {
                            if (isset($responseItem['index']['error'])) {
                                $errorCount++;
                                if ($this->output->isVerbose()) {
                                    $this->error('ElasticSearch error: '.json_encode($responseItem['index']['error']));
                                }
                            }
                        }
                    }
                }
            });

            $bar->finish();
            $this->newLine();

            if ($errorCount > 0) {
                $this->warn("Completed with {$errorCount} errors out of {$processedCount} processed items.");
            } else {
                $this->info('ElasticSearch population completed successfully!');
            }

            return Command::SUCCESS;
        } catch (Exception $e) {
            // Do not finish() the bar here: that would jump it to 100% and
            // misrepresent how far the run got before failing.
            $this->newLine();
            $this->error("Failed to populate ElasticSearch: {$e->getMessage()}");

            return Command::FAILURE;
        }
    }

    /**
     * Get the chunk size from options.
     *
     * @return int The --count value, or DEFAULT_CHUNK_SIZE when it is not a positive integer
     */
    private function getChunkSize(): int
    {
        $chunkSize = (int) $this->option('count');

        return $chunkSize > 0 ? $chunkSize : self::DEFAULT_CHUNK_SIZE;
    }

    /**
     * Set the GROUP_CONCAT max length for the session.
     */
    private function setGroupConcatMaxLen(): void
    {
        DB::statement('SET SESSION group_concat_max_len = ?', [self::GROUP_CONCAT_MAX_LEN]);
    }
}
||||
497 |