BackfillService::logGroupProgress()   A
last analyzed

Complexity

Conditions 2
Paths 2

Size

Total Lines 4
Code Lines 2

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 2
dl 0
loc 4
rs 10
c 0
b 0
f 0
cc 2
nc 2
nop 3
1
<?php
2
3
declare(strict_types=1);
4
5
namespace App\Services\Backfill;
6
7
use App\Models\UsenetGroup;
8
use App\Services\Binaries\BinariesService;
9
use Blacklight\ColorCLI;
10
use Blacklight\NNTP;
11
use Illuminate\Support\Carbon;
12
use Illuminate\Support\Facades\DB;
13
14
/**
15
 * Service for backfilling Usenet groups with historical articles.
16
 *
17
 * This service handles downloading older articles from Usenet groups
18
 * to fill in historical data. It supports:
19
 * - Backfilling by article count or target date
20
 * - Safe backfill with date-based targeting
21
 * - Automatic group disable when backfill limit is reached
22
 */
23
final class BackfillService
24
{
25
    private const DEFAULT_ARTICLE_COUNT = 20000;
26
27
    private BackfillConfig $config;
0 ignored issues
show
Bug introduced by
The type App\Services\Backfill\BackfillConfig was not found. Maybe you did not declare it correctly or list all dependencies?

The issue could also be caused by a filter entry in the build configuration. If the path has been excluded in your configuration, e.g. excluded_paths: ["lib/*"], you can move it to the dependency path list as follows:

filter:
    dependency_paths: ["lib/*"]

For further information see https://scrutinizer-ci.com/docs/tools/php/php-scrutinizer/#list-dependency-paths

Loading history...
28
29
    private BinariesService $binaries;
30
31
    private NNTP $nntp;
32
33
    private ColorCLI $colorCli;
34
35
    /**
     * Wire up the collaborators used by the service.
     *
     * Every argument is optional. Any argument left as null is replaced by a
     * default: the configuration comes from BackfillConfig::fromSettings(),
     * the other collaborators are created with their no-argument constructors.
     */
    public function __construct(
        ?BackfillConfig $config = null,
        ?BinariesService $binaries = null,
        ?NNTP $nntp = null,
        ?ColorCLI $colorCli = null,
    ) {
        if ($config === null) {
            $config = BackfillConfig::fromSettings();
        }

        if ($binaries === null) {
            $binaries = new BinariesService();
        }

        if ($nntp === null) {
            $nntp = new NNTP();
        }

        if ($colorCli === null) {
            $colorCli = new ColorCLI();
        }

        $this->config = $config;
        $this->binaries = $binaries;
        $this->nntp = $nntp;
        $this->colorCli = $colorCli;
    }
46
47
    /**
     * Run a backfill over one named group or over every group selected for backfilling.
     *
     * When no group matches, a warning is logged and nothing else happens.
     * Otherwise each group is processed in turn via backfillGroup(), with a
     * start banner before the first group and a duration line after the last.
     *
     * @param  string  $groupName  Name of a single group; an empty string selects the active backfill groups for $type
     * @param  int|string  $articles  Number of articles to go back, or an empty string for date-based targeting
     * @param  string  $type  Filter forwarded to the active-backfill group lookup
     *
     * @throws \Throwable
     */
    public function backfillAllGroups(string $groupName = '', int|string $articles = '', string $type = ''): void
    {
        $targets = $this->getGroupsToBackfill($groupName, $type);

        if ($targets === []) {
            $this->log('No groups specified. Ensure groups are added to database for updating.', 'warning');

            return;
        }

        $total = \count($targets);
        $this->logBackfillStart($total);

        $articleCount = $this->normalizeArticleCount($articles);
        $startedAt = now();

        foreach ($targets as $position => $target) {
            // 1-based position of the current group; also used to derive how many groups remain.
            $ordinal = $position + 1;

            $this->logGroupProgress($groupName, $ordinal, $total);
            $this->backfillGroup($target->toArray(), $total - $ordinal, $articleCount);
        }

        $this->logBackfillComplete($startedAt);
    }
79
80
    /**
     * Backfill one group down to its computed target article.
     *
     * The method returns early, without scanning, when the group has no local
     * first record, when the group cannot be selected on the NNTP server, or
     * when the computed target cannot be reached.
     *
     * @param  array  $groupArr  Group row as an array; the keys 'name', 'id', 'first_record' and 'backfill_target' are read
     * @param  int  $remainingGroups  Number of groups still to process after this one (used for progress output only)
     * @param  int|string  $articles  Number of articles to go back, or an empty string for date-based targeting
     *
     * @throws \Throwable
     */
    public function backfillGroup(array $groupArr, int $remainingGroups, int|string $articles = ''): void
    {
        $startedAt = now();
        $this->binaries->logIndexerStart();

        $fullName = $groupArr['name'];
        $displayName = $this->getShortGroupName($fullName);

        if ($this->validateGroupState($groupArr, $displayName) === false) {
            return;
        }

        $server = $this->selectNntpGroup($fullName);
        if ($server === null) {
            return;
        }

        $this->log('Processing '.$displayName, 'primary');

        $target = $this->calculateTargetPost($groupArr, $articles, $server);

        if ($this->validateTargetPost($groupArr, $target, $server, $displayName) === false) {
            return;
        }

        $this->logGroupInfo($groupArr, $server, $target, $displayName);
        $this->processBackfillChunks($groupArr, $target, $remainingGroups, $displayName);
        $this->logGroupComplete($displayName, $startedAt);
    }
119
120
    /**
     * Safe backfill: backfill one group that has not yet reached the safe backfill date.
     *
     * Picks the first group (ordered by name) that has backfill enabled and
     * whose first_record_postdate lies between the configured safe backfill
     * date and now, then hands it to backfillAllGroups().
     *
     * When no such group exists, a message is logged and the method returns.
     * It no longer calls exit(): terminating the process from inside a service
     * would stop whatever else the caller still has to do.
     *
     * @param  int|string  $articles  Number of articles to backfill, or an empty string for date-based targeting
     *
     * @throws \Throwable
     */
    public function safeBackfill(int|string $articles = ''): void
    {
        $safeDate = $this->config->safeBackFillDate;

        $group = UsenetGroup::query()
            ->whereBetween('first_record_postdate', [Carbon::createFromDate($safeDate), now()])
            ->where('backfill', '=', 1)
            ->select(['name'])
            ->orderBy('name')
            ->first();

        if ($group === null) {
            $this->log(
                sprintf(
                    'No groups to backfill, they are all at the target date %s, or you have not enabled them to be backfilled in the groups page.',
                    $safeDate
                ),
                'warning'
            );

            return;
        }

        $this->backfillAllGroups($group->name, $articles);
    }
146
147
    /**
     * Resolve the list of groups a backfill run should cover.
     *
     * With an empty $groupName the active backfill groups for $type are
     * returned. Otherwise the named group is looked up and returned as a
     * one-element list, or an empty list when the lookup yields nothing.
     */
    private function getGroupsToBackfill(string $groupName, string $type): array
    {
        if ($groupName === '') {
            return UsenetGroup::getActiveBackfill($type)->all();
        }

        $match = UsenetGroup::getByName($groupName);

        if (! $match) {
            return [];
        }

        return [$match];
    }
160
161
    /**
     * Normalize the article count parameter.
     *
     * - An empty string is returned unchanged; it selects date-based targeting.
     * - A non-numeric value falls back to DEFAULT_ARTICLE_COUNT.
     * - A numeric value is converted to int. If the result is zero or negative
     *   it also falls back to DEFAULT_ARTICLE_COUNT.
     *
     * The last rule matters because a non-positive count makes the computed
     * target equal to or newer than the group's first record. Target
     * validation treats that as "backfill limit reached" and may disable
     * backfill on the group.
     *
     * @return int|string A positive article count, or '' for date-based targeting
     */
    private function normalizeArticleCount(int|string $articles): int|string
    {
        if ($articles === '') {
            return '';
        }

        if (! is_numeric($articles)) {
            return self::DEFAULT_ARTICLE_COUNT;
        }

        $count = (int) $articles;

        return $count > 0 ? $count : self::DEFAULT_ARTICLE_COUNT;
    }
172
173
    /**
     * Build the abbreviated group name used in log output.
     *
     * Every occurrence of "alt.binaries" in the name is replaced by "a.b".
     */
    private function getShortGroupName(string $groupName): string
    {
        return strtr($groupName, ['alt.binaries' => 'a.b']);
    }
180
181
    /**
     * Check that the group has a local first record to backfill from.
     *
     * @return bool True when 'first_record' is greater than zero; otherwise an error is logged and false is returned
     */
    private function validateGroupState(array $groupArr, string $shortGroupName): bool
    {
        $hasNoFirstRecord = $groupArr['first_record'] <= 0;

        if (! $hasNoFirstRecord) {
            return true;
        }

        $this->log(
            "You need to run update_binaries on {$shortGroupName}. Otherwise the group is dead, you must disable it.",
            'error'
        );

        return false;
    }
197
198
    /**
     * Select the group on the NNTP server and return the server's response.
     *
     * If the first selection reports an error, NNTP::dataError() is invoked
     * once and its result is used instead.
     * NOTE(review): dataError() presumably reconnects and retries the selection — confirm against the NNTP class.
     *
     * @return array|null Server data for the group, or null when both attempts report an error
     */
    private function selectNntpGroup(string $groupName): ?array
    {
        $response = $this->nntp->selectGroup($groupName);

        if (! $this->nntp->isError($response)) {
            return $response;
        }

        $retried = $this->nntp->dataError($this->nntp, $groupName);

        return $this->nntp->isError($retried) ? null : $retried;
    }
214
215
    /**
     * Work out the article number the backfill should reach.
     *
     * With a non-empty $articles the target is the group's 'first_record'
     * minus that count. With an empty string the target is derived from the
     * group's 'backfill_target' through BinariesService::daytopost().
     * The result is never lower than the server's 'first' article number.
     */
    private function calculateTargetPost(array $groupArr, int|string $articles, array $serverData): int
    {
        if ($articles !== '') {
            $candidate = (int) round($groupArr['first_record'] - (int) $articles);
        } else {
            $candidate = (int) $this->binaries->daytopost($groupArr['backfill_target'], $serverData);
        }

        // Clamp to the oldest article the server still holds.
        $serverOldest = (int) $serverData['first'];

        return max($candidate, $serverOldest);
    }
229
230
    /**
     * Decide whether there is anything left to backfill for the group.
     *
     * The limit counts as reached when the target is not older than the
     * group's 'first_record', or when 'first_record' is already at or below
     * the server's 'first' article. In that case a notice is logged and, if
     * the configuration asks for it, backfill is switched off for the group.
     *
     * @return bool True when backfilling can proceed, false when the limit has been reached
     */
    private function validateTargetPost(array $groupArr, int $targetPost, array $serverData, string $shortGroupName): bool
    {
        $oldestLocal = $groupArr['first_record'];
        $limitReached = $targetPost >= $oldestLocal || $oldestLocal <= $serverData['first'];

        if (! $limitReached) {
            return true;
        }

        $autoDisable = $this->config->disableBackfillGroup;

        $suffix = ', skipping it, consider disabling backfill on it.';
        if ($autoDisable) {
            $suffix = ', disabling backfill on it.';
        }

        if ($autoDisable) {
            UsenetGroup::updateGroupStatus($groupArr['id'], 'backfill', 0);
        }

        $this->log("We have hit the maximum we can backfill for {$shortGroupName}".$suffix, 'notice');

        return false;
    }
252
253
    /**
     * Walk backwards from the group's first record to the target, one chunk at a time.
     *
     * Each chunk covers at most getMessageBuffer() articles and never reaches
     * below $targetPost. After every scan the group record is updated with
     * the new lower bound. The loop ends once a chunk's lower bound equals
     * the target.
     */
    private function processBackfillChunks(array $groupArr, int $targetPost, int $remainingGroups, string $shortGroupName): void
    {
        $chunkSize = $this->binaries->getMessageBuffer();

        // Highest article number not yet held locally.
        $upper = $groupArr['first_record'] - 1;

        do {
            $lower = max($upper - $chunkSize + 1, $targetPost);

            $this->logChunkProgress($lower, $upper, $shortGroupName, $remainingGroups, $targetPost);

            flush();
            $summary = $this->binaries->scan($groupArr, $lower, $upper, $this->config->safePartRepair);

            $this->updateGroupRecord($groupArr, $lower, $summary);

            // The next chunk ends just below the one that was processed.
            $upper = $lower - 1;
        } while ($lower !== $targetPost);
    }
279
280
    /**
     * Persist the group's new first_record and its post date.
     *
     * The post date is taken from the scan result's 'firstArticleDate' when
     * present and parsable. strtotime() returns false for a string it cannot
     * parse; rather than binding that false into FROM_UNIXTIME(), the date is
     * then looked up through BinariesService::postdate(), the same path used
     * when the scan result carries no date at all.
     *
     * @param  array  $groupArr  Group row as an array; 'id' and 'name' are read
     * @param  int  $first  Article number that becomes the group's first_record
     * @param  array|null  $scanResult  Result of the chunk scan, if any
     */
    private function updateGroupRecord(array $groupArr, int $first, ?array $scanResult): void
    {
        $newDate = isset($scanResult['firstArticleDate'])
            ? strtotime($scanResult['firstArticleDate'])
            : false;

        if ($newDate === false) {
            $newDate = $this->binaries->postdate($first, $this->nntp->selectGroup($groupArr['name']));
        }

        DB::update(
            'UPDATE usenet_groups SET first_record_postdate = FROM_UNIXTIME(?), first_record = ?, last_updated = NOW() WHERE id = ?',
            [$newDate, $first, $groupArr['id']]
        );
    }
294
295
    /**
     * Write a message to the CLI using the ColorCLI style named by $type.
     *
     * Nothing is written when the configuration's echoCli flag is off.
     * Recognised types are 'header', 'warning', 'error' and 'notice'; any
     * other value is written with the 'primary' style.
     */
    private function log(string $message, string $type = 'primary'): void
    {
        if (! $this->config->echoCli) {
            return;
        }

        switch ($type) {
            case 'header':
                $this->colorCli->header($message);
                break;
            case 'warning':
                $this->colorCli->warning($message);
                break;
            case 'error':
                $this->colorCli->error($message);
                break;
            case 'notice':
                $this->colorCli->notice($message);
                break;
            default:
                $this->colorCli->primary($message);
        }
    }
312
313
    /**
     * Log the banner shown at the start of a backfill run.
     *
     * Reports the number of groups and whether compressed headers are enabled in the configuration.
     */
    private function logBackfillStart(int $groupCount): void
    {
        $usingCompression = 'No';
        if ($this->config->compressedHeaders) {
            $usingCompression = 'Yes';
        }

        $this->log(
            sprintf('Backfilling: %d group(s) - Using compression? %s', $groupCount, $usingCompression),
            'header'
        );
    }
321
322
    /**
     * Log "group X of Y" progress.
     *
     * The line is only written when $groupName is empty, i.e. when the run
     * was not restricted to a single named group.
     */
    private function logGroupProgress(string $groupName, int $current, int $total): void
    {
        if ($groupName !== '') {
            return;
        }

        $this->log(sprintf('Starting group %d of %d', $current, $total), 'header');
    }
331
332
    /**
     * Log how long the whole backfill run took.
     *
     * @param  \Illuminate\Support\Carbon  $startTime  Moment the run started
     */
    private function logBackfillComplete(\Illuminate\Support\Carbon $startTime): void
    {
        // Second argument true requests the absolute difference.
        $elapsed = now()->diffInSeconds($startTime, true);

        $this->log('Backfilling completed in '.$elapsed.' seconds.');
    }
340
341
    /**
     * Log the article range available on the server, the target, and the oldest article held locally.
     */
    private function logGroupInfo(array $groupArr, array $serverData, int $targetPost, string $shortGroupName): void
    {
        $serverOldest = number_format((float) $serverData['first']);
        $serverNewest = number_format((float) $serverData['last']);
        $target = number_format($targetPost);
        $localOldest = number_format((float) $groupArr['first_record']);

        $line = sprintf(
            "Group %s's oldest article is %s, newest is %s. Our target article is %s. Our oldest article is article %s.",
            $shortGroupName,
            $serverOldest,
            $serverNewest,
            $target,
            $localOldest
        );

        $this->log($line);
    }
355
356
    /**
     * Log the size of the chunk about to be fetched and how much work remains.
     *
     * The chunk size is the inclusive range $first..$last; the "queue" figure
     * is the distance between this chunk's lower bound and the target.
     */
    private function logChunkProgress(int $first, int $last, string $shortGroupName, int $remainingGroups, int $targetPost): void
    {
        $chunkSize = number_format($last - $first + 1);
        $queued = number_format($first - $targetPost);

        $line = sprintf(
            'Getting %s articles from %s, %d group(s) left. (%s articles in queue)',
            $chunkSize,
            $shortGroupName,
            $remainingGroups,
            $queued
        );

        $this->log($line, 'header');
    }
369
370
    /**
     * Log how long a single group took to process.
     *
     * The duration is the difference between the current and the start
     * timestamps, formatted with two decimals.
     *
     * @param  \Illuminate\Support\Carbon  $startTime  Moment processing of the group started
     */
    private function logGroupComplete(string $shortGroupName, \Illuminate\Support\Carbon $startTime): void
    {
        $elapsed = now()->timestamp - $startTime->timestamp;
        $formatted = number_format($elapsed, 2);

        $this->log(sprintf('%sGroup %s processed in %s seconds.', PHP_EOL, $shortGroupName, $formatted));
    }
378
}
379
380