NNTmux /
newznab-tmux
| 1 | <?php |
||||
| 2 | |||||
| 3 | declare(strict_types=1); |
||||
| 4 | |||||
| 5 | namespace App\Services\Backfill; |
||||
| 6 | |||||
| 7 | use App\Models\UsenetGroup; |
||||
| 8 | use App\Services\Binaries\BinariesService; |
||||
| 9 | use Blacklight\ColorCLI; |
||||
| 10 | use Blacklight\NNTP; |
||||
| 11 | use Illuminate\Support\Carbon; |
||||
| 12 | use Illuminate\Support\Facades\DB; |
||||
| 13 | |||||
/**
 * Service for backfilling Usenet groups with historical articles.
 *
 * This service handles downloading older articles from Usenet groups
 * to fill in historical data. It supports:
 * - Backfilling by article count or target date
 * - Safe backfill with date-based targeting
 * - Automatic group disable when backfill limit is reached
 */
final class BackfillService
{
    /** Article count used when a non-empty, non-numeric count is supplied. */
    private const DEFAULT_ARTICLE_COUNT = 20000;

    private BackfillConfig $config;

    private BinariesService $binaries;

    private NNTP $nntp;

    private ColorCLI $colorCli;

    /**
     * Every collaborator is optional; a missing one is replaced by a default instance.
     *
     * @param BackfillConfig|null  $config   Defaults to BackfillConfig::fromSettings()
     * @param BinariesService|null $binaries Defaults to a new BinariesService
     * @param NNTP|null            $nntp     Defaults to a new NNTP
     * @param ColorCLI|null        $colorCli Defaults to a new ColorCLI
     */
    public function __construct(
        ?BackfillConfig $config = null,
        ?BinariesService $binaries = null,
        ?NNTP $nntp = null,
        ?ColorCLI $colorCli = null,
    ) {
        $this->config = $config ?? BackfillConfig::fromSettings();
        $this->binaries = $binaries ?? new BinariesService;
        $this->nntp = $nntp ?? new NNTP;
        $this->colorCli = $colorCli ?? new ColorCLI;
    }

    /**
     * Backfill all groups or a specific group.
     *
     * @param string     $groupName Optional specific group to backfill
     * @param int|string $articles  Number of articles to backfill, or empty for date-based
     * @param string     $type      Backfill type filter
     *
     * @throws \Throwable
     */
    public function backfillAllGroups(string $groupName = '', int|string $articles = '', string $type = ''): void
    {
        $groups = $this->getGroupsToBackfill($groupName, $type);

        if ($groups === []) {
            $this->log('No groups specified. Ensure groups are added to database for updating.', 'warning');

            return;
        }

        $groupCount = \count($groups);
        $this->logBackfillStart($groupCount);

        $articles = $this->normalizeArticleCount($articles);
        $startTime = now();

        foreach ($groups as $index => $group) {
            $this->logGroupProgress($groupName, $index + 1, $groupCount);
            // Second argument is how many groups are still waiting after this one.
            $this->backfillGroup($group->toArray(), $groupCount - $index - 1, $articles);
        }

        $this->logBackfillComplete($startTime);
    }

    /**
     * Backfill a single group.
     *
     * Returns early, without scanning, when the group state is invalid, the
     * NNTP group cannot be selected, or the target post is not achievable.
     *
     * @param array      $groupArr        Group data array (reads 'name', 'first_record', 'backfill_target', 'id')
     * @param int        $remainingGroups Number of groups remaining after this one
     * @param int|string $articles        Number of articles to backfill, or empty for date-based
     *
     * @throws \Throwable
     */
    public function backfillGroup(array $groupArr, int $remainingGroups, int|string $articles = ''): void
    {
        $startTime = now();
        $this->binaries->logIndexerStart();

        $shortGroupName = $this->getShortGroupName($groupArr['name']);

        if (! $this->validateGroupState($groupArr, $shortGroupName)) {
            return;
        }

        $serverData = $this->selectNntpGroup($groupArr['name']);
        if ($serverData === null) {
            return;
        }

        $this->log("Processing {$shortGroupName}", 'primary');

        $targetPost = $this->calculateTargetPost($groupArr, $articles, $serverData);

        if (! $this->validateTargetPost($groupArr, $targetPost, $serverData, $shortGroupName)) {
            return;
        }

        $this->logGroupInfo($groupArr, $serverData, $targetPost, $shortGroupName);

        $this->processBackfillChunks($groupArr, $targetPost, $remainingGroups, $shortGroupName);

        $this->logGroupComplete($shortGroupName, $startTime);
    }

    /**
     * Safe backfill - backfill groups that haven't reached the safe backfill date.
     *
     * Picks the first group (ordered by name) with backfill enabled whose
     * first_record_postdate lies between the configured safe backfill date
     * and now, then backfills that single group.
     *
     * NOTE(review): when no group matches, this terminates the whole PHP
     * process via exit(). Kept as-is because callers may rely on it; confirm
     * before replacing it with a return or an exception.
     *
     * @param int|string $articles Number of articles to backfill
     *
     * @throws \Throwable
     */
    public function safeBackfill(int|string $articles = ''): void
    {
        $group = UsenetGroup::query()
            ->whereBetween('first_record_postdate', [Carbon::createFromDate($this->config->safeBackFillDate), now()])
            ->where('backfill', '=', 1)
            ->select(['name'])
            ->orderBy('name')
            ->first();

        if ($group === null) {
            $message = sprintf(
                'No groups to backfill, they are all at the target date %s, or you have not enabled them to be backfilled in the groups page.',
                $this->config->safeBackFillDate
            );
            exit($message.PHP_EOL);
        }

        $this->backfillAllGroups($group->name, $articles);
    }

    /**
     * Get groups to backfill based on criteria.
     *
     * @param string $groupName When non-empty, only this group is looked up by name
     * @param string $type      Passed to UsenetGroup::getActiveBackfill() when no name is given
     *
     * @return array Group models; empty when a named group is not found
     */
    private function getGroupsToBackfill(string $groupName, string $type): array
    {
        if ($groupName !== '') {
            $group = UsenetGroup::getByName($groupName);

            return $group ? [$group] : [];
        }

        return UsenetGroup::getActiveBackfill($type)->all();
    }

    /**
     * Normalize article count parameter.
     *
     * A non-empty, non-numeric value becomes DEFAULT_ARTICLE_COUNT; the empty
     * string and numeric values are returned unchanged.
     */
    private function normalizeArticleCount(int|string $articles): int|string
    {
        if ($articles !== '' && ! is_numeric($articles)) {
            return self::DEFAULT_ARTICLE_COUNT;
        }

        return $articles;
    }

    /**
     * Get shortened group name for display ('alt.binaries' becomes 'a.b').
     */
    private function getShortGroupName(string $groupName): string
    {
        return str_replace('alt.binaries', 'a.b', $groupName);
    }

    /**
     * Validate that group is in a valid state for backfilling.
     *
     * @return bool False (after logging an error) when 'first_record' is not positive
     */
    private function validateGroupState(array $groupArr, string $shortGroupName): bool
    {
        if ($groupArr['first_record'] <= 0) {
            $this->log(
                "You need to run update_binaries on {$shortGroupName}. Otherwise the group is dead, you must disable it.",
                'error'
            );

            return false;
        }

        return true;
    }

    /**
     * Select NNTP group and return server data.
     *
     * On an error response the call is retried once through NNTP::dataError().
     *
     * @return array|null Server data, or null when both attempts report an error
     */
    private function selectNntpGroup(string $groupName): ?array
    {
        $data = $this->nntp->selectGroup($groupName);

        if ($this->nntp->isError($data)) {
            $data = $this->nntp->dataError($this->nntp, $groupName);
            if ($this->nntp->isError($data)) {
                return null;
            }
        }

        return $data;
    }

    /**
     * Calculate target post number based on articles count or date.
     *
     * With a non-empty $articles the target is 'first_record' minus that
     * count; otherwise it comes from BinariesService::daytopost() applied to
     * the group's 'backfill_target'.
     *
     * @return int Target article number, never lower than the server's 'first'
     */
    private function calculateTargetPost(array $groupArr, int|string $articles, array $serverData): int
    {
        $isArticleBased = $articles !== '';

        $targetPost = $isArticleBased
            ? (int) round($groupArr['first_record'] - (int) $articles)
            : (int) $this->binaries->daytopost($groupArr['backfill_target'], $serverData);

        // Ensure target is not below server's oldest article
        return max($targetPost, (int) $serverData['first']);
    }

    /**
     * Validate that target post is achievable.
     *
     * When there is nothing left to fetch, a notice is logged and, if the
     * configuration asks for it, backfill is switched off for the group.
     *
     * @return bool False when the target is not below 'first_record' or the
     *              server holds nothing older than 'first_record'
     */
    private function validateTargetPost(array $groupArr, int $targetPost, array $serverData, string $shortGroupName): bool
    {
        if ($targetPost >= $groupArr['first_record'] || $groupArr['first_record'] <= $serverData['first']) {
            $message = "We have hit the maximum we can backfill for {$shortGroupName}";
            $message .= $this->config->disableBackfillGroup
                ? ', disabling backfill on it.'
                : ', skipping it, consider disabling backfill on it.';

            if ($this->config->disableBackfillGroup) {
                UsenetGroup::updateGroupStatus($groupArr['id'], 'backfill', 0);
            }

            $this->log($message, 'notice');

            return false;
        }

        return true;
    }

    /**
     * Process backfill in chunks.
     *
     * Walks backwards from the article just before 'first_record' down to
     * $targetPost, scanning at most one message buffer of articles per pass
     * and persisting progress after each pass.
     */
    private function processBackfillChunks(array $groupArr, int $targetPost, int $remainingGroups, string $shortGroupName): void
    {
        // A buffer below 1 would place $first above $last, so the window would
        // never advance toward $targetPost and the loop would never terminate.
        $messageBuffer = max(1, (int) $this->binaries->getMessageBuffer());
        $last = $groupArr['first_record'] - 1;
        $first = max($last - $messageBuffer + 1, $targetPost);

        while (true) {
            $this->logChunkProgress($first, $last, $shortGroupName, $remainingGroups, $targetPost);

            flush();
            $scanResult = $this->binaries->scan($groupArr, $first, $last, $this->config->safePartRepair);

            $this->updateGroupRecord($groupArr, $first, $scanResult);

            if ($first === $targetPost) {
                break;
            }

            // Move to next chunk
            $last = $first - 1;
            $first = max($last - $messageBuffer + 1, $targetPost);
        }
    }

    /**
     * Update group record with new first_record and postdate.
     *
     * The post date is taken from the scan result's 'firstArticleDate' when it
     * is present and parseable; otherwise it is looked up through
     * BinariesService::postdate() for article $first.
     */
    private function updateGroupRecord(array $groupArr, int $first, ?array $scanResult): void
    {
        $newDate = isset($scanResult['firstArticleDate'])
            ? strtotime($scanResult['firstArticleDate'])
            : false;

        // strtotime() yields false for an unparsable date; binding that into
        // FROM_UNIXTIME() would store a bogus post date, so fall back to the lookup.
        if ($newDate === false) {
            $newDate = $this->binaries->postdate($first, $this->nntp->selectGroup($groupArr['name']));
        }

        DB::update(
            'UPDATE usenet_groups SET first_record_postdate = FROM_UNIXTIME(?), first_record = ?, last_updated = NOW() WHERE id = ?',
            [$newDate, $first, $groupArr['id']]
        );
    }

    /**
     * Log message with appropriate styling.
     *
     * Does nothing when CLI echo is disabled in the configuration. Unknown
     * types are rendered with the 'primary' style.
     */
    private function log(string $message, string $type = 'primary'): void
    {
        if (! $this->config->echoCli) {
            return;
        }

        match ($type) {
            'header' => $this->colorCli->header($message),
            'warning' => $this->colorCli->warning($message),
            'error' => $this->colorCli->error($message),
            'notice' => $this->colorCli->notice($message),
            default => $this->colorCli->primary($message),
        };
    }

    /**
     * Log backfill start information.
     */
    private function logBackfillStart(int $groupCount): void
    {
        $compressionStatus = $this->config->compressedHeaders ? 'Yes' : 'No';
        $this->log("Backfilling: {$groupCount} group(s) - Using compression? {$compressionStatus}", 'header');
    }

    /**
     * Log group progress.
     *
     * Only logs when no specific group name was requested.
     */
    private function logGroupProgress(string $groupName, int $current, int $total): void
    {
        if ($groupName === '') {
            $this->log("Starting group {$current} of {$total}", 'header');
        }
    }

    /**
     * Log backfill completion.
     */
    private function logBackfillComplete(Carbon $startTime): void
    {
        $duration = now()->diffInSeconds($startTime, true);
        $this->log("Backfilling completed in {$duration} seconds.");
    }

    /**
     * Log group info before processing.
     */
    private function logGroupInfo(array $groupArr, array $serverData, int $targetPost, string $shortGroupName): void
    {
        $this->log(sprintf(
            "Group %s's oldest article is %s, newest is %s. Our target article is %s. Our oldest article is article %s.",
            $shortGroupName,
            number_format((float) $serverData['first']),
            number_format((float) $serverData['last']),
            number_format($targetPost),
            number_format((float) $groupArr['first_record'])
        ));
    }

    /**
     * Log chunk progress.
     */
    private function logChunkProgress(int $first, int $last, string $shortGroupName, int $remainingGroups, int $targetPost): void
    {
        $this->log(sprintf(
            'Getting %s articles from %s, %d group(s) left. (%s articles in queue)',
            number_format($last - $first + 1),
            $shortGroupName,
            $remainingGroups,
            number_format($first - $targetPost)
        ), 'header');
    }

    /**
     * Log group completion.
     */
    private function logGroupComplete(string $shortGroupName, Carbon $startTime): void
    {
        // Whole-second timestamps are subtracted, so the two decimals are always ".00".
        $duration = number_format(now()->timestamp - $startTime->timestamp, 2);
        $this->log(PHP_EOL."Group {$shortGroupName} processed in {$duration} seconds.");
    }
}
||||
| 379 | |||||
| 380 |
The issue could also be caused by a filter entry in the build configuration. If the path has been excluded in your configuration, e.g.
excluded_paths: ["lib/*"], you can move it to the dependency path list instead. For further information, see https://scrutinizer-ci.com/docs/tools/php/php-scrutinizer/#list-dependency-paths