Passed
Push — master ( ae3f18...b8bba4 )
by Darko
10:59
created

HeaderParser   A

Complexity

Total Complexity 22

Size/Duplication

Total Lines 152
Duplicated Lines 0 %

Importance

Changes 0
Metric Value
wmc 22
eloc 55
dl 0
loc 152
rs 10
c 0
b 0
f 0

7 Methods

Rating   Name   Duplication   Size   Complexity  
B parse() 0 60 10
A reset() 0 4 1
A getBlacklistedCount() 0 3 1
A flushBlacklistUpdates() 0 5 2
A getNotYEncCount() 0 3 1
A __construct() 0 3 1
A getArticleRange() 0 28 6
1
<?php
2
3
declare(strict_types=1);
4
5
namespace App\Services\Binaries;
6
7
use App\Services\BlacklistService;
8
use Illuminate\Support\Facades\Log;
9
10
/**
11
 * Parses and filters raw NNTP headers.
12
 */
13
final class HeaderParser
{
    /** Service used to decide whether a header is blacklisted and to record rule usage. */
    private BlacklistService $blacklistService;

    /** Number of headers rejected because their subject could not be parsed, since the last reset(). */
    private int $notYEnc = 0;

    /** Number of headers rejected by the blacklist service, since the last reset(). */
    private int $blacklisted = 0;

    /**
     * @param  BlacklistService|null  $blacklistService  Service to use; a new instance is created when omitted
     */
    public function __construct(?BlacklistService $blacklistService = null)
    {
        $this->blacklistService = $blacklistService ?? new BlacklistService;
    }

    /**
     * Reset both rejection counters to zero.
     *
     * The counters accumulate across parse() calls, so call this before
     * starting a batch whose counts should be reported on their own.
     */
    public function reset(): void
    {
        $this->notYEnc = 0;
        $this->blacklisted = 0;
    }

    /**
     * Parse and filter raw headers from NNTP.
     *
     * A header is kept only when it has a 'Number', its 'Subject' contains a
     * "(part/total)" marker and is not a "Usenet Index Post", and the blacklist
     * service does not reject it. Kept headers gain a 'matches' entry (the
     * regex captures: full match, base name, part, total) and a 'Bytes' entry
     * that falls back to ':bytes' and then to 0.
     *
     * The 'notYEnc' and 'blacklisted' values in the result are the running
     * counters of this instance, not per-call counts; see reset().
     *
     * @param  array  $headers  Raw headers from NNTP; each entry is read via the keys
     *                          'Number', 'Subject', 'Bytes' and ':bytes'
     * @param  string  $groupName  The newsgroup name, passed through to the blacklist service
     * @param  bool  $partRepair  Whether this is a part repair scan
     * @param  array|null  $missingParts  Missing article numbers if part repair; when given together
     *                                    with $partRepair, all other articles are skipped
     * @return array{headers: array, repaired: array, notYEnc: int, blacklisted: int}
     *               Accepted headers, the article numbers matched against $missingParts,
     *               and the two running rejection counters
     */
    public function parse(
        array $headers,
        string $groupName,
        bool $partRepair = false,
        ?array $missingParts = null
    ): array {
        $accepted = [];
        $headersRepaired = [];
        $onlyMissingParts = $partRepair && $missingParts !== null;

        foreach ($headers as $header) {
            // No article number means the article was not received.
            if (! isset($header['Number'])) {
                continue;
            }

            // For part repair, only process the articles we are missing.
            if ($onlyMissingParts) {
                // NOTE(review): loose comparison is kept so that numeric strings and
                // ints compare equal; presumably article numbers may arrive as either.
                if (! \in_array($header['Number'], $missingParts, false)) {
                    continue;
                }
                $headersRepaired[] = $header['Number'];
            }

            // A missing or non-string subject cannot carry a "(12/45)" marker. Reject it
            // here instead of handing a non-string to preg_match(), which throws a
            // TypeError under strict_types and would abort the whole batch.
            $subject = $header['Subject'] ?? null;
            if (
                ! \is_string($subject)
                || ! preg_match('/^\s*(?!"Usenet Index Post)(.+)\s+\((\d+)\/(\d+)\)/', $subject, $matches)
            ) {
                $this->notYEnc++;

                continue;
            }

            // Normalize the base name to include yEnc if the subject lacks it.
            if (stripos($subject, 'yEnc') === false) {
                $matches[1] .= ' yEnc';
            }

            $header['matches'] = $matches;

            // Filter subject based on black/white list.
            if ($this->blacklistService->isBlackListed($header, $groupName)) {
                $this->blacklisted++;

                continue;
            }

            // Ensure Bytes is set, falling back to the ':bytes' entry.
            if (empty($header['Bytes'])) {
                $header['Bytes'] = $header[':bytes'] ?? 0;
            }

            $accepted[] = $header;
        }

        return [
            'headers' => $accepted,
            'repaired' => $headersRepaired,
            'notYEnc' => $this->notYEnc,
            'blacklisted' => $this->blacklisted,
        ];
    }

    /**
     * Fetch the pending blacklist rule ids from the service and, when there
     * are any, pass them back to it so their usage is updated.
     */
    public function flushBlacklistUpdates(): void
    {
        $ids = $this->blacklistService->getAndClearIdsToUpdate();
        if (! empty($ids)) {
            $this->blacklistService->updateBlacklistUsage($ids);
        }
    }

    /**
     * Get the running count of headers rejected for an unparsable subject.
     */
    public function getNotYEncCount(): int
    {
        return $this->notYEnc;
    }

    /**
     * Get the running count of blacklisted headers.
     */
    public function getBlacklistedCount(): int
    {
        return $this->blacklisted;
    }

    /**
     * Extract article info for the first and last valid headers.
     *
     * "First" and "last" refer to array order, not to the numeric value of
     * the article numbers. A header is valid when its 'Number' entry is set.
     * Headers are walked in iteration order, so arrays that are not 0-based
     * lists (for example after array_filter()) are handled as well.
     *
     * @param  array  $headers  Headers to inspect; 'Number' and 'Date' are read from each entry
     * @return array Empty when no header has a 'Number'; otherwise the keys
     *               'firstArticleNumber', 'firstArticleDate', 'lastArticleNumber'
     *               and 'lastArticleDate' (dates are null when the header has no 'Date')
     */
    public function getArticleRange(array $headers): array
    {
        $first = null;
        $last = null;

        foreach ($headers as $header) {
            if (! isset($header['Number'])) {
                continue;
            }

            $first ??= $header;
            $last = $header;
        }

        if ($first === null) {
            return [];
        }

        return [
            'firstArticleNumber' => $first['Number'],
            'firstArticleDate' => $first['Date'] ?? null,
            'lastArticleNumber' => $last['Number'],
            'lastArticleDate' => $last['Date'] ?? null,
        ];
    }
}
167
168