Passed
Push — master ( 4cabd0...c8b0c4 )
by Darko
11:55
created

RegexService::testCollectionRegex()   B

Complexity

Conditions 10
Paths 8

Size

Total Lines 55
Code Lines 37

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
eloc 37
c 1
b 0
f 0
dl 0
loc 55
rs 7.6666
cc 10
nc 8
nop 3

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
<?php
2
3
declare(strict_types=1);
4
5
namespace App\Services;
6
7
use App\Models\Category;
8
use App\Models\CategoryRegex;
9
use App\Models\CollectionRegex;
10
use App\Models\Release;
11
use App\Models\ReleaseNamingRegex;
12
use App\Models\UsenetGroup;
13
use Illuminate\Support\Arr;
14
use Illuminate\Support\Facades\Cache;
15
use Illuminate\Support\Facades\DB;
16
17
/**
18
 * Service for managing regex patterns for collections, categories, and release naming.
19
 */
20
class RegexService
21
{
22
    /**
     * ID of the regex that matched during the last tryRegex() call, or 0 when nothing matched.
     *
     * Defaults to 0 so the property can be read before tryRegex() has run
     * (a typed property without a default throws when accessed uninitialized).
     */
    public mixed $matchedRegex = 0;

    /**
     * Name of the current table we are working on.
     */
    public string $tableName;

    /**
     * Regex rows fetched per group name, shaped as [groupName => ['regex' => rows]].
     */
    protected array $_regexCache = [];

    /**
     * Category ID returned when a category regex matches; starts as Category::OTHER_MISC.
     */
    protected int $_categoriesID = Category::OTHER_MISC;

    /**
     * RegexService constructor.
     *
     * @param  string  $tableName  The table name to work with (collection_regexes, category_regexes, release_naming_regexes)
     */
    public function __construct(string $tableName = '')
    {
        $this->tableName = $tableName;
    }
51
52
    /**
     * Add a new regex row to the current table.
     *
     * All values are sent as bound parameters instead of being spliced into the SQL text.
     * String values are trimmed before binding; status, ordinal and categories_id are cast to int.
     * The table name cannot be bound and is still taken from $this->tableName.
     *
     * @param  array  $data  Must contain group_regex, regex, status, description and ordinal;
     *                       categories_id is also required when the table is category_regexes.
     * @return bool True when the insert statement succeeded.
     */
    public function addRegex(array $data): bool
    {
        $values = [
            'group_regex' => trim((string) $data['group_regex']),
            'regex' => trim((string) $data['regex']),
            'status' => (int) $data['status'],
            'description' => trim((string) $data['description']),
            'ordinal' => (int) $data['ordinal'],
        ];

        // Only the category table carries a categories_id column.
        if ($this->tableName === 'category_regexes') {
            $values['categories_id'] = (int) $data['categories_id'];
        }

        $sql = sprintf(
            'INSERT INTO %s (%s) VALUES (%s)',
            $this->tableName,
            implode(', ', array_keys($values)),
            implode(', ', array_fill(0, \count($values), '?'))
        );

        return (bool) DB::insert($sql, array_values($values));
    }
71
72
    /**
     * Update an existing regex row, identified by $data['id'], with new values.
     *
     * All values are sent as bound parameters instead of being spliced into the SQL text.
     * String values are trimmed before binding; status, ordinal, categories_id and id are cast to int.
     * The table name cannot be bound and is still taken from $this->tableName.
     *
     * @param  array  $data  Must contain id, group_regex, regex, status, description and ordinal;
     *                       categories_id is also required when the table is category_regexes.
     * @return bool True when at least one row was changed by the update.
     */
    public function updateRegex(array $data): bool
    {
        $values = [
            'group_regex' => trim((string) $data['group_regex']),
            'regex' => trim((string) $data['regex']),
            'status' => (int) $data['status'],
            'description' => trim((string) $data['description']),
            'ordinal' => (int) $data['ordinal'],
        ];

        // Only the category table carries a categories_id column.
        if ($this->tableName === 'category_regexes') {
            $values['categories_id'] = (int) $data['categories_id'];
        }

        $assignments = implode(', ', array_map(
            static fn (string $column): string => $column.' = ?',
            array_keys($values)
        ));

        $sql = sprintf('UPDATE %s SET %s WHERE id = ?', $this->tableName, $assignments);

        // The id binding comes last because its placeholder is in the WHERE clause.
        $bindings = [...array_values($values), (int) $data['id']];

        return (bool) DB::update($sql, $bindings);
    }
93
94
    /**
     * Fetch one regex row from the current table by its id.
     *
     * @return array The row cast to an array, or an empty array when no row has that id.
     */
    public function getRegexByID(int $id): array
    {
        $sql = sprintf('SELECT * FROM %s WHERE id = %d LIMIT 1', $this->tableName, $id);
        $rows = DB::select($sql);

        // Arr::first() yields null for an empty result, which the cast turns into [].
        return (array) Arr::first($rows);
    }
101
102
    /**
     * Get paginated regex rows for the current table, ordered by id.
     *
     * @param  string  $group_regex  Optional keyword; when non-empty only rows whose group_regex contains it are returned.
     * @return mixed Result of paginate() using the nntmux.items_per_page config value.
     */
    public function getRegex(string $group_regex = '')
    {
        // Any table name other than the two listed falls back to the release naming model.
        $model = match ($this->tableName) {
            'collection_regexes' => CollectionRegex::class,
            'category_regexes' => CategoryRegex::class,
            default => ReleaseNamingRegex::class,
        };

        $query = $model::query();
        if ($group_regex !== '') {
            $query->where('group_regex', 'like', '%'.$group_regex.'%');
        }

        return $query->orderBy('id')->paginate(config('nntmux.items_per_page'));
    }
125
126
    /**
     * Count the regex rows in the current table.
     *
     * @param  string  $group_regex  Optional, keyword to find a group.
     * @return int Number of rows, restricted by the group filter when one is given.
     */
    public function getCount(string $group_regex = ''): int
    {
        $whereClause = $this->_groupQueryString($group_regex);
        $sql = sprintf('SELECT COUNT(id) AS count FROM %s %s', $this->tableName, $whereClause);

        $rows = DB::select($sql);

        return (int) $rows[0]->count;
    }
143
144
    /**
     * Remove the regex row with the given id from the current table.
     *
     * The delete runs inside a transaction with up to 3 attempts.
     *
     * @throws \Throwable
     */
    public function deleteRegex(int $id): void
    {
        $sql = sprintf('DELETE FROM %s WHERE id = %d', $this->tableName, $id);

        DB::transaction(
            static function () use ($sql): void {
                DB::delete($sql);
            },
            3
        );
    }
155
156
    /**
     * Test a single collection regex and preview the collection hashes it would produce.
     *
     * Every binary whose name matches $regex is grouped under a key made of the new collection
     * hash followed by the named capture groups; inside each group rows are keyed by binary hash.
     *
     * NOTE(review): the query reads all rows of binaries/collections and is not filtered by the
     * group; the group ID only feeds the hash. Confirm whether per-group filtering is expected.
     *
     * @param  string  $groupName  Group name, resolved to an ID through UsenetGroup::getIDByName().
     * @param  string  $regex  PCRE pattern, delimiters included, applied to each binary name.
     * @param  int  $limit  Maximum number of distinct new collection hashes to return; 0 or less means no limit.
     * @return array Matches grouped as described above; empty when the group is unknown or nothing matched.
     *
     * @throws \Exception
     */
    public function testCollectionRegex(string $groupName, string $regex, int $limit): array
    {
        $groupID = UsenetGroup::getIDByName($groupName);

        // Strict check so that only a failed lookup aborts, never a legitimate integer ID.
        if ($groupID === false || $groupID === null) {
            return [];
        }

        $rows = DB::select(
            'SELECT
                b.name, b.totalparts, b.currentparts, HEX(b.binaryhash) AS binaryhash,
                c.fromname, c.collectionhash
            FROM binaries b
            INNER JOIN collections c ON c.id = b.collections_id'
        );

        $data = [];
        $seenHashes = [];
        foreach ($rows as $row) {
            if (! preg_match($regex, $row->name, $hits)) {
                continue;
            }

            // Named groups are concatenated in key order; numeric keys are ignored.
            ksort($hits);
            $hashSource = $captureSummary = '';
            foreach ($hits as $key => $hit) {
                if (\is_int($key)) {
                    continue;
                }
                $hashSource .= $hit;
                $captureSummary .= '<br/>'.$key.': '.$hit;
            }

            // Pull the total from an "x/y", "x of y" or "x-y" style counter in the name, if any.
            $files = 0;
            if (preg_match('/[[(\s](\d{1,5})(\/|[\s_]of[\s_]|-)(\d{1,5})[])\s$:]/i', $row->name, $fileCount)) {
                $files = $fileCount[3];
            }

            $newCollectionHash = sha1($hashSource.$row->fromname.$groupID.$files);

            // Stop before recording a hash that would exceed the limit; rows for hashes
            // already seen are still accepted.
            if ($limit > 0 && ! isset($seenHashes[$newCollectionHash]) && \count($seenHashes) >= $limit) {
                break;
            }
            $seenHashes[$newCollectionHash] = true;

            $data['New hash: '.$newCollectionHash.$captureSummary][$row->binaryhash] = [
                'new_collection_hash' => $newCollectionHash,
                'file_name' => $row->name,
                'file_total_parts' => $row->totalparts,
                'file_current_parts' => $row->currentparts,
                'collection_poster' => $row->fromname,
                'old_collection_hash' => $row->collectionhash,
            ];
        }

        return $data;
    }
219
220
    /**
     * Test a release naming regex against the releases of a group.
     *
     * @param  mixed  $groupName  Group name, resolved to an ID through UsenetGroup::getIDByName().
     * @param  mixed  $regex  PCRE pattern, delimiters included, applied to each release name.
     * @param  mixed  $displayLimit  Maximum number of matches to return; 0 or less means no limit.
     * @param  mixed  $queryLimit  Maximum number of releases to fetch; 0 means no limit.
     * @return array Matches keyed by release id, each with subject, old_name and new_name.
     *
     * @throws \Exception
     */
    public function testReleaseNamingRegex($groupName, $regex, $displayLimit, $queryLimit): array
    {
        $groupID = UsenetGroup::getIDByName($groupName);

        // Strict check so that only a failed lookup aborts, never a legitimate integer ID.
        if ($groupID === false || $groupID === null) {
            return [];
        }

        $query = Release::query()->where('groups_id', $groupID)->select(['name', 'searchname', 'id']);

        $queryLimit = (int) $queryLimit;
        if ($queryLimit !== 0) {
            $query->limit($queryLimit);
        }

        $displayLimit = (int) $displayLimit;

        $data = [];
        // Iterate the fetched rows; the query builder itself is not the result set.
        foreach ($query->get() as $row) {
            $hit = $this->_matchRegex((string) $regex, (string) $row['name']);
            if (! $hit) {
                continue;
            }

            $data[$row['id']] = [
                'subject' => $row['name'],
                'old_name' => $row['searchname'],
                'new_name' => $hit,
            ];

            if ($displayLimit > 0 && \count($data) >= $displayLimit) {
                break;
            }
        }

        return $data;
    }
260
261
    /**
     * Run the stored regexes for a group against a usenet subject and return the first match.
     *
     * Resets matchedRegex to 0, then sets it to the id of the regex that matched.
     *
     * @return string Result of the first regex that produced a truthy match, otherwise the
     *                result of the last regex tried (or '' when the group has no regexes).
     *
     * @throws \Exception
     */
    public function tryRegex(string $subject, string $groupName): string
    {
        $this->matchedRegex = 0;

        $this->_fetchRegex($groupName);

        $candidates = $this->_regexCache[$groupName]['regex'];
        if (! $candidates) {
            return '';
        }

        $isCategoryTable = $this->tableName === 'category_regexes';
        $matched = '';
        foreach ($candidates as $candidate) {
            // _matchRegex() returns this category ID on a match, so set it before matching.
            if ($isCategoryTable) {
                $this->_categoriesID = $candidate->categories_id;
            }

            $matched = $this->_matchRegex($candidate->regex, $subject);
            if ($matched) {
                $this->matchedRegex = $candidate->id;
                break;
            }
        }

        return $matched;
    }
291
292
    /**
     * Load the active regexes whose group_regex matches the given group name.
     *
     * The rows are stored in $this->_regexCache[$groupName]['regex'] and in the shared cache,
     * where they expire after the number of minutes set in the nntmux.cache_expiry_long config value.
     *
     * The group name is sent as a bound parameter rather than being spliced into the SQL text.
     */
    protected function _fetchRegex(string $groupName): void
    {
        $template = 'SELECT r.id, r.regex %s FROM %s r WHERE %s REGEXP r.group_regex AND r.status = 1 ORDER BY r.ordinal ASC, r.group_regex ASC';
        $categoryColumn = $this->tableName === 'category_regexes' ? ', r.categories_id' : '';

        // The key is still the md5 of the statement with the group name inlined,
        // so entries already in the cache keep being found.
        $cacheKey = md5(sprintf($template, $categoryColumn, $this->tableName, "'".$groupName."'"));

        $this->_regexCache[$groupName]['regex'] = Cache::get($cacheKey);
        if ($this->_regexCache[$groupName]['regex'] !== null) {
            return;
        }

        $sql = sprintf($template, $categoryColumn, $this->tableName, '?');
        $this->_regexCache[$groupName]['regex'] = DB::select($sql, [$groupName]);

        $expiresAt = now()->addMinutes(config('nntmux.cache_expiry_long'));
        Cache::put($cacheKey, $this->_regexCache[$groupName]['regex'], $expiresAt);
    }
314
315
    /**
     * Apply a regex taken from the database to a subject and build the result string.
     *
     * For collection_regexes and release_naming_regexes the values of the named capture groups
     * (except those whose name contains "reqid" or "parts") are concatenated in key order.
     * For category_regexes the current category ID is returned as a string.
     * Any other table name, or a regex that does not match, yields an empty string.
     *
     * @throws \Exception
     */
    protected function _matchRegex(string $regex, string $subject): string
    {
        if (! preg_match($regex, $subject, $hits) || \count($hits) === 0) {
            return '';
        }

        // Regex matched, so a category table simply reports the category ID.
        if ($this->tableName === 'category_regexes') {
            return (string) $this->_categoriesID;
        }

        if (! \in_array($this->tableName, ['collection_regexes', 'release_naming_regexes'], true)) {
            return '';
        }

        // Sort the keys, the named key matches will be concatenated in this order.
        ksort($hits);

        $pieces = [];
        foreach ($hits as $key => $value) {
            // Only named capture groups count; numeric keys are skipped.
            if (\is_int($key) || preg_match('#reqid|parts#i', $key)) {
                continue;
            }
            $pieces[] = $value;
        }

        return implode('', $pieces);
    }
347
348
    /**
     * Build the optional WHERE clause that filters rows by group_regex.
     *
     * The filter is applied for every non-empty keyword, including "0",
     * which matches the check used by getRegex().
     *
     * @param  string  $group_regex  Keyword to look for inside group_regex.
     * @return string The WHERE clause, or an empty string when no keyword is given.
     */
    protected function _groupQueryString(string $group_regex): string
    {
        if ($group_regex === '') {
            return '';
        }

        return 'WHERE group_regex LIKE '.escapeString('%'.$group_regex.'%');
    }
355
}
356
357