| Total Complexity | 50 |
| Total Lines | 334 |
| Duplicated Lines | 0 % |
| Changes | 1 | ||
| Bugs | 0 | Features | 0 |
Complex classes like RegexService often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use RegexService, and based on these observations, apply Extract Interface, too.
| 1 | <?php |
||
| 20 | class RegexService |
||
| 21 | { |
||
| 22 | /** |
||
| 23 | * The ID of the Regex input string matched or the generic name |
||
| 24 | */ |
||
| 25 | public mixed $matchedRegex; |
||
| 26 | |||
| 27 | /** |
||
| 28 | * Name of the current table we are working on. |
||
| 29 | */ |
||
| 30 | public string $tableName; |
||
| 31 | |||
| 32 | /** |
||
| 33 | * Cache of regex and their TTL. |
||
| 34 | */ |
||
| 35 | protected array $_regexCache = []; |
||
| 36 | |||
| 37 | /** |
||
| 38 | * Default category ID |
||
| 39 | */ |
||
| 40 | protected int $_categoriesID = Category::OTHER_MISC; |
||
| 41 | |||
/**
 * Bind the service to the regex table it will operate on.
 *
 * @param  string  $tableName  Name of the table to work with
 *                             (collection_regexes, category_regexes, release_naming_regexes).
 */
public function __construct(string $tableName = '')
{
    $this->tableName = $tableName;
}
||
| 51 | |||
/**
 * Insert a new regex row into the current table.
 *
 * Reads the keys group_regex, regex, status, description and ordinal from $data;
 * when the current table is category_regexes it also reads categories_id.
 *
 * @param  array  $data  Column values for the new row.
 * @return bool  Result of the insert, cast to bool.
 */
public function addRegex(array $data): bool
{
    $isCategoryTable = $this->tableName === 'category_regexes';

    return (bool) DB::insert(
        sprintf(
            'INSERT INTO %s (group_regex, regex, status, description, ordinal%s) VALUES (%s, %s, %d, %s, %d%s)',
            $this->tableName,
            $isCategoryTable ? ', categories_id' : '',
            // NOTE(review): escapeString() presumably returns a quoted SQL literal — confirm.
            trim(escapeString($data['group_regex'])),
            trim(escapeString($data['regex'])),
            $data['status'],
            trim(escapeString($data['description'])),
            $data['ordinal'],
            // Cast to int so the category id is inserted as a number, like status and
            // ordinal (%d), and cannot carry arbitrary SQL into the statement.
            $isCategoryTable ? ', '.(int) $data['categories_id'] : ''
        )
    );
}
||
| 71 | |||
| 72 | /** |
||
| 73 | * Update a regex with new info. |
||
| 74 | */ |
||
| 75 | public function updateRegex(array $data): bool |
||
| 90 | ) |
||
| 91 | ); |
||
| 92 | } |
||
| 93 | |||
/**
 * Fetch one regex row from the current table by its id.
 *
 * @return array  The first matching row cast to an array (empty when nothing matched).
 */
public function getRegexByID(int $id): array
{
    $sql = sprintf('SELECT * FROM %s WHERE id = %d LIMIT 1', $this->tableName, $id);
    $rows = DB::select($sql);

    return (array) Arr::first($rows);
}
||
| 101 | |||
/**
 * Get a page of regex rows for the current table, ordered by id.
 *
 * @param  string  $group_regex  Optional substring to look for in the group_regex column.
 * @return mixed  The paginator produced by the model query.
 */
public function getRegex(string $group_regex = '')
{
    // Any table name other than the two listed falls back to the release naming model.
    $model = match ($this->tableName) {
        'collection_regexes' => CollectionRegex::class,
        'category_regexes' => CategoryRegex::class,
        default => ReleaseNamingRegex::class,
    };

    $query = $model::query();

    if ($group_regex !== '') {
        $query->where('group_regex', 'like', '%'.$group_regex.'%');
    }

    return $query->orderBy('id')->paginate(config('nntmux.items_per_page'));
}
||
| 125 | |||
| 126 | /** |
||
| 127 | * Get the count of regex in the DB. |
||
| 128 | * |
||
| 129 | * @param string $group_regex Optional, keyword to find a group. |
||
| 130 | */ |
||
| 131 | public function getCount(string $group_regex = ''): int |
||
| 142 | } |
||
| 143 | |||
/**
 * Remove the regex with the given id from the current table.
 *
 * The delete runs inside a database transaction that is attempted up to 3 times.
 *
 * @throws \Throwable
 */
public function deleteRegex(int $id): void
{
    $sql = sprintf('DELETE FROM %s WHERE id = %d', $this->tableName, $id);

    DB::transaction(static function () use ($sql): void {
        DB::delete($sql);
    }, 3);
}
||
| 155 | |||
/**
 * Try a collection regex against binary names and show the collection hashes it would produce.
 *
 * Every binary whose name matches $regex is grouped under a key describing the new
 * collection hash and the named capture groups that produced it.
 *
 * @param  string  $groupName  Usenet group name; an unknown group yields an empty array.
 * @param  string  $regex  Regex to test; only its named capture groups feed the hash.
 * @param  int  $limit  Caps the number of distinct new hashes collected; values of 1 or less disable the cap.
 * @return array  Map of "New hash: ..." labels to binaries (keyed by binary hash).
 *
 * @throws \Exception
 */
public function testCollectionRegex(string $groupName, string $regex, int $limit): array
{
    $groupID = UsenetGroup::getIDByName($groupName);
    if (! $groupID) {
        return [];
    }

    $binaries = DB::select(
        'SELECT
b.name, b.totalparts, b.currentparts, HEX(b.binaryhash) AS binaryhash,
c.fromname, c.collectionhash
FROM binaries b
INNER JOIN collections c ON c.id = b.collections_id'
    );

    $results = [];
    $seenHashes = [];
    // The requested limit is reduced by one before it is compared against the hash count.
    $hashCap = $limit - 1;

    foreach ($binaries as $binary) {
        if (! preg_match($regex, $binary->name, $captures)) {
            continue;
        }

        ksort($captures);

        // Only named capture groups take part in the hash and in the label.
        $hashInput = '';
        $label = '';
        foreach ($captures as $groupKey => $captured) {
            if (\is_int($groupKey)) {
                continue;
            }
            $hashInput .= $captured;
            $label .= '<br/>'.$groupKey.': '.$captured;
        }

        // Total file count parsed from markers such as "[01/15]" or "1 of 15"; 0 when absent.
        $fileTotal = preg_match('/[[(\s](\d{1,5})(\/|[\s_]of[\s_]|-)(\d{1,5})[])\s$:]/i', $binary->name, $fileCount)
            ? $fileCount[3]
            : 0;

        $newHash = sha1($hashInput.$binary->fromname.$groupID.$fileTotal);

        $results['New hash: '.$newHash.$label][$binary->binaryhash] = [
            'new_collection_hash' => $newHash,
            'file_name' => $binary->name,
            'file_total_parts' => $binary->totalparts,
            'file_current_parts' => $binary->currentparts,
            'collection_poster' => $binary->fromname,
            'old_collection_hash' => $binary->collectionhash,
        ];

        if ($hashCap <= 0) {
            continue;
        }
        if (\count($seenHashes) > $hashCap) {
            break;
        }
        $seenHashes[$newHash] = '';
    }

    return $results;
}
||
| 219 | |||
/**
 * Try a release naming regex against the releases of one group.
 *
 * @param  mixed  $groupName  Usenet group name; an unknown group yields an empty array.
 * @param  mixed  $regex  Regex handed to _matchRegex() for every release name.
 * @param  mixed  $displayLimit  Maximum number of matches to return; 0 or less means no cap.
 * @param  mixed  $queryLimit  Maximum number of releases to fetch; 0 means no cap.
 * @return array  Matches keyed by release id, each with subject, old_name and new_name.
 *
 * @throws \Exception
 */
public function testReleaseNamingRegex($groupName, $regex, $displayLimit, $queryLimit): array
{
    $groupID = UsenetGroup::getIDByName($groupName);

    if (! $groupID) {
        return [];
    }

    $query = Release::query()->where('groups_id', $groupID)->select(['name', 'searchname', 'id']);
    if ((int) $queryLimit !== 0) {
        $query->limit($queryLimit);
    }

    // Iterate the fetched rows: get() returns the result collection and leaves the
    // builder untouched, so looping over the builder itself never yields a release.
    $releases = $query->get();

    $maxShown = (int) $displayLimit;
    $shown = 0;
    $data = [];
    foreach ($releases as $row) {
        $hit = $this->_matchRegex($regex, $row['name']);
        if (! $hit) {
            continue;
        }

        $data[$row['id']] = [
            'subject' => $row['name'],
            'old_name' => $row['searchname'],
            'new_name' => $hit,
        ];

        // Stop as soon as the requested number of matches has been collected.
        if ($maxShown > 0 && ++$shown >= $maxShown) {
            break;
        }
    }

    return $data;
}
||
| 260 | |||
/**
 * Run the stored regexes for a group against a usenet subject and return the first match.
 *
 * Resets matchedRegex to 0, then sets it to the id of the regex that matched.
 * When working on category_regexes, the category id of each regex tried is
 * stored in _categoriesID before the regex is evaluated.
 *
 * @return string  The match produced by _matchRegex(), or the last (falsy) result when nothing matched.
 *
 * @throws \Exception
 */
public function tryRegex(string $subject, string $groupName): string
{
    $this->matchedRegex = 0;
    $this->_fetchRegex($groupName);

    $match = '';
    $candidates = $this->_regexCache[$groupName]['regex'];

    // No regex stored for this group: nothing to try.
    if (! $candidates) {
        return $match;
    }

    foreach ($candidates as $candidate) {
        if ($this->tableName === 'category_regexes') {
            $this->_categoriesID = $candidate->categories_id;
        }

        $match = $this->_matchRegex($candidate->regex, $subject);

        // The first regex that yields something wins; otherwise keep trying.
        if ($match) {
            $this->matchedRegex = $candidate->id;
            break;
        }
    }

    return $match;
}
||
| 291 | |||
/**
 * Load the active regexes whose group_regex matches the given group name.
 *
 * The rows are stored in $this->_regexCache[$groupName]['regex'] and in the cache
 * store for config('nntmux.cache_expiry_long') minutes, so they can be reused
 * if this script is terminated.
 *
 * @param  string  $groupName  Usenet group name matched against each row's group_regex.
 */
protected function _fetchRegex(string $groupName): void
{
    $template = 'SELECT r.id, r.regex %s FROM %s r WHERE %s REGEXP r.group_regex AND r.status = 1 ORDER BY r.ordinal ASC, r.group_regex ASC';
    $extraColumn = $this->tableName === 'category_regexes' ? ', r.categories_id' : '';

    // The key is derived from the query text with the group name inlined, so it stays
    // identical to keys already present in the cache store.
    $cacheKey = md5(sprintf($template, $extraColumn, $this->tableName, '\''.$groupName.'\''));

    $this->_regexCache[$groupName]['regex'] = Cache::get($cacheKey);
    if ($this->_regexCache[$groupName]['regex'] !== null) {
        return;
    }

    // Bind the group name rather than interpolating it, so a name containing a quote
    // can neither break the statement nor inject SQL.
    $sql = sprintf($template, $extraColumn, $this->tableName, '?');
    $this->_regexCache[$groupName]['regex'] = DB::select($sql, [$groupName]);

    $expiresAt = now()->addMinutes(config('nntmux.cache_expiry_long'));
    Cache::put($cacheKey, $this->_regexCache[$groupName]['regex'], $expiresAt);
}
||
| 314 | |||
| 315 | /** |
||
| 316 | * Find matches on a regex taken from the database. |
||
| 317 | * |
||
| 318 | * Requires at least 1 named captured group. |
||
| 319 | * |
||
| 320 | * @throws \Exception |
||
| 321 | */ |
||
| 322 | protected function _matchRegex(string $regex, string $subject): string |
||
| 346 | } |
||
| 347 | |||
| 348 | /** |
||
| 349 | * Format part of a query. |
||
| 350 | */ |
||
| 351 | protected function _groupQueryString(string $group_regex): string |
||
| 354 | } |
||
| 355 | } |
||
| 356 | |||
| 357 |
In PHP, under loose comparison (like `==`, `!=`, or `switch` conditions), values of different types might be equal.
For integer values, zero is a special case; in particular, the following results might be unexpected: