| Total Complexity | 180 |
| Total Lines | 995 |
| Duplicated Lines | 0 % |
| Changes | 1 | ||
| Bugs | 0 | Features | 0 |
Complex classes like ArchiveExtractionService often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields and methods that share the same prefixes or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use ArchiveExtractionService, and based on these observations, apply Extract Interface, too.
| 1 | <?php |
||
| 17 | class ArchiveExtractionService |
||
| 18 | { |
||
| 19 | private ArchiveInfo $archiveInfo; |
||
| 20 | |||
| 21 | private Par2Info $par2Info; |
||
| 22 | |||
/**
 * Build the ArchiveInfo and Par2Info parsers used by this service.
 *
 * When the configuration provides an unrar path, it is registered with
 * ArchiveInfo as the external client for RAR archives.
 */
public function __construct(
    private readonly ProcessingConfiguration $config
) {
    $this->archiveInfo = new ArchiveInfo();
    $this->par2Info = new Par2Info();

    // Nothing further to configure without an external unrar binary.
    if (! $this->config->unrarPath) {
        return;
    }

    $this->archiveInfo->setExternalClients([
        ArchiveInfo::TYPE_RAR => $this->config->unrarPath,
    ]);
}
||
| 34 | |||
/**
 * Inspect raw archive bytes and report the files found inside them.
 *
 * Data detected as 7z/gzip/bzip2/xz is first handed to the 7-Zip based helpers;
 * whatever they do not resolve is passed to ArchiveInfo. Increments
 * $context->compressedFilesChecked on every call.
 *
 * Besides the keys listed below, the result may also carry 'archiveMarker' and
 * 'dataSummary' (on success) or 'standaloneVideoType' and 'standaloneVideoData'
 * (when the data is recognised as a standalone video instead of an archive).
 *
 * @return array{success: bool, files: array, hasPassword: bool, passwordStatus: int}
 */
public function processCompressedData(
    string $compressedData,
    ReleaseProcessingContext $context,
    string $tmpPath
): array {
    // Shared "nothing usable" answer; the other outcomes extend or override it.
    $failure = [
        'success' => false,
        'files' => [],
        'hasPassword' => false,
        'passwordStatus' => ReleaseBrowseService::PASSWD_NONE,
    ];

    $context->compressedFilesChecked++;

    $archiveType = $this->detectArchiveType($compressedData);

    // 7z gets a listing attempt first; a password hit is returned as-is.
    if ($archiveType === '7z') {
        $listing = $this->processSevenZipArchive($compressedData, $context, $tmpPath);
        if ($listing['success'] || $listing['hasPassword']) {
            return $listing;
        }
    }

    // 7z (when listing failed), gzip, bzip2 and xz are extracted with the 7zip binary.
    if (in_array($archiveType, ['7z', 'gzip', 'bzip2', 'xz'], true) && $this->config->sevenZipPath) {
        $extraction = $this->extractViaSevenZip($compressedData, $archiveType, $tmpPath);
        if ($extraction['success']) {
            return $extraction;
        }
    }

    // Everything else goes through ArchiveInfo.
    if (! $this->archiveInfo->setData($compressedData, true)) {
        $videoType = $this->detectStandaloneVideo($compressedData);
        if ($videoType === null) {
            return $failure;
        }

        return array_merge($failure, [
            'standaloneVideoType' => $videoType,
            'standaloneVideoData' => $compressedData,
        ]);
    }

    if ($this->archiveInfo->error !== '') {
        if ($this->config->debugMode) {
            Log::debug('ArchiveInfo Error: '.$this->archiveInfo->error);
        }

        return $failure;
    }

    try {
        $dataSummary = $this->archiveInfo->getSummary(true);
    } catch (\Exception $e) {
        if ($this->config->debugMode) {
            Log::warning($e->getTraceAsString());
        }

        return $failure;
    }

    // Encryption may be flagged on the parser itself or in its summary.
    if (! empty($this->archiveInfo->isEncrypted)
        || (isset($dataSummary['is_encrypted']) && (int) $dataSummary['is_encrypted'] !== 0)
    ) {
        if ($this->config->debugMode) {
            Log::debug('ArchiveInfo: Compressed file has a password.');
        }

        return array_merge($failure, [
            'hasPassword' => true,
            'passwordStatus' => ReleaseBrowseService::PASSWD_RAR,
        ]);
    }

    $this->prepareExtractionDirectories($tmpPath);
    $archiveMarker = $this->extractArchive($compressedData, $dataSummary, $tmpPath);

    $files = $this->archiveInfo->getArchiveFileList();
    if (! is_array($files) || count($files) === 0) {
        return $failure;
    }

    return array_merge($failure, [
        'success' => true,
        'files' => $files,
        'archiveMarker' => $archiveMarker,
        'dataSummary' => $dataSummary,
    ]);
}
||
| 147 | |||
/**
 * Identify the container format from the leading magic bytes.
 *
 * @return string|null One of '7z', 'gzip', 'bzip2', 'xz', 'pdf', or null when
 *                     no known signature matches.
 */
public function detectArchiveType(string $data): ?string
{
    $head6 = substr($data, 0, 6);

    return match (true) {
        // The 7z magic alone is not trusted; the header must also look plausible.
        $head6 === "\x37\x7A\xBC\xAF\x27\x1C" && $this->isLikely7z($data) => '7z',
        str_starts_with($data, "\x1F\x8B\x08") => 'gzip',
        str_starts_with($data, 'BZh') => 'bzip2',
        $head6 === "\xFD7zXZ\x00" => 'xz',
        str_starts_with($data, '%PDF') => 'pdf',
        default => null,
    };
}
||
| 179 | |||
/**
 * Sanity-check the bytes following a 7z signature.
 *
 * Requires at least 32 bytes, a version of 0.2 through 0.9 (bytes 6 and 7),
 * and a CRC field (bytes 8-11) that is neither all zeroes nor all ones.
 */
private function isLikely7z(string $data): bool
{
    if (strlen($data) < 32) {
        return false;
    }

    [$verMajor, $verMinor] = [ord($data[6]), ord($data[7])];
    $versionPlausible = $verMajor === 0x00 && $verMinor >= 0x02 && $verMinor <= 0x09;
    if (! $versionPlausible) {
        return false;
    }

    $crc = substr($data, 8, 4);

    return ! in_array($crc, ["\x00\x00\x00\x00", "\xFF\xFF\xFF\xFF"], true);
}
||
| 200 | |||
/**
 * Obtain a file list for a 7z archive.
 *
 * First asks the external 7z binary for a listing; if that yields nothing
 * usable, falls back to scanning the raw bytes for file names. Returns the
 * unsuccessful default when no 7zip path is configured.
 *
 * @return array Result with 'success', 'files', 'hasPassword', 'passwordStatus'
 *               and, on success, 'archiveMarker' => '7z'.
 */
private function processSevenZipArchive(
    string $compressedData,
    ReleaseProcessingContext $context,
    string $tmpPath
): array {
    $failure = [
        'success' => false,
        'files' => [],
        'hasPassword' => false,
        'passwordStatus' => ReleaseBrowseService::PASSWD_NONE,
    ];

    if (! $this->config->sevenZipPath) {
        return $failure;
    }

    $listed = $this->listSevenZipEntries($compressedData, $tmpPath);
    if (! empty($listed)) {
        // The parser flags the first entry when any entry is encrypted.
        if (! empty($listed[0]['__any_encrypted__'])) {
            return array_merge($failure, [
                'hasPassword' => true,
                'passwordStatus' => ReleaseBrowseService::PASSWD_RAR,
            ]);
        }

        $accepted = $this->filterSevenZipFiles($listed);
        if (! empty($accepted)) {
            return array_merge($failure, [
                'success' => true,
                'files' => $accepted,
                'archiveMarker' => '7z',
            ]);
        }
    }

    // Fallback: scan for filenames in raw data.
    $scannedNames = $this->scanSevenZipFilenames($compressedData);
    if (empty($scannedNames)) {
        return $failure;
    }

    $scannedFiles = [];
    foreach ($scannedNames as $key => $scannedName) {
        $scannedFiles[$key] = [
            'name' => $scannedName,
            'size' => 0,
            'date' => time(),
            'pass' => 0,
            'crc32' => '',
            'source' => '7z-scan',
        ];
    }

    return array_merge($failure, [
        'success' => true,
        'files' => $scannedFiles,
        'archiveMarker' => '7z',
    ]);
}
||
| 267 | |||
/**
 * List the entries of a 7z archive using the external 7z binary.
 *
 * The data is written to a temporary file under $tmpPath and listed with
 * `l -slt -ba -bd`. If that fails or prints nothing, the plain listing
 * fallback is used instead. The temporary file is removed in every case,
 * including when the command or the parser throws.
 *
 * @param string $compressedData Raw archive bytes.
 * @param string $tmpPath        Directory prefix for the temporary archive file.
 * @return array List of ['name' => string, 'size' => int, 'encrypted' => bool];
 *               empty when no 7zip path is configured or listing failed.
 */
public function listSevenZipEntries(string $compressedData, string $tmpPath): array
{
    if (! $this->config->sevenZipPath) {
        return [];
    }

    $tmpFile = $tmpPath.uniqid('7zlist_', true).'.7z';

    try {
        if (File::put($tmpFile, $compressedData) === false) {
            return [];
        }

        $cmd = [$this->config->sevenZipPath, 'l', '-slt', '-ba', '-bd', $tmpFile];
        $exitCode = 0;
        $stdout = null;
        $stderr = null;
        $ok = $this->execCommand($cmd, $exitCode, $stdout, $stderr);

        if (! $ok || $exitCode !== 0 || empty($stdout)) {
            // Try plain listing fallback
            return $this->listSevenZipPlain($tmpFile);
        }

        return $this->parseSevenZipStructuredOutput($stdout);
    } catch (\Throwable $e) {
        if ($this->config->debugMode) {
            Log::debug('Exception listing 7z: '.$e->getMessage());
        }

        return [];
    } finally {
        // Runs on every exit path so the temporary archive is never left behind.
        File::delete($tmpFile);
    }
}
||
| 308 | |||
/**
 * Fallback listing: run `7z l -ba -bd` and pull file names out of the
 * plain-text table.
 *
 * Each line is matched either as a full "date time attr size packed name" row
 * or, failing that, as anything ending in something that looks like
 * "name.ext". At most 200 names of at most 300 bytes are returned.
 *
 * @return array List of ['name' => string, 'size' => 0, 'encrypted' => false].
 */
private function listSevenZipPlain(string $tmpFile): array
{
    $exitCode = 0;
    $stdout = null;
    $stderr = null;
    $ok = $this->execCommand(
        [$this->config->sevenZipPath, 'l', '-ba', '-bd', $tmpFile],
        $exitCode,
        $stdout,
        $stderr
    );

    if (! $ok || $exitCode !== 0 || empty($stdout)) {
        return [];
    }

    $files = [];
    foreach (preg_split('/\r?\n/', trim($stdout)) as $rawLine) {
        $line = trim($rawLine);

        // Skip blanks, separator rules, the column header and progress lines.
        $isNoise = $line === ''
            || str_starts_with($line, '-----')
            || str_contains($line, ' Date ')
            || str_starts_with($line, 'Scanning ');
        if ($isNoise) {
            continue;
        }

        if (preg_match('/^\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\s+\S+\s+\d+\s+\d+\s+(\S.*)$/', $line, $m)) {
            $name = $m[1];
        } elseif (preg_match('/([A-Za-z0-9_#@()\[\]\-+&., ]+\.[A-Za-z0-9]{2,8})$/', $line, $m2)) {
            $name = trim($m2[1]);
        } else {
            continue;
        }

        if (! $name || strlen($name) > 300) {
            continue;
        }

        $files[] = ['name' => trim($name), 'size' => 0, 'encrypted' => false];
        if (count($files) >= 200) {
            break;
        }
    }

    return $files;
}
||
| 352 | |||
/**
 * Parse the "Key = Value" blocks printed by `7z l -slt`.
 *
 * Blocks are separated by one or more blank lines. The separator accepts both
 * LF and CRLF line endings and is matched byte-wise (no `u` modifier), so file
 * names that are not valid UTF-8 do not make the split fail and discard the
 * whole listing.
 *
 * Blocks without a Path and blocks whose Attributes contain 'D' (directories)
 * are skipped. Parsing stops after 200 files. If any parsed file is encrypted,
 * the first returned entry additionally carries '__any_encrypted__' => true.
 *
 * @param string $output Raw stdout of the 7z listing command.
 * @return array List of ['name' => string, 'size' => int, 'encrypted' => bool].
 */
private function parseSevenZipStructuredOutput(string $output): array
{
    // preg_split() returns false on a PCRE error; treat that as "no blocks".
    $blocks = preg_split('/(?:\r?\n){2,}/', trim($output)) ?: [];
    $files = [];
    $anyEncrypted = false;

    foreach ($blocks as $block) {
        $row = [];
        foreach (preg_split('/\r?\n/', trim($block)) ?: [] as $line) {
            $kv = explode(' = ', $line, 2);
            if (count($kv) === 2) {
                $row[$kv[0]] = $kv[1];
            }
        }

        if (empty($row['Path'])) {
            continue;
        }

        $attr = $row['Attributes'] ?? '';
        if (str_contains($attr, 'D')) {
            continue; // directory
        }

        $encrypted = ($row['Encrypted'] ?? '') === '+';
        if ($encrypted) {
            $anyEncrypted = true;
        }

        $size = isset($row['Size']) && ctype_digit($row['Size']) ? (int) $row['Size'] : 0;
        $files[] = ['name' => $row['Path'], 'size' => $size, 'encrypted' => $encrypted];

        if (count($files) >= 200) {
            break;
        }
    }

    // Callers look at the first entry to learn whether anything was encrypted.
    if ($anyEncrypted && isset($files[0])) {
        $files[0]['__any_encrypted__'] = true;
    }

    return $files;
}
||
| 400 | |||
/**
 * Reduce a 7z listing to at most 50 entries worth reporting.
 *
 * An entry is dropped when it carries the '__any_encrypted__' marker, has an
 * empty name or one longer than 300 bytes, has an extension outside the
 * whitelist, or has five or fewer ASCII letters in its base name.
 *
 * @param array $files Entries as produced by the 7z listing helpers.
 * @return array List of file info arrays tagged with 'source' => '7z-list'.
 */
private function filterSevenZipFiles(array $files): array
{
    $whitelist = $this->getAllowedExtensions();
    $kept = [];

    foreach ($files as $entry) {
        $name = $entry['name'] ?? '';

        $rejected = ! empty($entry['__any_encrypted__'])
            || $name === ''
            || strlen($name) > 300
            || ! in_array(strtolower(pathinfo($name, PATHINFO_EXTENSION)), $whitelist, true)
            || preg_match_all('/[a-z]/i', pathinfo($name, PATHINFO_FILENAME)) <= 5;
        if ($rejected) {
            continue;
        }

        $kept[] = [
            'name' => $name,
            'size' => $entry['size'] ?? 0,
            'date' => time(),
            'pass' => 0,
            'crc32' => '',
            'source' => '7z-list',
        ];

        if (count($kept) >= 50) {
            break;
        }
    }

    return $kept;
}
||
| 446 | |||
| 447 | /** |
||
| 448 | * Scan for filenames in 7z raw data. |
||
| 449 | */ |
||
| 450 | private function scanSevenZipFilenames(string $data): array |
||
| 481 | } |
||
| 482 | |||
/**
 * Extract archive data with the external 7zip binary and list what came out.
 *
 * The data is written to a temporary file under $tmpPath and extracted with
 * `e -y -bd` into a fresh `un7z/<unique>/` directory. The temporary archive
 * file is removed on every exit path, including when an exception is thrown;
 * the extraction directory is left in place.
 *
 * @param string $compressedData Raw archive bytes.
 * @param string $type           Archive type: '7z', 'gzip', 'bzip2' or 'xz'
 *                               (other values use the extension 'dat').
 * @param string $tmpPath        Directory prefix for temporary files.
 * @return array Result with 'success', 'files', 'hasPassword', 'passwordStatus'
 *               and, when anything was extracted, 'archiveMarker'. Returns the
 *               unsuccessful default when extractUsingRarInfo is set, no 7zip
 *               path is configured, or nothing could be extracted.
 */
public function extractViaSevenZip(string $compressedData, string $type, string $tmpPath): array
{
    $result = [
        'success' => false,
        'files' => [],
        'hasPassword' => false,
        'passwordStatus' => ReleaseBrowseService::PASSWD_NONE,
    ];

    if ($this->config->extractUsingRarInfo || ! $this->config->sevenZipPath) {
        return $result;
    }

    $extMap = ['7z' => '7z', 'gzip' => 'gz', 'bzip2' => 'bz2', 'xz' => 'xz'];
    $markerMap = ['7z' => '7z', 'gzip' => 'g', 'bzip2' => 'b', 'xz' => 'x'];
    $ext = $extMap[$type] ?? 'dat';
    $marker = $markerMap[$type] ?? $type;

    $extractDir = $tmpPath.'un7z/'.uniqid('', true).'/';
    $fileName = $tmpPath.uniqid('', true).'.'.$ext;

    try {
        if (! File::isDirectory($extractDir)) {
            File::makeDirectory($extractDir, 0777, true, true);
        }

        // Without the archive on disk there is nothing for 7zip to extract.
        if (File::put($fileName, $compressedData) === false) {
            return $result;
        }

        $cmd = [$this->config->sevenZipPath, 'e', '-y', '-bd', '-o'.$extractDir, $fileName];
        $exitCode = 0;
        $stdout = null;
        $stderr = null;
        // The exit status is not consulted; success is judged by what was extracted.
        $this->execCommand($cmd, $exitCode, $stdout, $stderr);

        $files = [];
        if (File::isDirectory($extractDir)) {
            foreach (File::allFiles($extractDir) as $f) {
                $files[] = [
                    'name' => $f->getFilename(),
                    'size' => $f->getSize(),
                    'date' => time(),
                    'pass' => 0,
                    'crc32' => '',
                    'source' => $type,
                ];
            }
        }

        if (! empty($files)) {
            return [
                'success' => true,
                'files' => $this->filterExtractedFiles($files),
                'hasPassword' => false,
                'passwordStatus' => ReleaseBrowseService::PASSWD_NONE,
                'archiveMarker' => $marker,
            ];
        }
    } catch (\Throwable $e) {
        if ($this->config->debugMode) {
            Log::warning(strtoupper($type).' extraction exception: '.$e->getMessage());
        }
    } finally {
        // Always remove the temporary archive copy, even after an exception.
        File::delete($fileName);
    }

    return $result;
}
||
| 552 | |||
/**
 * Keep only extracted files that have a whitelisted extension and more than
 * five ASCII letters in their base name.
 *
 * @param array $files File info arrays with a 'name' key.
 * @return array The surviving entries, unchanged and reindexed from zero.
 */
private function filterExtractedFiles(array $files): array
{
    $whitelist = $this->getAllowedExtensions();

    $isWanted = static function ($file) use ($whitelist): bool {
        $name = $file['name'] ?? '';
        $ext = strtolower(pathinfo($name, PATHINFO_EXTENSION));
        if (! in_array($ext, $whitelist, true)) {
            return false;
        }

        return preg_match_all('/[a-z]/i', pathinfo($name, PATHINFO_FILENAME)) > 5;
    };

    return array_values(array_filter($files, $isWanted));
}
||
| 580 | |||
/**
 * Lower-case file extensions that the file filters accept.
 *
 * @return array Flat list of extensions without the leading dot.
 */
private function getAllowedExtensions(): array
{
    return array_merge(
        // NFO and info files (prioritized for extraction)
        ['nfo', 'diz', 'inf', 'txt'],
        // Subtitles
        ['srt', 'sub', 'idx', 'ass', 'ssa', 'vtt'],
        // Video
        ['mkv', 'mpeg', 'avi', 'mp4', 'm4v', 'mov', 'wmv', 'flv', 'ts', 'vob', 'm2ts', 'webm'],
        // Audio
        ['mp3', 'm4a', 'flac', 'ogg', 'aac', 'wav', 'wma', 'opus', 'ape'],
        // Images
        ['jpg', 'jpeg', 'png', 'gif', 'bmp', 'webp'],
        // Documents
        ['epub', 'pdf', 'cbz', 'cbr', 'djvu', 'mobi', 'azw', 'azw3'],
        // Executables (for software releases)
        ['exe', 'msi'],
    );
}
||
| 603 | |||
/**
 * Decide whether a file name looks like an NFO or similar info file.
 *
 * Only the lower-cased base name is examined. It qualifies when it ends in
 * .nfo/.diz/.inf, is one of a fixed set of well-known info file names, or
 * follows scene-style naming ("00-group.nfo", "group-release.txt").
 *
 * @param string $filename The filename to check.
 * @return bool True if it's an NFO-like file.
 */
public function isNfoFile(string $filename): bool
{
    $basename = strtolower(basename($filename));

    // Common NFO alternative names
    $knownNames = [
        'file_id.diz', 'fileid.diz', 'file-id.diz',
        'readme.txt', 'readme.1st', 'read.me', 'readmenow.txt',
        'info.txt', 'information.txt', 'about.txt', 'notes.txt',
        'release.txt', 'release.nfo',
    ];

    return preg_match('/\.(nfo|diz|inf)$/i', $basename) === 1
        || in_array($basename, $knownNames, true)
        // Scene-style NFO naming: 00-groupname.nfo, group-release.nfo
        || preg_match('/^(?:00?-[a-z0-9_-]+|[a-z0-9]+-[a-z0-9._-]+)\.(?:nfo|txt)$/i', $basename) === 1;
}
||
| 638 | |||
/**
 * Reorder a file list so that NFO-like files come first.
 *
 * Entries are compared only on whether their 'name' is an NFO-like file
 * according to isNfoFile(); any two entries in the same group compare equal.
 *
 * @param array $files Array of file info arrays.
 * @return array Sorted array with NFO files first.
 */
public function sortFilesWithNfoPriority(array $files): array
{
    usort(
        $files,
        // true sorts before false: comparing b against a puts NFO entries first.
        fn ($a, $b) => $this->isNfoFile($b['name'] ?? '') <=> $this->isNfoFile($a['name'] ?? '')
    );

    return $files;
}
||
| 663 | |||
/**
 * Make sure the working sub-directories used for extraction exist.
 *
 * Does nothing when extractUsingRarInfo is set. Otherwise creates
 * `<tmpPath>unrar/` (only when an unrar path is configured) and
 * `<tmpPath>unzip/`. Failures are logged in debug mode and otherwise ignored.
 */
private function prepareExtractionDirectories(string $tmpPath): void
{
    if ($this->config->extractUsingRarInfo) {
        return;
    }

    try {
        $wanted = [];
        if ($this->config->unrarPath) {
            $wanted[] = $tmpPath.'unrar/';
        }
        $wanted[] = $tmpPath.'unzip/';

        foreach ($wanted as $dir) {
            if (File::isDirectory($dir)) {
                continue;
            }
            File::makeDirectory($dir, 0777, true, true);
        }
    } catch (\Throwable $e) {
        if ($this->config->debugMode) {
            Log::warning('Failed ensuring extraction subdirectories: '.$e->getMessage());
        }
    }
}
||
| 690 | |||
| 691 | /** |
||
| 692 | * Extract archive based on type. |
||
| 693 | */ |
||
| 694 | private function extractArchive(string $compressedData, array $dataSummary, string $tmpPath): string |
||
| 721 | } |
||
| 722 | |||
| 723 | /** |
||
| 724 | * Detect standalone video from binary data. |
||
| 725 | */ |
||
| 726 | public function detectStandaloneVideo(string $data): ?string |
||
| 768 | } |
||
| 769 | |||
| 770 | /** |
||
| 771 | * Get PAR2 info parser. |
||
| 772 | */ |
||
| 773 | public function getPar2Info(): Par2Info |
||
| 776 | } |
||
| 777 | |||
| 778 | /** |
||
| 779 | * Get archive info handler. |
||
| 780 | */ |
||
| 781 | public function getArchiveInfo(): ArchiveInfo |
||
| 784 | } |
||
| 785 | |||
/**
 * Pull the contents of one named file out of raw archive data.
 *
 * ArchiveInfo is tried first. If it yields nothing, external tools are used:
 * 7zip when the data is detected as 7z and a 7zip path is configured (its
 * result is final), otherwise unrar and then unzip, each only if configured.
 *
 * @param string $compressedData The raw archive data
 * @param string $filename The filename to extract (exact match)
 * @param string $tmpPath Temporary directory path
 * @return string|null The extracted file content, or null if extraction failed
 */
public function extractSpecificFile(string $compressedData, string $filename, string $tmpPath): ?string
{
    if ($this->archiveInfo->setData($compressedData, true)) {
        try {
            $fromArchiveInfo = $this->archiveInfo->getFileData($filename);
            // empty() also covers the `false` failure value.
            if (! empty($fromArchiveInfo)) {
                return $fromArchiveInfo;
            }
        } catch (\Throwable $e) {
            if ($this->config->debugMode) {
                Log::debug('ArchiveInfo getFileData failed: '.$e->getMessage());
            }
        }
    }

    // Fallback: use external tools to extract to temp directory
    if ($this->detectArchiveType($compressedData) === '7z' && $this->config->sevenZipPath) {
        return $this->extractFileVia7zip($compressedData, $filename, $tmpPath);
    }

    if ($this->config->unrarPath) {
        $viaUnrar = $this->extractFileViaUnrar($compressedData, $filename, $tmpPath);
        if ($viaUnrar !== null) {
            return $viaUnrar;
        }
    }

    if (! $this->config->unzipPath) {
        return null;
    }

    // extractFileViaUnzip() already yields null on failure.
    return $this->extractFileViaUnzip($compressedData, $filename, $tmpPath);
}
||
| 835 | |||
/**
 * Extract a specific file using 7zip.
 *
 * The archive is written to a temporary file and the requested entry is
 * extracted with `e -y -bd` into a fresh temporary directory. The extracted
 * file is looked up by base name, first exactly and then case-insensitively
 * anywhere under the extraction directory.
 *
 * Both the temporary archive and the extraction directory are removed on
 * every exit path, including when an exception is thrown.
 *
 * @param string $compressedData Raw archive bytes.
 * @param string $filename       Name of the entry to extract.
 * @param string $tmpPath        Directory prefix for temporary files.
 * @return string|null File contents, or null when the entry could not be extracted.
 */
private function extractFileVia7zip(string $compressedData, string $filename, string $tmpPath): ?string
{
    $extractDir = $tmpPath.'extract_'.uniqid('', true).'/';
    $archiveFile = $tmpPath.'archive_'.uniqid('', true).'.7z';

    try {
        if (! File::isDirectory($extractDir)) {
            File::makeDirectory($extractDir, 0777, true, true);
        }

        File::put($archiveFile, $compressedData);

        // "--" stops switch and @listfile parsing, so a name such as "-pX" or
        // "@list" is treated as a file name and not as a 7z option.
        $cmd = [
            $this->config->sevenZipPath, 'e', '-y', '-bd', '-o'.$extractDir,
            '--', $archiveFile, $filename,
        ];
        $exitCode = 0;
        $stdout = null;
        $stderr = null;
        $this->execCommand($cmd, $exitCode, $stdout, $stderr);

        $wantedName = basename($filename);

        // Look for extracted file
        $extractedPath = $extractDir.$wantedName;
        if (File::isFile($extractedPath)) {
            return File::get($extractedPath);
        }

        // Otherwise search the directory, ignoring case (in case the path differs).
        foreach (File::allFiles($extractDir) as $file) {
            if (strtolower($file->getFilename()) === strtolower($wantedName)) {
                return File::get($file->getPathname());
            }
        }
    } catch (\Throwable $e) {
        if ($this->config->debugMode) {
            Log::debug('7zip extraction failed: '.$e->getMessage());
        }
    } finally {
        // Best-effort cleanup; a cleanup failure must not mask the result.
        try {
            File::delete($archiveFile);
            File::deleteDirectory($extractDir);
        } catch (\Throwable $cleanupError) {
            if ($this->config->debugMode) {
                Log::debug('7zip extraction failed: '.$cleanupError->getMessage());
            }
        }
    }

    return null;
}
||
| 888 | |||
| 889 | /** |
||
| 890 | * Extract a specific file using unrar. |
||
| 891 | */ |
||
| 892 | private function extractFileViaUnrar(string $compressedData, string $filename, string $tmpPath): ?string |
||
| 937 | } |
||
| 938 | |||
| 939 | /** |
||
| 940 | * Extract a specific file using unzip. |
||
| 941 | */ |
||
| 942 | private function extractFileViaUnzip(string $compressedData, string $filename, string $tmpPath): ?string |
||
| 986 | } |
||
| 987 | |||
| 988 | /** |
||
| 989 | * Execute a command with output capture. |
||
| 990 | */ |
||
| 991 | private function execCommand(array $cmd, ?int &$exitCode, ?string &$stdout, ?string &$stderr): bool |
||
| 1014 |
The issue could also be caused by a filter entry in the build configuration. If the path has been excluded in your configuration, e.g.
excluded_paths: ["lib/*"], you can move it to the dependency path list instead. For further information, see https://scrutinizer-ci.com/docs/tools/php/php-scrutinizer/#list-dependency-paths