| Total Complexity | 304 |
| Total Lines | 1706 |
| Duplicated Lines | 0 % |
| Changes | 0 | ||
Complex classes like SevenZipPartialParser often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use SevenZipPartialParser, and based on these observations, apply Extract Interface, too.
| 1 | <?php |
||
| 17 | class SevenZipPartialParser |
||
| 18 | { |
||
| 19 | private string $data; |
||
| 20 | |||
| 21 | private int $len; |
||
| 22 | |||
| 23 | private array $names = []; |
||
| 24 | |||
| 25 | private array $files = []; // Extended file info with metadata |
||
| 26 | |||
| 27 | private array $sizes = []; // Uncompressed sizes |
||
| 28 | |||
| 29 | private array $packedSizes = []; // Compressed sizes |
||
| 30 | |||
| 31 | private array $crcs = []; // CRC32 values |
||
| 32 | |||
| 33 | private array $attributes = []; // File attributes (directory, readonly, etc.) |
||
| 34 | |||
| 35 | private array $mtimes = []; // Modification times |
||
| 36 | |||
| 37 | private array $ctimes = []; // Creation times |
||
| 38 | |||
| 39 | private array $atimes = []; // Access times |
||
| 40 | |||
| 41 | private bool $parsed = false; |
||
| 42 | |||
| 43 | private bool $encodedHeader = false; // flag if we encountered kEncodedHeader |
||
| 44 | |||
| 45 | private bool $encrypted = false; // heuristic flag if AES encryption detected |
||
| 46 | |||
| 47 | private bool $headerEncrypted = false; // flag if header itself is encrypted |
||
| 48 | |||
| 49 | private bool $solidArchive = false; // flag for solid archives |
||
| 50 | |||
| 51 | private int $numFiles = 0; // Total number of files detected |
||
| 52 | |||
| 53 | private array $compressionMethods = []; // Detected compression methods |
||
| 54 | |||
| 55 | private int $totalUnpackedSize = 0; // Total unpacked size |
||
| 56 | |||
| 57 | private int $totalPackedSize = 0; // Total packed size |
||
| 58 | |||
| 59 | private string $lastError = ''; // Last error message for debugging |
||
| 60 | |||
| 61 | // 7z Property IDs |
||
| 62 | private const K_END = 0x00; |
||
| 63 | |||
| 64 | private const K_HEADER = 0x01; |
||
| 65 | |||
| 66 | private const K_ARCHIVE_PROPERTIES = 0x02; |
||
| 67 | |||
| 68 | private const K_ADDITIONAL_STREAMS_INFO = 0x03; |
||
| 69 | |||
| 70 | private const K_MAIN_STREAMS_INFO = 0x04; |
||
| 71 | |||
| 72 | private const K_FILES_INFO = 0x05; |
||
| 73 | |||
| 74 | private const K_PACK_INFO = 0x06; |
||
| 75 | |||
| 76 | private const K_UNPACK_INFO = 0x07; |
||
| 77 | |||
| 78 | private const K_SUBSTREAMS_INFO = 0x08; |
||
| 79 | |||
| 80 | private const K_SIZE = 0x09; |
||
| 81 | |||
| 82 | private const K_CRC = 0x0A; |
||
| 83 | |||
| 84 | private const K_FOLDER = 0x0B; |
||
| 85 | |||
| 86 | private const K_CODERS_UNPACK_SIZE = 0x0C; |
||
| 87 | |||
| 88 | private const K_NUM_UNPACK_STREAM = 0x0D; |
||
| 89 | |||
| 90 | private const K_EMPTY_STREAM = 0x0E; |
||
| 91 | |||
| 92 | private const K_EMPTY_FILE = 0x0F; |
||
| 93 | |||
| 94 | private const K_ANTI = 0x10; |
||
| 95 | |||
| 96 | private const K_NAME = 0x11; |
||
| 97 | |||
| 98 | private const K_CTIME = 0x12; |
||
| 99 | |||
| 100 | private const K_ATIME = 0x13; |
||
| 101 | |||
| 102 | private const K_MTIME = 0x14; |
||
| 103 | |||
| 104 | private const K_WIN_ATTRIBUTES = 0x15; |
||
| 105 | |||
| 106 | private const K_COMMENT = 0x16; |
||
| 107 | |||
| 108 | private const K_ENCODED_HEADER = 0x17; |
||
| 109 | |||
| 110 | private const K_START_POS = 0x18; |
||
| 111 | |||
| 112 | private const K_DUMMY = 0x19; |
||
| 113 | |||
| 114 | // Compression method IDs |
||
| 115 | private const METHOD_COPY = "\x00"; |
||
| 116 | |||
| 117 | private const METHOD_LZMA = "\x03\x01\x01"; |
||
| 118 | |||
| 119 | private const METHOD_LZMA2 = "\x21"; |
||
| 120 | |||
| 121 | private const METHOD_PPMD = "\x03\x04\x01"; |
||
| 122 | |||
| 123 | private const METHOD_BCJ = "\x03\x03\x01\x03"; |
||
| 124 | |||
| 125 | private const METHOD_BCJ2 = "\x03\x03\x01\x1B"; |
||
| 126 | |||
| 127 | private const METHOD_DEFLATE = "\x04\x01\x08"; |
||
| 128 | |||
| 129 | private const METHOD_BZIP2 = "\x04\x02\x02"; |
||
| 130 | |||
| 131 | private const METHOD_AES = "\x06\xF1\x07\x01"; |
||
| 132 | |||
| 133 | // Windows file attributes |
||
| 134 | private const FILE_ATTRIBUTE_READONLY = 0x01; |
||
| 135 | |||
| 136 | private const FILE_ATTRIBUTE_HIDDEN = 0x02; |
||
| 137 | |||
| 138 | private const FILE_ATTRIBUTE_SYSTEM = 0x04; |
||
| 139 | |||
| 140 | private const FILE_ATTRIBUTE_DIRECTORY = 0x10; |
||
| 141 | |||
| 142 | private const FILE_ATTRIBUTE_ARCHIVE = 0x20; |
||
| 143 | |||
/**
 * Stores the raw bytes to inspect and caches their byte length.
 * No parsing is performed here.
 *
 * @param string $data Raw archive bytes.
 */
public function __construct(string $data)
{
    $this->len = strlen($data);
    $this->data = $data;
}
||
| 149 | |||
/**
 * Returns the file names collected so far, calling parse() first when the
 * parsed flag is still false.
 *
 * @return array Recovered names, or an empty array when none were found.
 */
public function getFileNames(): array
{
    if ($this->parsed === false) {
        $this->parse();
    }

    return $this->names;
}
||
| 161 | |||
/**
 * Returns the extended per-file records, calling parse() first when the
 * parsed flag is still false.
 *
 * @return array<int, array{name: string, size: int|null, packed_size: int|null, crc: string|null, attributes: int|null, is_dir: bool, mtime: int|null, ctime: int|null, atime: int|null}>
 */
public function getFiles(): array
{
    if ($this->parsed === false) {
        $this->parse();
    }

    return $this->files;
}
||
| 175 | |||
/**
 * Returns the collected uncompressed sizes, calling parse() first when the
 * parsed flag is still false.
 *
 * @return array Contents of the internal sizes list.
 */
public function getSizes(): array
{
    if ($this->parsed === false) {
        $this->parse();
    }

    return $this->sizes;
}
||
| 187 | |||
/**
 * Returns the collected CRC32 values, calling parse() first when the parsed
 * flag is still false.
 *
 * @return array Contents of the internal CRC list.
 */
public function getCRCs(): array
{
    if ($this->parsed === false) {
        $this->parse();
    }

    return $this->crcs;
}
||
| 199 | |||
/**
 * Returns the collected file attributes, calling parse() first when the
 * parsed flag is still false.
 *
 * @return array Contents of the internal attributes list.
 */
public function getAttributes(): array
{
    if ($this->parsed === false) {
        $this->parse();
    }

    return $this->attributes;
}
||
| 211 | |||
/**
 * Returns the collected modification times, calling parse() first when the
 * parsed flag is still false.
 *
 * @return array Contents of the internal mtimes list.
 */
public function getModificationTimes(): array
{
    if ($this->parsed === false) {
        $this->parse();
    }

    return $this->mtimes;
}
||
| 223 | |||
/**
 * Returns the distinct compression methods detected in the archive, calling
 * parse() first when the parsed flag is still false.
 *
 * array_unique() keeps the key of the first occurrence of each value, which
 * would leave gaps in the keys (e.g. [0 => 'LZMA', 2 => 'AES-256']). The
 * result is re-indexed so callers always receive a sequential list.
 *
 * @return array Distinct method labels in first-seen order, indexed 0..n-1.
 */
public function getCompressionMethods(): array
{
    if (! $this->parsed) {
        $this->parse();
    }

    return array_values(array_unique($this->compressionMethods));
}
||
| 235 | |||
/**
 * Returns the file count recorded during parsing, calling parse() first when
 * the parsed flag is still false.
 *
 * @return int Value of the internal file counter (0 until a count was read).
 */
public function getFileCount(): int
{
    if ($this->parsed === false) {
        $this->parse();
    }

    return $this->numFiles;
}
||
| 247 | |||
/**
 * Returns the accumulated unpacked (uncompressed) size, calling parse() first
 * when the parsed flag is still false.
 *
 * @return int Running total of the unpacked sizes that were read.
 */
public function getTotalUnpackedSize(): int
{
    if ($this->parsed === false) {
        $this->parse();
    }

    return $this->totalUnpackedSize;
}
||
| 259 | |||
/**
 * Returns the accumulated packed (compressed) size, calling parse() first
 * when the parsed flag is still false.
 *
 * @return int Running total of the packed sizes that were read.
 */
public function getTotalPackedSize(): int
{
    if ($this->parsed === false) {
        $this->parse();
    }

    return $this->totalPackedSize;
}
||
| 271 | |||
/**
 * Reports whether AES encryption was detected (heuristic), calling parse()
 * first when the parsed flag is still false.
 *
 * @return bool Value of the internal encrypted flag.
 */
public function isEncrypted(): bool
{
    if ($this->parsed === false) {
        $this->parse();
    }

    return $this->encrypted;
}
||
| 283 | |||
/**
 * Reports whether the header itself was flagged as encrypted, calling parse()
 * first when the parsed flag is still false.
 *
 * @return bool Value of the internal headerEncrypted flag.
 */
public function isHeaderEncrypted(): bool
{
    if ($this->parsed === false) {
        $this->parse();
    }

    return $this->headerEncrypted;
}
||
| 295 | |||
/**
 * Reports whether the archive was flagged as solid, calling parse() first
 * when the parsed flag is still false.
 *
 * @return bool Value of the internal solidArchive flag.
 */
public function isSolidArchive(): bool
{
    if ($this->parsed === false) {
        $this->parse();
    }

    return $this->solidArchive;
}
||
| 307 | |||
/**
 * Reports whether a K_ENCODED_HEADER block was encountered, calling parse()
 * first when the parsed flag is still false.
 *
 * @return bool Value of the internal encodedHeader flag.
 */
public function hasEncodedHeader(): bool
{
    if ($this->parsed === false) {
        $this->parse();
    }

    return $this->encodedHeader;
}
||
| 316 | |||
/**
 * Returns the most recently recorded error message (for debugging).
 * Unlike the other accessors, this one does not trigger parsing.
 *
 * @return string Stored message; an empty string when nothing was recorded.
 */
public function getLastError(): string
{
    $message = $this->lastError;

    return $message;
}
||
| 324 | |||
/**
 * Checks whether the data begins with the six-byte 7z signature
 * (37 7A BC AF 27 1C). Does not trigger parsing.
 *
 * @return bool True when at least six bytes are present and they match.
 */
public function isValid7zSignature(): bool
{
    if ($this->len < 6) {
        return false;
    }

    return str_starts_with($this->data, "\x37\x7A\xBC\xAF\x27\x1C");
}
||
| 332 | |||
/**
 * Returns packed size divided by unpacked size, calling parse() first when
 * the parsed flag is still false.
 *
 * @return float|null The ratio, or null unless both totals are positive.
 */
public function getCompressionRatio(): ?float
{
    if ($this->parsed === false) {
        $this->parse();
    }

    $unpacked = $this->totalUnpackedSize;
    $packed = $this->totalPackedSize;

    // Without both totals the ratio is unknown.
    if ($unpacked <= 0 || $packed <= 0) {
        return null;
    }

    return $packed / $unpacked;
}
||
| 348 | |||
| 349 | private function parse(): void |
||
| 481 | } |
||
| 482 | } |
||
| 483 | |||
/**
 * Inspect an encoded header: flag AES encryption and walk its stream blocks.
 *
 * The byte at $cursor is taken to be the K_ENCODED_HEADER id and is skipped.
 * Up to 1024 following bytes are searched for the AES codec id; a hit sets
 * both the encrypted and headerEncrypted flags. The blocks that follow are
 * then dispatched until K_END, the limit, or a failed sub-parse (-1).
 *
 * @param int $cursor Offset of the K_ENCODED_HEADER byte.
 * @param int $limit  Exclusive upper bound for reads.
 */
private function parseEncodedHeader(int $cursor, int $limit): void
{
    $position = $cursor + 1; // step over the K_ENCODED_HEADER byte

    $window = substr($this->data, $position, min(1024, $limit - $position));
    if (str_contains($window, self::METHOD_AES)) {
        $this->encrypted = true;
        $this->headerEncrypted = true;
    }

    while ($position < $limit) {
        $blockId = ord($this->data[$position]);
        $position++;

        if ($blockId === self::K_END) {
            return;
        }

        switch ($blockId) {
            case self::K_PACK_INFO:
                $position = $this->parsePackInfo($position, $limit);
                break;

            case self::K_UNPACK_INFO:
                $position = $this->parseUnpackInfo($position, $limit);
                break;

            case self::K_SUBSTREAMS_INFO:
                $position = $this->skipUntilEnd($position, $limit);
                break;

            default:
                // Unknown block: treat the next VInt as a byte count and skip it.
                if (! $this->readVIntAt($position, $skipBytes, $position, $limit)) {
                    return;
                }
                $position += $skipBytes;
                break;
        }

        // -1 is the failure marker used by the sub-parsers.
        if ($position === -1) {
            return;
        }
    }
}
||
| 526 | |||
/**
 * Salvage what can be learned from an incomplete header region.
 *
 * Only the first byte decides the handling:
 *  - K_ENCODED_HEADER: set the encodedHeader flag and search up to 512 bytes
 *    (starting at that byte) for the AES codec id.
 *  - K_HEADER: scan forward byte by byte for the first K_FILES_INFO id and
 *    hand the remainder to parseFilesInfo().
 * Anything else is ignored.
 *
 * @param int $start Offset of the first byte of the region.
 * @param int $end   Exclusive end offset of the region.
 */
private function parsePartialHeader(int $start, int $end): void
{
    if ($start >= $end) {
        return;
    }

    $firstId = ord($this->data[$start]);

    if ($firstId === self::K_ENCODED_HEADER) {
        $this->encodedHeader = true;

        $window = substr($this->data, $start, min(512, $end - $start));
        if (str_contains($window, self::METHOD_AES)) {
            $this->encrypted = true;
            $this->headerEncrypted = true;
        }

        return;
    }

    if ($firstId !== self::K_HEADER) {
        return;
    }

    // Best effort: the first byte equal to K_FILES_INFO is taken as the block start.
    for ($pos = $start + 1; $pos < $end - 1; $pos++) {
        if (ord($this->data[$pos]) === self::K_FILES_INFO) {
            $this->parseFilesInfo($pos + 1, $end);

            return;
        }
    }
}
||
| 560 | |||
/**
 * Walk a MainStreamsInfo block, delegating PackInfo, UnpackInfo and
 * SubstreamsInfo sub-blocks to their parsers.
 *
 * An unrecognised id ends the walk: the result of skipUntilEnd(), called from
 * the offset of that id, is returned directly.
 *
 * @param int $cursor Offset of the first sub-block id.
 * @param int $limit  Exclusive upper bound for reads.
 *
 * @return int Offset after the block, or -1 when a sub-parser reported failure.
 */
private function parseMainStreamsInfo(int $cursor, int $limit): int
{
    while ($cursor < $limit) {
        $blockId = ord($this->data[$cursor]);
        $cursor++;

        if ($blockId === self::K_END) {
            return $cursor;
        }

        if ($blockId === self::K_PACK_INFO) {
            $cursor = $this->parsePackInfo($cursor, $limit);
        } elseif ($blockId === self::K_UNPACK_INFO) {
            $cursor = $this->parseUnpackInfo($cursor, $limit);
        } elseif ($blockId === self::K_SUBSTREAMS_INFO) {
            $cursor = $this->parseSubstreamsInfo($cursor, $limit);
        } else {
            // Step back onto the unknown id before skipping.
            return $this->skipUntilEnd($cursor - 1, $limit);
        }

        if ($cursor === -1) {
            return -1;
        }
    }

    return $cursor;
}
||
| 598 | |||
/**
 * Parse a PackInfo block: PackPos, NumPackStreams, then K_SIZE / K_CRC
 * sub-blocks until K_END.
 *
 * Every packed size read is appended to $this->packedSizes and added to
 * $this->totalPackedSize. Sizes recorded before a failure are kept.
 *
 * @param int $cursor Offset just after the K_PACK_INFO id.
 * @param int $limit  Exclusive upper bound for reads.
 *
 * @return int Offset after the block (or where scanning stopped at the
 *             limit), or -1 when a value cannot be read or an unexpected id
 *             is found.
 */
private function parsePackInfo(int $cursor, int $limit): int
{
    // PackPos (VInt) - read only to advance the cursor.
    if (! $this->readVIntAt($cursor, $packPos, $cursor, $limit)) {
        return -1;
    }

    // NumPackStreams (VInt)
    if (! $this->readVIntAt($cursor, $numPackStreams, $cursor, $limit)) {
        return -1;
    }

    while ($cursor < $limit) {
        $id = ord($this->data[$cursor]);
        $cursor++;

        if ($id === self::K_END) {
            return $cursor;
        }

        if ($id === self::K_SIZE) {
            for ($i = 0; $i < $numPackStreams && $cursor < $limit; $i++) {
                if (! $this->readVIntAt($cursor, $size, $cursor, $limit)) {
                    // NumPackStreams comes straight from the input. Ignoring a
                    // failed read would retry the same position up to that
                    // many times (presumably without progress - readVIntAt is
                    // not visible here), so stop immediately.
                    return -1;
                }
                $this->packedSizes[] = $size;
                $this->totalPackedSize += $size;
            }
        } elseif ($id === self::K_CRC) {
            // Digests are not needed; step over them.
            $cursor = $this->skipBitVector($cursor, $limit, $numPackStreams);
        } else {
            return -1;
        }
    }

    return $cursor;
}
||
| 640 | |||
/**
 * Walk an UnpackInfo block until K_END.
 *
 * K_FOLDER and K_CODERS_UNPACK_SIZE are delegated to their parsers. Every
 * other id, K_CRC included, is skipped by reading a VInt and advancing that
 * many bytes.
 *
 * @param int $cursor Offset just after the K_UNPACK_INFO id.
 * @param int $limit  Exclusive upper bound for reads.
 *
 * @return int Offset after the block, or -1 on failure.
 */
private function parseUnpackInfo(int $cursor, int $limit): int
{
    while ($cursor < $limit) {
        $blockId = ord($this->data[$cursor]);
        $cursor++;

        switch ($blockId) {
            case self::K_END:
                return $cursor;

            case self::K_FOLDER:
                $cursor = $this->parseFolderInfo($cursor, $limit);
                break;

            case self::K_CODERS_UNPACK_SIZE:
                $cursor = $this->parseCodersUnpackSize($cursor, $limit);
                break;

            default:
                if (! $this->readVIntAt($cursor, $skipBytes, $cursor, $limit)) {
                    return -1;
                }
                $cursor += $skipBytes;
                break;
        }

        if ($cursor === -1) {
            return -1;
        }
    }

    return $cursor;
}
||
| 677 | |||
/**
 * Parse the folder list of an UnpackInfo block.
 *
 * Reads NumFolders and the External flag. When the flag is non-zero only the
 * data index VInt is consumed. Otherwise each folder goes to parseFolder().
 * The archive is flagged as solid when there is exactly one folder and the
 * file count known at this point is greater than one.
 *
 * @param int $cursor Offset just after the K_FOLDER id.
 * @param int $limit  Exclusive upper bound for reads.
 *
 * @return int Offset after the folder list, or -1 on failure.
 */
private function parseFolderInfo(int $cursor, int $limit): int
{
    if (! $this->readVIntAt($cursor, $folderCount, $cursor, $limit)) {
        return -1;
    }

    if ($folderCount === 1 && $this->numFiles > 1) {
        $this->solidArchive = true;
    }

    // The External flag byte must lie inside the limit.
    if ($cursor >= $limit) {
        return -1;
    }
    $externalFlag = ord($this->data[$cursor]);
    $cursor++;

    if ($externalFlag !== 0) {
        // Folder data lives in an external stream: consume its index only.
        return $this->readVIntAt($cursor, $dataIndex, $cursor, $limit) ? $cursor : -1;
    }

    $folder = 0;
    while ($folder < $folderCount && $cursor < $limit) {
        $cursor = $this->parseFolder($cursor, $limit);
        if ($cursor === -1) {
            return -1;
        }
        $folder++;
    }

    return $cursor;
}
||
| 719 | |||
/**
 * Parse one Folder record: its coders, then bind pairs and packed-stream
 * indexes (both read only to advance the cursor).
 *
 * For each coder the flag byte gives the codec id length (low nibble), a
 * "complex" bit (0x10: explicit in/out stream counts follow) and an
 * "attributes" bit (0x20: a size-prefixed properties blob follows).
 * Recognised codec ids are appended to $this->compressionMethods; an
 * 'AES-256' coder sets the encrypted flag.
 *
 * @param int $cursor Offset of the NumCoders VInt.
 * @param int $limit  Exclusive upper bound for reads.
 *
 * @return int Offset after the folder, or -1 on failure.
 */
private function parseFolder(int $cursor, int $limit): int
{
    if (! $this->readVIntAt($cursor, $coderCount, $cursor, $limit)) {
        return -1;
    }

    $inStreams = 0;
    $outStreams = 0;

    $coder = 0;
    while ($coder < $coderCount && $cursor < $limit) {
        $coder++;

        $flagByte = ord($this->data[$cursor]);
        $cursor++;

        $idLength = $flagByte & 0x0F;
        if ($cursor + $idLength > $limit) {
            return -1;
        }
        $codecId = substr($this->data, $cursor, $idLength);
        $cursor += $idLength;

        $label = $this->identifyCompressionMethod($codecId);
        if ($label !== null) {
            $this->compressionMethods[] = $label;
            if ($label === 'AES-256') {
                $this->encrypted = true;
            }
        }

        if (($flagByte & 0x10) !== 0) {
            // Complex coder: explicit stream counts.
            if (! $this->readVIntAt($cursor, $coderIn, $cursor, $limit)) {
                return -1;
            }
            if (! $this->readVIntAt($cursor, $coderOut, $cursor, $limit)) {
                return -1;
            }
            $inStreams += $coderIn;
            $outStreams += $coderOut;
        } else {
            // Simple coder: one stream in, one stream out.
            $inStreams++;
            $outStreams++;
        }

        if (($flagByte & 0x20) !== 0) {
            if (! $this->readVIntAt($cursor, $propertyBytes, $cursor, $limit)) {
                return -1;
            }
            $cursor += $propertyBytes;
        }
    }

    // Bind pairs: one (in, out) index pair per output stream except one.
    $bindPairs = $outStreams - 1;
    for ($pair = 0; $pair < $bindPairs && $cursor < $limit; $pair++) {
        if (! $this->readVIntAt($cursor, $inIndex, $cursor, $limit)) {
            return -1;
        }
        if (! $this->readVIntAt($cursor, $outIndex, $cursor, $limit)) {
            return -1;
        }
    }

    // Packed stream indexes are only stored when there is more than one.
    $packedStreams = $inStreams - $bindPairs;
    if ($packedStreams > 1) {
        for ($stream = 0; $stream < $packedStreams && $cursor < $limit; $stream++) {
            if (! $this->readVIntAt($cursor, $packedIndex, $cursor, $limit)) {
                return -1;
            }
        }
    }

    return $cursor;
}
||
| 808 | |||
/**
 * Map a codec id (the raw bytes from a coder record) to a readable label.
 *
 * Copy requires an exact match; every other method matches by prefix. The
 * ARM and SPARC branch filters use the ids listed in 7-Zip's method table
 * (03 03 05 01 and 03 03 08 05). The previous literals placed them under the
 * x86 branch (03 03 01 ..), where no such codec exists, so neither filter
 * could ever be reported.
 *
 * @param string $codecId Raw codec id bytes.
 *
 * @return string|null Method label, or null when the id is not recognised.
 */
private function identifyCompressionMethod(string $codecId): ?string
{
    if ($codecId === self::METHOD_COPY) {
        return 'Copy';
    }

    // Order matches the original checks; no prefix here shadows another.
    $prefixes = [
        [self::METHOD_LZMA, 'LZMA'],
        [self::METHOD_LZMA2, 'LZMA2'],
        [self::METHOD_PPMD, 'PPMd'],
        [self::METHOD_BCJ, 'BCJ'],
        [self::METHOD_BCJ2, 'BCJ2'],
        [self::METHOD_DEFLATE, 'Deflate'],
        [self::METHOD_BZIP2, 'BZip2'],
        [self::METHOD_AES, 'AES-256'],
        ["\x03\x03\x05\x01", 'ARM'],
        ["\x03\x03\x08\x05", 'SPARC'],
    ];

    foreach ($prefixes as [$prefix, $label]) {
        if (str_starts_with($codecId, $prefix)) {
            return $label;
        }
    }

    return null;
}
||
| 850 | |||
/**
 * Read the unpack sizes that follow a K_CODERS_UNPACK_SIZE id.
 *
 * Simplified reader: VInts are consumed one after another until the next byte
 * equals K_END or K_CRC, a read fails, or the limit is reached. Each value is
 * appended to $this->sizes and added to $this->totalUnpackedSize.
 *
 * @param int $cursor Offset of the first size.
 * @param int $limit  Exclusive upper bound for reads.
 *
 * @return int Offset where reading stopped (never -1).
 */
private function parseCodersUnpackSize(int $cursor, int $limit): int
{
    while ($cursor < $limit) {
        $next = ord($this->data[$cursor]);
        $atBoundary = $next === self::K_END || $next === self::K_CRC;

        if ($atBoundary || ! $this->readVIntAt($cursor, $size, $afterSize, $limit)) {
            break;
        }

        $this->sizes[] = $size;
        $this->totalUnpackedSize += $size;
        $cursor = $afterSize;
    }

    return $cursor;
}
||
| 874 | |||
/**
 * Step over a SubstreamsInfo block.
 *
 * No values are extracted. Each property (K_NUM_UNPACK_STREAM, K_SIZE, K_CRC
 * or any other id) is handled identically: a VInt is read and the cursor
 * advances by that many bytes. Scanning stops at K_END.
 *
 * @param int $cursor Offset just after the K_SUBSTREAMS_INFO id.
 * @param int $limit  Exclusive upper bound for reads.
 *
 * @return int Offset after the block, or -1 when a VInt cannot be read.
 */
private function parseSubstreamsInfo(int $cursor, int $limit): int
{
    while ($cursor < $limit) {
        $propertyId = ord($this->data[$cursor]);
        $cursor++;

        if ($propertyId === self::K_END) {
            return $cursor;
        }

        if (! $this->readVIntAt($cursor, $skipBytes, $cursor, $limit)) {
            return -1;
        }
        $cursor += $skipBytes;
    }

    return $cursor;
}
||
| 924 | |||
/**
 * Step over a digests structure: an "all defined" flag byte, an optional
 * bit array (one bit per item, most significant bit first) and one 4-byte
 * CRC per defined item.
 *
 * Two fixes over the previous version:
 *  - the flag byte is only read when it lies inside both the limit and the
 *    data, instead of indexing past the end of the string;
 *  - when the flag is 0, only items whose bit is set carry a CRC, so the
 *    number of CRCs skipped is the number of set bits, not $numItems.
 *
 * @param int $cursor   Offset of the flag byte.
 * @param int $limit    Exclusive upper bound for reads.
 * @param int $numItems Number of items the structure describes.
 *
 * @return int Offset after the structure. When the structure is truncated,
 *             $limit is returned so the caller's "cursor < limit" loop ends.
 */
private function skipBitVector(int $cursor, int $limit, int $numItems): int
{
    $bound = min($limit, $this->len);
    if ($cursor < 0 || $cursor >= $bound) {
        return $limit;
    }

    $allDefined = ord($this->data[$cursor]);
    $cursor++;

    $numItems = max(0, $numItems);
    $definedCount = $numItems;

    if ($allDefined === 0) {
        $numBytes = intdiv($numItems, 8) + ($numItems % 8 === 0 ? 0 : 1);
        if ($numBytes > $bound - $cursor) {
            return $limit; // bit array is truncated
        }

        $definedCount = 0;
        for ($byteIndex = 0; $byteIndex < $numBytes; $byteIndex++) {
            $bits = ord($this->data[$cursor + $byteIndex]);

            // Ignore padding bits after the last item in the final byte.
            $itemsLeft = $numItems - $byteIndex * 8;
            if ($itemsLeft < 8) {
                $bits &= (0xFF << (8 - $itemsLeft)) & 0xFF;
            }

            $definedCount += substr_count(decbin($bits), '1');
        }
        $cursor += $numBytes;
    }

    if ($cursor >= $limit) {
        return $limit;
    }

    // Skip whole CRC values only, and never beyond the limit.
    $crcsThatFit = intdiv($limit - $cursor, 4);

    return $cursor + 4 * min($definedCount, $crcsThatFit);
}
||
| 946 | |||
/**
 * Parse a FilesInfo block: the file count followed by size-prefixed
 * properties up to K_END.
 *
 * The count is rejected (lastError set, $limit returned) when it cannot be
 * read or lies outside 0..100000. Recognised properties fill the mtimes,
 * ctimes, atimes, attributes and crcs members; names are de-duplicated and
 * stored in $this->names. The empty-file and anti-file vectors are decoded
 * but their results are not kept.
 *
 * @param int $cursor Offset of the file-count VInt.
 * @param int $limit  Exclusive upper bound for reads.
 *
 * @return int Offset where parsing stopped.
 */
private function parseFilesInfo(int $cursor, int $limit): int
{
    $fileCount = null;
    if ($this->readVIntAt($cursor, $value, $newCursor, $limit)) {
        $fileCount = $value;
    }

    if ($fileCount === null || $fileCount < 0 || $fileCount > 100000) { // sanity cap
        $this->lastError = 'Invalid number of files: '.$fileCount;

        return $limit;
    }

    $cursor = $newCursor;
    $this->numFiles = $fileCount;

    $collectedNames = [];
    $emptyStreamFlags = [];

    while ($cursor < $limit) {
        $propertyId = ord($this->data[$cursor]);
        $cursor++;

        if ($propertyId === self::K_END) {
            break;
        }

        // Every property carries its byte size as a VInt.
        if (! $this->readVIntAt($cursor, $propertySize, $cursor, $limit)) {
            break;
        }
        if ($propertySize < 0 || $propertySize > ($limit - $cursor)) {
            break;
        }

        $propertyEnd = $cursor + $propertySize;

        if ($propertyId === self::K_EMPTY_STREAM) {
            $emptyStreamFlags = $this->parseBitVector($cursor, $propertySize, $fileCount);
        } elseif ($propertyId === self::K_EMPTY_FILE || $propertyId === self::K_ANTI) {
            // Sized by the number of empty streams; the decoded flags are unused.
            $emptyCount = count(array_filter($emptyStreamFlags));
            $this->parseBitVector($cursor, $propertySize, $emptyCount);
        } elseif ($propertyId === self::K_NAME) {
            $collectedNames = $this->parseNames($cursor, $propertySize, $fileCount);
        } elseif ($propertyId === self::K_MTIME) {
            $this->mtimes = $this->parseFileTimes($cursor, $propertySize, $fileCount);
        } elseif ($propertyId === self::K_CTIME) {
            $this->ctimes = $this->parseFileTimes($cursor, $propertySize, $fileCount);
        } elseif ($propertyId === self::K_ATIME) {
            $this->atimes = $this->parseFileTimes($cursor, $propertySize, $fileCount);
        } elseif ($propertyId === self::K_WIN_ATTRIBUTES) {
            $this->attributes = $this->parseAttributes($cursor, $propertySize, $fileCount);
        } elseif ($propertyId === self::K_CRC) {
            $this->crcs = $this->parseCRCs($cursor, $propertySize, $fileCount);
        }

        // Known or unknown, continue right after the property's data.
        $cursor = $propertyEnd;
    }

    if ($collectedNames !== []) {
        $this->names = array_values(array_unique($collectedNames));
    }

    return $cursor;
}
||
| 1046 | |||
/**
 * Decode the file names stored in a K_NAME property.
 *
 * The property starts with an "external" flag byte (non-zero is unsupported
 * and yields no names), followed by UTF-16LE strings, each terminated by a
 * 00 00 code unit.
 *
 * Terminators are only recognised on even byte offsets. Splitting on any two
 * adjacent zero bytes, as was done before, cuts names apart whenever the
 * high byte of one code unit and the low byte of the next are both zero
 * (e.g. "a" followed by U+4E00: 61 00 00 4E).
 *
 * Each name is converted to UTF-8, trimmed, has backslashes turned into
 * forward slashes and a single leading "./" removed. Names that are empty,
 * fail conversion or contain more than 16 slashes are dropped, so positions
 * in the result need not match file indexes.
 *
 * @param int $cursor   Offset of the external flag byte.
 * @param int $propSize Size of the property data in bytes, flag included.
 * @param int $numFiles Collection stops once this many names are gathered.
 *
 * @return array List of cleaned UTF-8 names.
 */
private function parseNames(int $cursor, int $propSize, int $numFiles): array
{
    $names = [];

    if ($propSize < 1 || $cursor < 0 || $cursor >= $this->len) {
        return $names;
    }

    if (ord($this->data[$cursor]) !== 0) { // External data not supported
        return $names;
    }

    $blob = substr($this->data, $cursor + 1, $propSize - 1);

    // UTF-16LE needs whole 2-byte units; drop a dangling last byte.
    if ((strlen($blob) & 1) === 1) {
        $blob = substr($blob, 0, -1);
    }
    $blobLen = strlen($blob);

    $segmentStart = 0;
    $searchFrom = 0;

    while ($segmentStart < $blobLen) {
        $terminator = strpos($blob, "\x00\x00", $searchFrom);

        // A hit on an odd offset straddles two code units: not a terminator.
        if ($terminator !== false && ($terminator & 1) === 1) {
            $searchFrom = $terminator + 1;
            continue;
        }

        $segmentEnd = $terminator === false ? $blobLen : $terminator;
        $segment = substr($blob, $segmentStart, $segmentEnd - $segmentStart);
        $segmentStart = $segmentEnd + 2;
        $searchFrom = $segmentStart;

        if ($segment === '') {
            continue;
        }

        // Best effort: conversion warnings are suppressed and the name skipped.
        $utf8 = @iconv('UTF-16LE', 'UTF-8//IGNORE', $segment);
        if ($utf8 === false) {
            continue;
        }

        $utf8 = trim($utf8);
        if ($utf8 === '') {
            continue;
        }

        // Normalize path separators and remove a leading './'.
        $clean = str_replace('\\', '/', $utf8);
        if (str_starts_with($clean, './')) {
            $clean = substr($clean, 2);
        }

        if ($clean === '' || substr_count($clean, '/') > 16) { // excessive depth -> skip
            continue;
        }

        $names[] = $clean;

        if (count($names) >= $numFiles) {
            break;
        }
    }

    return $names;
}
||
| 1113 | |||
/**
 * Parse a bit vector from property data.
 *
 * The first byte is an "all defined" flag. When it is non-zero every item is
 * reported as true. Otherwise one bit per item follows, most significant bit
 * first within each byte; bits that lie beyond the end of the buffer are
 * reported as false.
 *
 * @param  int  $cursor    Offset of the flag byte inside the buffer.
 * @param  int  $propSize  Property size in bytes; values below 1 mean no data.
 * @param  int  $numItems  Number of entries to produce.
 * @return array<int, bool>
 */
private function parseBitVector(int $cursor, int $propSize, int $numItems): array
{
    // array_fill() rejects negative counts; treat them like "no items".
    if ($numItems <= 0) {
        return [];
    }

    $result = array_fill(0, $numItems, false);

    // No payload, or the flag byte is not inside the buffer.
    if ($propSize < 1 || $cursor < 0 || $cursor >= $this->len) {
        return $result;
    }

    if (ord($this->data[$cursor]) !== 0) {
        // All items are defined
        return array_fill(0, $numItems, true);
    }

    $bitsStart = $cursor + 1;

    for ($i = 0; $i < $numItems; $i++) {
        $bytePos = $bitsStart + ($i >> 3);
        if ($bytePos >= $this->len) {
            // Byte positions only grow, so every remaining item stays false.
            break;
        }

        // Bit 7 of each byte belongs to the first item of that byte.
        $result[$i] = (ord($this->data[$bytePos]) & (0x80 >> ($i & 7))) !== 0;
    }

    return $result;
}
||
| 1148 | |||
| 1149 | /** |
||
| 1150 | * Parse file times from property data. |
||
| 1151 | * |
||
| 1152 | * @return array<int, int|null> |
||
| 1153 | */ |
||
| 1154 | private function parseFileTimes(int $cursor, int $propSize, int $numFiles): array |
||
| 1209 | } |
||
| 1210 | |||
/**
 * Parse Windows attributes from property data.
 *
 * Layout read here: an "all defined" byte, an optional per-file bit vector
 * (MSB first) when that byte is zero, an "external" byte, then one 32-bit
 * little-endian value for each defined file.
 *
 * @param  int  $cursor    Offset of the "all defined" byte.
 * @param  int  $propSize  Property size in bytes; below 1 yields an empty array.
 * @param  int  $numFiles  Number of file entries to produce.
 * @return array<int, int|null> Attribute value per file index, null where
 *                              undefined or not readable; empty when the data
 *                              is external or ends before the external flag.
 */
private function parseAttributes(int $cursor, int $propSize, int $numFiles): array
{
    if ($propSize < 1) {
        return [];
    }

    // AllDefined byte
    $everyFileDefined = ord($this->data[$cursor]) !== 0;
    $cursor++;

    if ($everyFileDefined) {
        $defined = array_fill(0, $numFiles, true);
    } else {
        $defined = [];
        for ($idx = 0; $idx < $numFiles; $idx++) {
            $bytePos = $cursor + ($idx >> 3);
            // Bits outside the buffer count as "not defined".
            $defined[$idx] = $bytePos < $this->len
                && (ord($this->data[$bytePos]) & (0x80 >> ($idx & 7))) !== 0;
        }
        $cursor += (int) ceil($numFiles / 8);
    }

    // External flag
    if ($cursor >= $this->len) {
        return [];
    }
    $isExternal = ord($this->data[$cursor]) !== 0;
    $cursor++;

    if ($isExternal) {
        return [];
    }

    // Read attributes (4 bytes each); only defined entries consume bytes.
    $attrs = [];
    for ($idx = 0; $idx < $numFiles; $idx++) {
        $readable = ! empty($defined[$idx]) && $cursor + 4 <= $this->len;
        if (! $readable) {
            $attrs[$idx] = null;

            continue;
        }

        $attrs[$idx] = $this->readUInt32LE($cursor);
        $cursor += 4;
    }

    return $attrs;
}
||
| 1269 | |||
/**
 * Parse CRC values from property data.
 *
 * Layout read here: an "all defined" byte, an optional per-file bit vector
 * (MSB first) when that byte is zero, then one 32-bit little-endian CRC for
 * each defined file.
 *
 * @param  int  $cursor    Offset of the "all defined" byte.
 * @param  int  $propSize  Property size in bytes; below 1 yields an empty array.
 * @param  int  $numFiles  Number of file entries to produce.
 * @return array<int, string|null> Eight-digit upper-case hex CRC per file
 *                                 index, null where undefined or not readable.
 */
private function parseCRCs(int $cursor, int $propSize, int $numFiles): array
{
    if ($propSize < 1) {
        return [];
    }

    // AllDefined byte
    $everyFileDefined = ord($this->data[$cursor]) !== 0;
    $cursor++;

    if ($everyFileDefined) {
        $defined = array_fill(0, $numFiles, true);
    } else {
        $defined = [];
        for ($idx = 0; $idx < $numFiles; $idx++) {
            $bytePos = $cursor + ($idx >> 3);
            // Bits outside the buffer count as "not defined".
            $defined[$idx] = $bytePos < $this->len
                && (ord($this->data[$bytePos]) & (0x80 >> ($idx & 7))) !== 0;
        }
        $cursor += (int) ceil($numFiles / 8);
    }

    // Read CRC values (4 bytes each); only defined entries consume bytes.
    $crcs = [];
    for ($idx = 0; $idx < $numFiles; $idx++) {
        $readable = ! empty($defined[$idx]) && $cursor + 4 <= $this->len;
        if (! $readable) {
            $crcs[$idx] = null;

            continue;
        }

        $crcs[$idx] = sprintf('%08X', $this->readUInt32LE($cursor));
        $cursor += 4;
    }

    return $crcs;
}
||
| 1318 | |||
/**
 * Build consolidated file info array.
 *
 * Merges the per-index property arrays (names, sizes, CRCs, attributes and
 * timestamps) into one record per file in $this->files. Any value missing
 * for an index is stored as null; 'is_dir' is derived from the directory
 * bit of the attributes and is false when no attributes are known.
 */
private function buildFileInfo(): void
{
    $total = max(count($this->names), $this->numFiles);

    $index = 0;
    while ($index < $total) {
        $attr = $this->attributes[$index] ?? null;

        $this->files[$index] = [
            'name' => $this->names[$index] ?? null,
            'size' => $this->sizes[$index] ?? null,
            'packed_size' => $this->packedSizes[$index] ?? null,
            'crc' => $this->crcs[$index] ?? null,
            'attributes' => $attr,
            'is_dir' => $attr !== null && ($attr & self::FILE_ATTRIBUTE_DIRECTORY) !== 0,
            'mtime' => $this->mtimes[$index] ?? null,
            'ctime' => $this->ctimes[$index] ?? null,
            'atime' => $this->atimes[$index] ?? null,
        ];

        $index++;
    }
}
||
| 1345 | |||
/**
 * Scan raw data for compression method signatures.
 *
 * Searches the byte range [$start, $end) of the buffer for each known method
 * signature and appends the matching label to $this->compressionMethods.
 * Finding the AES signature additionally sets the encrypted flag.
 *
 * @param  int  $start  Offset of the first byte to inspect.
 * @param  int  $end    Offset just past the last byte to inspect.
 */
private function scanForCompressionMethods(int $start, int $end): void
{
    $window = substr($this->data, $start, $end - $start);

    // [signature, label, marks archive as encrypted] in reporting order.
    $signatures = [
        [self::METHOD_AES, 'AES-256', true],
        [self::METHOD_LZMA2, 'LZMA2', false],
        [self::METHOD_LZMA, 'LZMA', false],
        [self::METHOD_PPMD, 'PPMd', false],
        [self::METHOD_BZIP2, 'BZip2', false],
        [self::METHOD_DEFLATE, 'Deflate', false],
    ];

    foreach ($signatures as [$signature, $label, $marksEncrypted]) {
        if (strpos($window, $signature) === false) {
            continue;
        }

        if ($marksEncrypted) {
            $this->encrypted = true;
        }

        $this->compressionMethods[] = $label;
    }
}
||
| 1374 | |||
/**
 * Fallback scanning for filenames when normal parsing fails.
 *
 * Does nothing when names are already known. Otherwise searches the raw
 * buffer for a fixed list of file extensions encoded as UTF-16LE, walks
 * backwards from each hit to find where the name begins, and keeps the
 * candidates that pass isValidFilename(). A dedicated NFO scan runs
 * afterwards, and the de-duplicated result replaces $this->names when at
 * least one candidate was found.
 */
private function fallbackScanForFilenames(): void
{
    if ($this->names !== []) {
        return; // Already have names
    }

    // Common file extensions to look for (searched in UTF-16LE form)
    $extensions = [
        '.avi', '.mkv', '.mp4', '.wmv', '.mov',
        '.mp3', '.flac', '.wav', '.ogg',
        '.rar', '.zip', '.exe', '.dll',
        '.nfo', '.txt', '.pdf', '.doc',
        '.jpg', '.png', '.gif', '.bmp',
        '.iso', '.bin', '.img', '.nrg',
    ];

    $candidates = [];

    foreach ($extensions as $extension) {
        $needle = @iconv('UTF-8', 'UTF-16LE', $extension);
        if ($needle === false) {
            continue;
        }

        $needleLen = strlen($needle);

        for (
            $hit = strpos($this->data, $needle, 0);
            $hit !== false;
            $hit = strpos($this->data, $needle, $hit + $needleLen)
        ) {
            // Walk backwards from the extension to the start of the name.
            $nameStart = $this->findFilenameStart($hit);
            if ($nameStart === false || $nameStart >= $hit) {
                continue;
            }

            $raw = substr($this->data, $nameStart, ($hit - $nameStart) + $needleLen);
            $rawLen = strlen($raw);

            // Only consider candidates of a plausible byte length.
            if ($rawLen <= 2 || $rawLen >= 512) {
                continue;
            }

            $decoded = @iconv('UTF-16LE', 'UTF-8//IGNORE', $raw);
            if ($decoded === false || ! $this->isValidFilename($decoded)) {
                continue;
            }

            $candidates[] = str_replace('\\', '/', $decoded);
        }
    }

    // Also try to find NFO files specifically (common in releases)
    $this->scanForNfoFiles($candidates);

    if ($candidates !== []) {
        $this->names = array_values(array_unique($candidates));
    }
}
||
| 1432 | |||
/**
 * Find the start of a UTF-16LE filename by scanning backwards.
 *
 * Steps back two bytes at a time from $extensionPos, for at most 256 code
 * units, and stops at the first code unit whose high byte is zero and whose
 * low byte is below 32 (this covers the null terminator and ASCII control
 * characters). The name starts right after that unit; when no such unit is
 * met, it starts at the last position examined.
 *
 * @param  int  $extensionPos  Byte offset where the extension was found.
 * @return int|false Byte offset of the first name byte, or false when the
 *                   computed offset is negative.
 */
private function findFilenameStart(int $extensionPos): int|false
{
    $maxUnits = 256; // Max filename length to consider
    $lowerBound = max(0, $extensionPos - $maxUnits * 2);
    $nameStart = $extensionPos;

    for ($at = $extensionPos - 2; $at >= $lowerBound; $at -= 2) {
        // Need both bytes of the code unit inside the buffer.
        if ($at + 1 >= $this->len) {
            continue;
        }

        $lowByte = ord($this->data[$at]);
        $highByte = ord($this->data[$at + 1]);

        // Null terminator or ASCII control character: the name begins after it.
        if ($highByte === 0 && $lowByte < 32) {
            $nameStart = $at + 2;

            break;
        }

        $nameStart = $at;
    }

    if ($nameStart < 0) {
        return false;
    }

    return $nameStart;
}
||
| 1470 | |||
/**
 * Validate if a string looks like a valid filename.
 *
 * A candidate is accepted when, after trimming, it is non-empty, at most
 * 260 bytes long, contains at least one ASCII letter or digit, and has no
 * more than five characters outside letters, marks, digits, whitespace and
 * the punctuation `. - _ / \`.
 *
 * @param  string  $name  Candidate name, expected to be UTF-8.
 * @return bool True when the candidate looks like a filename.
 */
private function isValidFilename(string $name): bool
{
    $name = trim($name);

    // Strict comparison: empty() would also reject the legitimate name "0".
    if ($name === '' || strlen($name) > 260) {
        return false;
    }

    // Must have at least one printable character
    if (preg_match('/[a-zA-Z0-9]/', $name) !== 1) {
        return false;
    }

    // Count special characters per code point. A byte-wise class would count
    // every byte of a non-ASCII letter as "special" and reject such names.
    $specialCount = preg_match_all('/[^\p{L}\p{M}\p{N}\s.\-_\/\\\\]/u', $name);
    if ($specialCount === false) {
        // The /u pattern fails on malformed UTF-8; fall back to bytes.
        $specialCount = preg_match_all('/[^\w\s.\-_\/\\\\]/', $name);
    }

    // Should not have too many special characters
    return ! ($specialCount > 5);
}
||
| 1495 | |||
/**
 * Scan for NFO files which are very common in release archives.
 *
 * Searches the raw buffer for ".nfo" encoded as UTF-16LE, walks backwards
 * from each hit to locate the start of the name, and appends every
 * candidate that passes isValidFilename() to $found with backslashes
 * converted to '/'.
 *
 * @param  array  $found  Collected names; matches are appended in place.
 */
private function scanForNfoFiles(array &$found): void
{
    $needle = @iconv('UTF-8', 'UTF-16LE', '.nfo');
    if ($needle === false) {
        return;
    }

    $needleLen = strlen($needle);

    for (
        $hit = strpos($this->data, $needle, 0);
        $hit !== false;
        $hit = strpos($this->data, $needle, $hit + $needleLen)
    ) {
        $nameStart = $this->findFilenameStart($hit);
        if ($nameStart === false || $nameStart >= $hit) {
            continue;
        }

        $raw = substr($this->data, $nameStart, ($hit - $nameStart) + $needleLen);
        $rawLen = strlen($raw);

        // Only consider candidates of a plausible byte length.
        if ($rawLen <= 2 || $rawLen >= 256) {
            continue;
        }

        $decoded = @iconv('UTF-16LE', 'UTF-8//IGNORE', $raw);
        if ($decoded === false || ! $this->isValidFilename($decoded)) {
            continue;
        }

        $found[] = str_replace('\\', '/', $decoded);
    }
}
||
| 1524 | |||
/**
 * Convert Windows FILETIME to Unix timestamp.
 *
 * FILETIME counts 100-nanosecond intervals since January 1, 1601, while the
 * Unix epoch starts on January 1, 1970; the two are 116444736000000000
 * intervals apart.
 *
 * @param  int  $filetime  FILETIME value.
 * @return int Whole seconds since the Unix epoch, or 0 when the value is at
 *             or before the epoch.
 */
private function filetimeToUnix(int $filetime): int
{
    $unixEpochDiff = 116444736000000000;
    $intervalsPerSecond = 10000000;

    if ($filetime <= $unixEpochDiff) {
        return 0;
    }

    // Integer division keeps the result exact. The `/` operator converts to
    // float when the division is not exact, which rounds numerators above
    // 2^53 and can land on the wrong second.
    return intdiv($filetime - $unixEpochDiff, $intervalsPerSecond);
}
||
| 1541 | |||
/**
 * Skip property records until the K_END marker.
 *
 * Each record is read as an id byte followed by a variable-length size and
 * that many payload bytes.
 *
 * @param  int  $cursor  Offset of the first id byte.
 * @param  int  $limit   Offset just past the last readable byte.
 * @return int Offset right after the K_END byte, or -1 when the size cannot
 *             be read, is negative, exceeds the remaining bytes, or the
 *             limit is reached before K_END.
 */
private function skipUntilEnd(int $cursor, int $limit): int
{
    while ($cursor < $limit) {
        $propertyId = ord($this->data[$cursor]);
        $cursor++;

        if ($propertyId === self::K_END) {
            return $cursor;
        }

        // Property-like: read size then skip
        $size = 0;
        $afterSize = $cursor;
        if (! $this->readVIntAt($cursor, $size, $afterSize, $limit)) {
            return -1;
        }
        $cursor = $afterSize;

        $remaining = $limit - $cursor;
        if ($size < 0 || $size > $remaining) {
            return -1;
        }

        $cursor += $size;
    }

    return -1;
}
||
| 1562 | |||
/**
 * Reads a 7z variable-length integer at offset, returns value via reference.
 *
 * 7z numbers are NOT 7-bit continuation groups. The count of leading 1-bits
 * in the first byte gives the number of extra bytes that follow; those extra
 * bytes hold the low part in little-endian order, and the remaining low bits
 * of the first byte form the most significant part. A first byte of 0xFF is
 * followed by a full 8-byte little-endian value. For values below 0x80 the
 * encoding is the single byte itself.
 *
 * @param  int       $offset     Offset of the first byte of the number.
 * @param  int|null  $value      Receives the decoded value; values with bit
 *                               63 set come out negative in PHP.
 * @param  int|null  $newOffset  Receives the offset just past the number;
 *                               written only on success.
 * @param  int       $limit      Offset just past the last readable byte.
 * @return bool False when the number does not fit before $limit or the end
 *              of the buffer.
 */
private function readVIntAt(int $offset, ?int &$value, ?int &$newOffset, int $limit): bool
{
    $value = 0;

    // Never read past the caller's limit or the buffer itself.
    $end = min($limit, $this->len);
    if ($offset < 0 || $offset >= $end) {
        return false;
    }

    $first = ord($this->data[$offset]);
    $pos = $offset + 1;
    $mask = 0x80;

    for ($i = 0; $i < 8; $i++) {
        if (($first & $mask) === 0) {
            // Bits of the first byte below the stop bit are the high part.
            $value |= ($first & ($mask - 1)) << ($i * 8);
            $newOffset = $pos;

            return true;
        }

        if ($pos >= $end) {
            return false;
        }

        $value |= ord($this->data[$pos]) << ($i * 8);
        $pos++;
        $mask >>= 1;
    }

    // First byte was 0xFF: all eight payload bytes have been consumed.
    $newOffset = $pos;

    return true;
}
||
| 1585 | |||
/**
 * Read a 64-bit unsigned little-endian integer.
 *
 * PHP integers are signed, so values with bit 63 set are returned as
 * negative numbers.
 *
 * @param  int  $offset  Offset of the least significant byte.
 * @return int The decoded value, or 0 when fewer than 8 bytes are available
 *             at $offset.
 */
private function readUInt64LE(int $offset): int
{
    if ($offset < 0 || $offset + 8 > $this->len) {
        return 0;
    }

    $v = 0;
    for ($i = 0; $i < 8; $i++) {
        $v |= ord($this->data[$offset + $i]) << ($i * 8);
    }

    // No masking: the literal 0xFFFFFFFFFFFFFFFF exceeds PHP_INT_MAX and is
    // parsed as a float, so `$v & 0xFFFFFFFFFFFFFFFF` does not apply an
    // all-ones mask and corrupts the value instead of preserving it.
    return $v;
}
||
| 1599 | |||
/**
 * Read a 32-bit unsigned little-endian integer.
 *
 * @param  int  $offset  Offset of the least significant byte.
 * @return int The decoded value, or 0 when fewer than 4 bytes are available
 *             at $offset.
 */
private function readUInt32LE(int $offset): int
{
    if ($offset + 4 > $this->len) {
        return 0;
    }

    // Fold from the most significant byte down to the least significant one.
    $value = 0;
    for ($byte = 3; $byte >= 0; $byte--) {
        $value = ($value << 8) | ord($this->data[$offset + $byte]);
    }

    return $value;
}
||
| 1614 | |||
/**
 * Extract a summary of archive information as an associative array.
 * Useful for quick inspection of archive contents.
 *
 * Triggers parsing when it has not happened yet. Directory and regular-file
 * counts are derived from the 'is_dir' flag of the parsed file records.
 *
 * @return array<string, mixed>
 */
public function getSummary(): array
{
    if (! $this->parsed) {
        $this->parse();
    }

    $directoryCount = count(array_filter($this->files, static fn ($entry) => $entry['is_dir']));
    $regularFileCount = count($this->files) - $directoryCount;

    return [
        'valid_signature' => $this->isValid7zSignature(),
        'file_count' => $this->numFiles,
        'directory_count' => $directoryCount,
        'regular_file_count' => $regularFileCount,
        'total_unpacked_size' => $this->totalUnpackedSize,
        'total_packed_size' => $this->totalPackedSize,
        'compression_ratio' => $this->getCompressionRatio(),
        'compression_methods' => $this->getCompressionMethods(),
        'encrypted' => $this->encrypted,
        'header_encrypted' => $this->headerEncrypted,
        'encoded_header' => $this->encodedHeader,
        'solid_archive' => $this->solidArchive,
        'last_error' => $this->lastError,
    ];
}
||
| 1652 | |||
/**
 * Get files matching a specific extension.
 *
 * The comparison is case-insensitive and ignores leading dots in
 * $extension. Entries without a name never match. Triggers parsing when it
 * has not happened yet.
 *
 * @param  string  $extension  Extension to look for, with or without a dot.
 * @return array<int, array> Matching file records keyed by their file index.
 */
public function getFilesByExtension(string $extension): array
{
    if (! $this->parsed) {
        $this->parse();
    }

    $wanted = ltrim(strtolower($extension), '.');

    // array_filter keeps the original file indexes as keys.
    return array_filter(
        $this->files,
        static function ($file) use ($wanted) {
            if ($file['name'] === null) {
                return false;
            }

            return strtolower(pathinfo($file['name'], PATHINFO_EXTENSION)) === $wanted;
        }
    );
}
||
| 1680 | |||
/**
 * Check if archive contains any files with given extension.
 *
 * @param  string  $extension  Extension to look for, with or without a dot.
 * @return bool True when getFilesByExtension() finds at least one file.
 */
public function hasFileWithExtension(string $extension): bool
{
    $matches = $this->getFilesByExtension($extension);

    return $matches !== [];
}
||
| 1688 | |||
/**
 * Get all directory entries from the archive.
 *
 * Triggers parsing when it has not happened yet.
 *
 * @return array<int, array> File records flagged 'is_dir', keyed by their
 *                           file index.
 */
public function getDirectories(): array
{
    if (! $this->parsed) {
        $this->parse();
    }

    $directories = [];
    foreach ($this->files as $index => $file) {
        if ($file['is_dir']) {
            $directories[$index] = $file;
        }
    }

    return $directories;
}
||
| 1702 | |||
| 1703 | /** |
||
| 1704 | * Get the largest file in the archive. |
||
| 1705 | */ |
||
| 1706 | public function getLargestFile(): ?array |
||
| 1723 | } |
||
| 1724 | } |
||
| 1725 | |||
| 1726 |