SevenZipPartialParser::parseUnpackInfo()   B
last analyzed

Complexity

Conditions 9
Paths 12

Size

Total Lines 32
Code Lines 20

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 20
dl 0
loc 32
rs 8.0555
c 0
b 0
f 0
cc 9
nc 12
nop 2
1
<?php
2
3
namespace App\Services\Archives;
4
5
/**
6
 * Enhanced partial 7z header parser to recover file information from an in-memory buffer.
7
 * Supports unencoded headers directly; encoded headers are flagged so caller can fallback
8
 * to external 7z listing. Provides heuristics for encryption and compression detection.
9
 *
10
 * Features:
11
 * - Extracts file names, sizes, attributes, timestamps, and CRC values
12
 * - Detects compression methods (LZMA, LZMA2, PPMd, BZip2, etc.)
13
 * - Heuristic encryption detection (AES-256, header encryption)
14
 * - Fallback scanning for partial/corrupted archives
15
 * - Solid archive detection
16
 */
17
class SevenZipPartialParser
18
{
19
    private string $data;
20
21
    private int $len;
22
23
    private array $names = [];
24
25
    private array $files = []; // Extended file info with metadata
26
27
    private array $sizes = []; // Uncompressed sizes
28
29
    private array $packedSizes = []; // Compressed sizes
30
31
    private array $crcs = []; // CRC32 values
32
33
    private array $attributes = []; // File attributes (directory, readonly, etc.)
34
35
    private array $mtimes = []; // Modification times
36
37
    private array $ctimes = []; // Creation times
38
39
    private array $atimes = []; // Access times
40
41
    private bool $parsed = false;
42
43
    private bool $encodedHeader = false; // flag if we encountered kEncodedHeader
44
45
    private bool $encrypted = false; // heuristic flag if AES encryption detected
46
47
    private bool $headerEncrypted = false; // flag if header itself is encrypted
48
49
    private bool $solidArchive = false; // flag for solid archives
50
51
    private int $numFiles = 0; // Total number of files detected
52
53
    private array $compressionMethods = []; // Detected compression methods
54
55
    private int $totalUnpackedSize = 0; // Total unpacked size
56
57
    private int $totalPackedSize = 0; // Total packed size
58
59
    private string $lastError = ''; // Last error message for debugging
60
61
    // 7z Property IDs
62
    private const K_END = 0x00;
63
64
    private const K_HEADER = 0x01;
65
66
    private const K_ARCHIVE_PROPERTIES = 0x02;
67
68
    private const K_ADDITIONAL_STREAMS_INFO = 0x03;
69
70
    private const K_MAIN_STREAMS_INFO = 0x04;
71
72
    private const K_FILES_INFO = 0x05;
73
74
    private const K_PACK_INFO = 0x06;
75
76
    private const K_UNPACK_INFO = 0x07;
77
78
    private const K_SUBSTREAMS_INFO = 0x08;
79
80
    private const K_SIZE = 0x09;
81
82
    private const K_CRC = 0x0A;
83
84
    private const K_FOLDER = 0x0B;
85
86
    private const K_CODERS_UNPACK_SIZE = 0x0C;
87
88
    private const K_NUM_UNPACK_STREAM = 0x0D;
89
90
    private const K_EMPTY_STREAM = 0x0E;
91
92
    private const K_EMPTY_FILE = 0x0F;
93
94
    private const K_ANTI = 0x10;
95
96
    private const K_NAME = 0x11;
97
98
    private const K_CTIME = 0x12;
99
100
    private const K_ATIME = 0x13;
101
102
    private const K_MTIME = 0x14;
103
104
    private const K_WIN_ATTRIBUTES = 0x15;
105
106
    private const K_COMMENT = 0x16;
107
108
    private const K_ENCODED_HEADER = 0x17;
109
110
    private const K_START_POS = 0x18;
111
112
    private const K_DUMMY = 0x19;
113
114
    // Compression method IDs
115
    private const METHOD_COPY = "\x00";
116
117
    private const METHOD_LZMA = "\x03\x01\x01";
118
119
    private const METHOD_LZMA2 = "\x21";
120
121
    private const METHOD_PPMD = "\x03\x04\x01";
122
123
    private const METHOD_BCJ = "\x03\x03\x01\x03";
124
125
    private const METHOD_BCJ2 = "\x03\x03\x01\x1B";
126
127
    private const METHOD_DEFLATE = "\x04\x01\x08";
128
129
    private const METHOD_BZIP2 = "\x04\x02\x02";
130
131
    private const METHOD_AES = "\x06\xF1\x07\x01";
132
133
    // Windows file attributes
134
    private const FILE_ATTRIBUTE_READONLY = 0x01;
135
136
    private const FILE_ATTRIBUTE_HIDDEN = 0x02;
137
138
    private const FILE_ATTRIBUTE_SYSTEM = 0x04;
139
140
    private const FILE_ATTRIBUTE_DIRECTORY = 0x10;
141
142
    private const FILE_ATTRIBUTE_ARCHIVE = 0x20;
143
144
    /**
     * Wraps a raw in-memory buffer that is expected to hold (part of) a 7z archive.
     *
     * Nothing is parsed here; parsing happens lazily on the first accessor call.
     *
     * @param string $data raw archive bytes
     */
    public function __construct(string $data)
    {
        // Cache the byte length once so the parsing routines can bounds-check cheaply.
        $this->len = strlen($data);
        $this->data = $data;
    }
149
150
    /**
     * Lists the file names recovered from the buffer (documented as UTF-8).
     *
     * Triggers the one-time parse on first use.
     *
     * @return array recovered names, or an empty array when none were found
     */
    public function getFileNames(): array
    {
        if ($this->parsed === false) {
            $this->parse();
        }

        return $this->names;
    }
161
162
    /**
     * Provides the consolidated per-file records with every piece of metadata that was recovered.
     *
     * Triggers the one-time parse on first use.
     *
     * @return array<int, array{name: string, size: int|null, packed_size: int|null, crc: string|null, attributes: int|null, is_dir: bool, mtime: int|null, ctime: int|null, atime: int|null}>
     */
    public function getFiles(): array
    {
        if ($this->parsed === false) {
            $this->parse();
        }

        return $this->files;
    }
175
176
    /**
     * Provides the uncompressed sizes collected while parsing, indexed by file index.
     *
     * Triggers the one-time parse on first use.
     */
    public function getSizes(): array
    {
        if ($this->parsed === false) {
            $this->parse();
        }

        return $this->sizes;
    }
187
188
    /**
     * Provides the CRC values collected while parsing, indexed by file index
     * (documented as CRC32 values rendered as hex strings).
     *
     * Triggers the one-time parse on first use.
     */
    public function getCRCs(): array
    {
        if ($this->parsed === false) {
            $this->parse();
        }

        return $this->crcs;
    }
199
200
    /**
     * Provides the file attribute values collected while parsing, indexed by file index.
     *
     * Triggers the one-time parse on first use.
     */
    public function getAttributes(): array
    {
        if ($this->parsed === false) {
            $this->parse();
        }

        return $this->attributes;
    }
211
212
    /**
     * Provides the modification times collected while parsing, indexed by file index
     * (documented as Unix timestamps).
     *
     * Triggers the one-time parse on first use.
     */
    public function getModificationTimes(): array
    {
        if ($this->parsed === false) {
            $this->parse();
        }

        return $this->mtimes;
    }
223
224
    /**
     * Returns the distinct compression/encryption method names detected in the archive.
     *
     * Triggers the one-time parse on first use.
     *
     * @return list<string> unique method names in first-seen order
     */
    public function getCompressionMethods(): array
    {
        if (! $this->parsed) {
            $this->parse();
        }

        // array_unique() keeps the original keys, which leaves gaps (e.g. [0 => 'LZMA', 2 => 'AES-256'])
        // and makes json_encode() emit an object; re-index so callers always receive a plain list.
        return array_values(array_unique($this->compressionMethods));
    }
235
236
    /**
     * Reports how many files the parsed header declared.
     *
     * Triggers the one-time parse on first use.
     */
    public function getFileCount(): int
    {
        if ($this->parsed === false) {
            $this->parse();
        }

        return $this->numFiles;
    }
247
248
    /**
     * Reports the accumulated unpacked (uncompressed) size seen while parsing.
     *
     * Triggers the one-time parse on first use.
     */
    public function getTotalUnpackedSize(): int
    {
        if ($this->parsed === false) {
            $this->parse();
        }

        return $this->totalUnpackedSize;
    }
259
260
    /**
     * Reports the accumulated packed (compressed) size seen while parsing.
     *
     * Triggers the one-time parse on first use.
     */
    public function getTotalPackedSize(): int
    {
        if ($this->parsed === false) {
            $this->parse();
        }

        return $this->totalPackedSize;
    }
271
272
    /**
     * Tells whether AES encryption was detected. This is a heuristic, not a guarantee.
     *
     * Triggers the one-time parse on first use.
     */
    public function isEncrypted(): bool
    {
        if ($this->parsed === false) {
            $this->parse();
        }

        return $this->encrypted;
    }
283
284
    /**
     * Tells whether the archive header itself was flagged as encrypted.
     *
     * Triggers the one-time parse on first use.
     */
    public function isHeaderEncrypted(): bool
    {
        if ($this->parsed === false) {
            $this->parse();
        }

        return $this->headerEncrypted;
    }
295
296
    /**
     * Tells whether the archive was flagged as solid during parsing.
     *
     * Triggers the one-time parse on first use.
     */
    public function isSolidArchive(): bool
    {
        if ($this->parsed === false) {
            $this->parse();
        }

        return $this->solidArchive;
    }
307
308
    /**
     * Tells whether a kEncodedHeader block was encountered, in which case the
     * caller is expected to fall back to an external 7z listing.
     *
     * Triggers the one-time parse on first use.
     */
    public function hasEncodedHeader(): bool
    {
        if ($this->parsed === false) {
            $this->parse();
        }

        return $this->encodedHeader;
    }
316
317
    /**
     * Exposes the most recent diagnostic message recorded by the parser.
     *
     * Unlike the other accessors this does not trigger parsing, so the value is
     * the initial empty string until some other accessor has run the parse.
     */
    public function getLastError(): string
    {
        $message = $this->lastError;

        return $message;
    }
324
325
    /**
     * Checks whether the buffer starts with the six-byte 7z signature.
     *
     * Only the signature is inspected; no parsing is triggered.
     */
    public function isValid7zSignature(): bool
    {
        // Too short to even hold the signature.
        if ($this->len < 6) {
            return false;
        }

        return strncmp($this->data, "\x37\x7A\xBC\xAF\x27\x1C", 6) === 0;
    }
332
333
    /**
     * Computes packed size divided by unpacked size.
     *
     * Triggers the one-time parse on first use.
     *
     * @return float|null the ratio (values above 1.0 are possible), or null when
     *                    either total is not a positive number
     */
    public function getCompressionRatio(): ?float
    {
        if ($this->parsed === false) {
            $this->parse();
        }

        $packed = $this->totalPackedSize;
        $unpacked = $this->totalUnpackedSize;

        // Both totals must be known (positive) for the ratio to be meaningful.
        if ($unpacked <= 0 || $packed <= 0) {
            return null;
        }

        return $packed / $unpacked;
    }
348
349
    /**
     * Runs the one-time parse of the buffer and fills the properties exposed by the accessors.
     *
     * Flow:
     *  1. validate the fixed 32-byte start header (length and signature);
     *  2. read the next-header offset/size and scan the packed region for codec signatures;
     *  3. if the header lies (partly) outside the buffer, parse what is available;
     *  4. if the header is encoded, inspect it for encryption/codec info only;
     *  5. otherwise walk the header blocks until K_END and build the file list.
     *
     * Every failure path records a message in $lastError and falls back to the
     * heuristic filename scan, so this method never throws on malformed input.
     */
    private function parse(): void
    {
        // Mark first so a failure part-way through does not trigger a second parse.
        $this->parsed = true;

        if ($this->len < 32) { // need at least fixed header
            $this->lastError = 'Data too short for 7z header (need at least 32 bytes)';
            // Try fallback scanning even for very short data
            $this->fallbackScanForFilenames();

            return;
        }

        if (! $this->isValid7zSignature()) {
            $this->lastError = 'Invalid 7z signature';
            // Try fallback scanning anyway - data might be partial
            $this->fallbackScanForFilenames();

            return;
        }

        // The version bytes (6-7) and the next-header CRC (28-31) are not used by this
        // parser, so they are deliberately not read.
        $nextHeaderOffset = $this->readUInt64LE(12);
        $nextHeaderSize = $this->readUInt64LE(20);

        // A negative offset (possible if the 64-bit value does not fit a signed int) would
        // turn into negative string offsets below, which PHP resolves from the END of the
        // buffer and would make us silently parse unrelated bytes.
        if ($nextHeaderOffset < 0) {
            $this->lastError = 'Invalid next header offset: '.$nextHeaderOffset;
            $this->fallbackScanForFilenames();

            return;
        }

        // Scan the packed data region for compression method signatures
        $this->scanForCompressionMethods(32, min(32 + $nextHeaderOffset, $this->len));

        // Bounds sanity
        if ($nextHeaderSize <= 0 || $nextHeaderSize > 16 * 1024 * 1024) { // cap 16MB header
            $this->lastError = 'Invalid next header size: '.$nextHeaderSize;
            $this->fallbackScanForFilenames();

            return;
        }

        $nextHeaderStart = 32 + $nextHeaderOffset;
        $nextHeaderEnd = $nextHeaderStart + $nextHeaderSize;

        if ($nextHeaderEnd > $this->len) {
            // Incomplete buffer - still try to parse what we have
            $this->lastError = 'Incomplete archive data (header extends beyond buffer)';

            if ($nextHeaderStart < $this->len) {
                $this->parsePartialHeader((int) $nextHeaderStart, $this->len);
            }

            $this->fallbackScanForFilenames();

            return;
        }

        $cursor = (int) $nextHeaderStart;
        $headerEnd = (int) $nextHeaderEnd;

        // First byte may be kEncodedHeader (unsupported) or kHeader
        $id = ord($this->data[$cursor]);

        if ($id === self::K_ENCODED_HEADER) {
            $this->encodedHeader = true;

            // Parse encoded header for compression info and encryption detection
            $this->parseEncodedHeader($cursor, $headerEnd);

            // Fallback scan for any readable filenames
            $this->fallbackScanForFilenames();

            return;
        }

        if ($id !== self::K_HEADER) {
            $this->lastError = 'Unexpected header ID: '.$id;
            $this->fallbackScanForFilenames();

            return;
        }

        $cursor++;

        // Walk the top-level blocks until K_END; every handler returns the next
        // cursor position, or -1 when it could not make sense of the data.
        while ($cursor < $headerEnd) {
            $id = ord($this->data[$cursor]);
            $cursor++;

            if ($id === self::K_END) {
                break;
            }

            $cursor = match ($id) {
                self::K_ARCHIVE_PROPERTIES,
                self::K_ADDITIONAL_STREAMS_INFO => $this->skipUntilEnd($cursor, $headerEnd),
                self::K_MAIN_STREAMS_INFO => $this->parseMainStreamsInfo($cursor, $headerEnd),
                self::K_FILES_INFO => $this->parseFilesInfo($cursor, $headerEnd),
                // Unknown property: treat it as size-prefixed and skip its payload.
                // readVIntAt() advances $cursor past the size field before the sum is taken.
                default => $this->readVIntAt($cursor, $propSize, $cursor, $headerEnd)
                    ? $cursor + $propSize
                    : -1,
            };

            if ($cursor === -1) {
                break;
            }
        }

        // Build consolidated file info array
        $this->buildFileInfo();

        // If we didn't find any files, try fallback
        if (empty($this->names)) {
            $this->fallbackScanForFilenames();
        }
    }
483
484
    /**
     * Inspects an encoded header block: flags AES usage and walks the stream-info
     * records that follow the marker byte.
     *
     * @param int $cursor offset of the K_ENCODED_HEADER marker byte
     * @param int $limit  exclusive end offset of the header region
     */
    private function parseEncodedHeader(int $cursor, int $limit): void
    {
        $cursor++; // step over the K_ENCODED_HEADER marker

        // Heuristic: the AES codec ID appearing within the first 1024 bytes marks
        // both the archive and its header as encrypted.
        $window = substr($this->data, $cursor, min(1024, $limit - $cursor));
        if (str_contains($window, self::METHOD_AES)) {
            $this->encrypted = true;
            $this->headerEncrypted = true;
        }

        // Walk the stream-info records until K_END, the limit, or a handler failure.
        while ($cursor < $limit) {
            $blockId = ord($this->data[$cursor]);
            $cursor++;

            if ($blockId === self::K_END) {
                return;
            }

            $cursor = match ($blockId) {
                self::K_PACK_INFO => $this->parsePackInfo($cursor, $limit),
                self::K_UNPACK_INFO => $this->parseUnpackInfo($cursor, $limit),
                self::K_SUBSTREAMS_INFO => $this->skipUntilEnd($cursor, $limit),
                // Unknown block: skip it as a size-prefixed payload. readVIntAt()
                // moves $cursor past the size field before the sum is evaluated.
                default => $this->readVIntAt($cursor, $propSize, $cursor, $limit)
                    ? $cursor + $propSize
                    : -1,
            };

            if ($cursor === -1) {
                return;
            }
        }
    }
526
527
    /**
     * Salvages what it can from a header that is only partly present in the buffer.
     *
     * An encoded header is only checked for the AES codec ID. A plain header is
     * scanned byte by byte for the first K_FILES_INFO marker, and the files-info
     * parser is started right after it.
     *
     * @param int $start offset of the first available header byte
     * @param int $end   exclusive end of the available data
     */
    private function parsePartialHeader(int $start, int $end): void
    {
        if ($start >= $end) {
            return;
        }

        $firstId = ord($this->data[$start]);

        if ($firstId === self::K_ENCODED_HEADER) {
            $this->encodedHeader = true;

            // Same AES heuristic as for complete headers, over a 512-byte window.
            $window = substr($this->data, $start, min(512, $end - $start));
            if (str_contains($window, self::METHOD_AES)) {
                $this->encrypted = true;
                $this->headerEncrypted = true;
            }

            return;
        }

        if ($firstId !== self::K_HEADER) {
            return;
        }

        // Brute-force search for the files-info marker; the last byte is excluded
        // because at least one byte must follow the marker.
        for ($pos = $start + 1; $pos < $end - 1; $pos++) {
            if (ord($this->data[$pos]) === self::K_FILES_INFO) {
                $this->parseFilesInfo($pos + 1, $end);

                return;
            }
        }
    }
560
561
    /**
     * Walks a MainStreamsInfo block and dispatches its PackInfo, UnpackInfo and
     * SubstreamsInfo sections to their parsers.
     *
     * @param int $cursor offset just after the K_MAIN_STREAMS_INFO marker
     * @param int $limit  exclusive end offset of the header region
     *
     * @return int offset after the block, or -1 when a section parser failed
     */
    private function parseMainStreamsInfo(int $cursor, int $limit): int
    {
        while ($cursor < $limit) {
            $sectionId = ord($this->data[$cursor]);
            $cursor++;

            if ($sectionId === self::K_END) {
                return $cursor;
            }

            if ($sectionId === self::K_PACK_INFO) {
                $next = $this->parsePackInfo($cursor, $limit);
            } elseif ($sectionId === self::K_UNPACK_INFO) {
                $next = $this->parseUnpackInfo($cursor, $limit);
            } elseif ($sectionId === self::K_SUBSTREAMS_INFO) {
                $next = $this->parseSubstreamsInfo($cursor, $limit);
            } else {
                // Unknown section: rewind onto its ID byte and let the generic skipper take over.
                return $this->skipUntilEnd($cursor - 1, $limit);
            }

            if ($next === -1) {
                return -1;
            }

            $cursor = $next;
        }

        return $cursor;
    }
598
599
    /**
     * Parses a PackInfo section and records the packed (compressed) stream sizes.
     *
     * Layout handled here: PackPos (VInt), NumPackStreams (VInt), then K_SIZE
     * and/or K_CRC sub-blocks terminated by K_END.
     *
     * @param int $cursor offset just after the K_PACK_INFO marker
     * @param int $limit  exclusive end offset of the header region
     *
     * @return int offset after the section, or -1 when the data is malformed or truncated
     */
    private function parsePackInfo(int $cursor, int $limit): int
    {
        // PackPos (VInt) - read only to advance the cursor
        if (! $this->readVIntAt($cursor, $packPos, $cursor, $limit)) {
            return -1;
        }

        // NumPackStreams (VInt)
        if (! $this->readVIntAt($cursor, $numPackStreams, $cursor, $limit)) {
            return -1;
        }

        while ($cursor < $limit) {
            $id = ord($this->data[$cursor]);
            $cursor++;

            if ($id === self::K_END) {
                return $cursor;
            }

            if ($id === self::K_SIZE) {
                for ($i = 0; $i < $numPackStreams && $cursor < $limit; $i++) {
                    // A failed read means the data is truncated or corrupt. Bail out
                    // instead of retrying: NumPackStreams comes from the file and may be
                    // enormous, so retrying would spin for the whole count.
                    if (! $this->readVIntAt($cursor, $size, $cursor, $limit)) {
                        return -1;
                    }

                    $this->packedSizes[] = $size;
                    $this->totalPackedSize += $size;
                }
            } elseif ($id === self::K_CRC) {
                // The digest block starts with a flag byte; make sure it is inside the window.
                if ($cursor >= $limit) {
                    return -1;
                }

                // CRC values themselves are not needed, only skipped.
                $cursor = $this->skipBitVector($cursor, $limit, $numPackStreams);
            } else {
                return -1;
            }
        }

        return $cursor;
    }
640
641
    /**
     * Walks an UnpackInfo section: folder definitions yield compression methods,
     * coder unpack sizes yield uncompressed sizes, anything else is skipped.
     *
     * @param int $cursor offset just after the K_UNPACK_INFO marker
     * @param int $limit  exclusive end offset of the header region
     *
     * @return int offset after the section, or -1 when a sub-block could not be read
     */
    private function parseUnpackInfo(int $cursor, int $limit): int
    {
        while ($cursor < $limit) {
            $blockId = ord($this->data[$cursor]);
            $cursor++;

            if ($blockId === self::K_END) {
                return $cursor;
            }

            $cursor = match ($blockId) {
                self::K_FOLDER => $this->parseFolderInfo($cursor, $limit),
                self::K_CODERS_UNPACK_SIZE => $this->parseCodersUnpackSize($cursor, $limit),
                // K_CRC and unknown IDs are handled identically: read a VInt and skip
                // that many bytes. readVIntAt() moves $cursor past the VInt first.
                // NOTE(review): the 7z format describes the folder CRC block as a digest
                // vector rather than a size-prefixed payload - confirm against real archives.
                default => $this->readVIntAt($cursor, $propSize, $cursor, $limit)
                    ? $cursor + $propSize
                    : -1,
            };

            if ($cursor === -1) {
                return -1;
            }
        }

        return $cursor;
    }
677
678
    /**
     * Parses the folder list of an UnpackInfo section to collect compression
     * methods and to flag solid archives.
     *
     * @param int $cursor offset just after the K_FOLDER marker
     * @param int $limit  exclusive end offset of the header region
     *
     * @return int offset after the folder list, or -1 on malformed/truncated data
     */
    private function parseFolderInfo(int $cursor, int $limit): int
    {
        // NumFolders (VInt)
        if (! $this->readVIntAt($cursor, $folderCount, $cursor, $limit)) {
            return -1;
        }

        // A single folder holding several files is treated as a solid archive.
        // NOTE(review): $this->numFiles is assigned in parseFilesInfo(); if that runs
        // after this method, the count is still 0 here - confirm the call order.
        if ($folderCount === 1 && $this->numFiles > 1) {
            $this->solidArchive = true;
        }

        // The "external" flag byte must be inside the window.
        if ($cursor >= $limit) {
            return -1;
        }
        $isExternal = ord($this->data[$cursor]) !== 0;
        $cursor++;

        if ($isExternal) {
            // Folder data lives in another stream; only its index follows, which is read and discarded.
            return $this->readVIntAt($cursor, $dataIndex, $cursor, $limit) ? $cursor : -1;
        }

        // Inline folders: parse them one after another.
        $done = 0;
        while ($done < $folderCount && $cursor < $limit) {
            $cursor = $this->parseFolder($cursor, $limit);
            if ($cursor === -1) {
                return -1;
            }
            $done++;
        }

        return $cursor;
    }
719
720
    /**
     * Parses one Folder record: its coders (recording each recognised method),
     * then its bind pairs and packed-stream indices, which are only skipped.
     *
     * @param int $cursor offset of the folder's NumCoders field
     * @param int $limit  exclusive end offset of the header region
     *
     * @return int offset after the folder record, or -1 on malformed/truncated data
     */
    private function parseFolder(int $cursor, int $limit): int
    {
        // NumCoders (VInt)
        if (! $this->readVIntAt($cursor, $coderCount, $cursor, $limit)) {
            return -1;
        }

        $inStreams = 0;
        $outStreams = 0;

        for ($coder = 0; $coder < $coderCount && $cursor < $limit; $coder++) {
            // Flag byte: low nibble = codec ID length, 0x10 = complex coder, 0x20 = has properties.
            $flagByte = ord($this->data[$cursor]);
            $cursor++;

            $idLength = $flagByte & 0x0F;
            if ($cursor + $idLength > $limit) {
                return -1;
            }
            $codecId = substr($this->data, $cursor, $idLength);
            $cursor += $idLength;

            $methodName = $this->identifyCompressionMethod($codecId);
            if ($methodName !== null) {
                $this->compressionMethods[] = $methodName;

                // An AES coder anywhere in the chain means the content is encrypted.
                if ($methodName === 'AES-256') {
                    $this->encrypted = true;
                }
            }

            if (($flagByte & 0x10) !== 0) {
                // Complex coder: explicit NumInStreams / NumOutStreams (both VInt).
                if (! $this->readVIntAt($cursor, $numIn, $cursor, $limit)) {
                    return -1;
                }
                if (! $this->readVIntAt($cursor, $numOut, $cursor, $limit)) {
                    return -1;
                }
                $inStreams += $numIn;
                $outStreams += $numOut;
            } else {
                // Simple coder: exactly one input and one output stream.
                $inStreams++;
                $outStreams++;
            }

            if (($flagByte & 0x20) !== 0) {
                // PropertiesSize (VInt) followed by that many property bytes, skipped.
                if (! $this->readVIntAt($cursor, $propSize, $cursor, $limit)) {
                    return -1;
                }
                $cursor += $propSize;
            }
        }

        // Bind pairs: (inIndex, outIndex) VInt pairs, one fewer than the output streams.
        $bindPairCount = $outStreams - 1;
        for ($pair = 0; $pair < $bindPairCount && $cursor < $limit; $pair++) {
            if (! $this->readVIntAt($cursor, $inIndex, $cursor, $limit)) {
                return -1;
            }
            if (! $this->readVIntAt($cursor, $outIndex, $cursor, $limit)) {
                return -1;
            }
        }

        // Packed stream indices are only stored when there is more than one packed stream.
        $packedStreamCount = $inStreams - $bindPairCount;
        if ($packedStreamCount > 1) {
            for ($stream = 0; $stream < $packedStreamCount && $cursor < $limit; $stream++) {
                if (! $this->readVIntAt($cursor, $packedIndex, $cursor, $limit)) {
                    return -1;
                }
            }
        }

        return $cursor;
    }
808
809
    /**
     * Maps raw codec ID bytes to a human-readable method name.
     *
     * "Copy" requires an exact match; every other method is recognised by prefix.
     *
     * @param string $codecId raw codec ID bytes taken from a coder record
     *
     * @return string|null the method name, or null when the ID is not recognised
     */
    private function identifyCompressionMethod(string $codecId): ?string
    {
        if ($codecId === self::METHOD_COPY) {
            return 'Copy';
        }

        // Prefix => name pairs, checked in order. Stored as pairs rather than array
        // keys so the binary prefixes are never subject to key normalisation.
        $prefixTable = [
            [self::METHOD_LZMA, 'LZMA'],
            [self::METHOD_LZMA2, 'LZMA2'],
            [self::METHOD_PPMD, 'PPMd'],
            [self::METHOD_BCJ, 'BCJ'],
            [self::METHOD_BCJ2, 'BCJ2'],
            [self::METHOD_DEFLATE, 'Deflate'],
            [self::METHOD_BZIP2, 'BZip2'],
            [self::METHOD_AES, 'AES-256'],
            ["\x03\x03\x01\x05", 'ARM'],
            ["\x03\x03\x01\x08", 'SPARC'],
        ];

        foreach ($prefixTable as [$prefix, $label]) {
            if (str_starts_with($codecId, $prefix)) {
                return $label;
            }
        }

        return null;
    }
850
851
    /**
     * Reads the coder unpack sizes as a run of VInts and accumulates them.
     *
     * This is a simplification: sizes are read until the next byte looks like
     * K_END or K_CRC, until a VInt cannot be read, or until the limit is reached.
     * NOTE(review): a size whose first byte equals 0x00 or 0x0A is indistinguishable
     * from those markers here and will end the run early.
     *
     * @param int $cursor offset just after the K_CODERS_UNPACK_SIZE marker
     * @param int $limit  exclusive end offset of the header region
     *
     * @return int offset of the first byte that was not consumed
     */
    private function parseCodersUnpackSize(int $cursor, int $limit): int
    {
        while ($cursor < $limit) {
            $peek = ord($this->data[$cursor]);
            $atMarker = $peek === self::K_END || $peek === self::K_CRC;

            // Stop at a marker (leaving it for the caller) or at an unreadable VInt.
            if ($atMarker || ! $this->readVIntAt($cursor, $size, $next, $limit)) {
                break;
            }

            $this->sizes[] = $size;
            $this->totalUnpackedSize += $size;
            $cursor = $next;
        }

        return $cursor;
    }
874
875
    /**
     * Steps over a SubstreamsInfo section without extracting anything from it.
     *
     * Every property (K_NUM_UNPACK_STREAM, K_SIZE, K_CRC or unknown) is handled
     * the same way: a VInt is read and that many bytes are skipped.
     * NOTE(review): the 7z format does not describe these properties as
     * size-prefixed - confirm this skip strategy against real archives.
     *
     * @param int $cursor offset just after the K_SUBSTREAMS_INFO marker
     * @param int $limit  exclusive end offset of the header region
     *
     * @return int offset after the section, or -1 when a VInt could not be read
     */
    private function parseSubstreamsInfo(int $cursor, int $limit): int
    {
        while ($cursor < $limit) {
            $propertyId = ord($this->data[$cursor]);
            $cursor++;

            if ($propertyId === self::K_END) {
                return $cursor;
            }

            if (! $this->readVIntAt($cursor, $propSize, $cursor, $limit)) {
                return -1;
            }

            $cursor += $propSize;
        }

        return $cursor;
    }
924
925
    /**
     * Skips a 7z "Digests" structure: an all-defined flag byte, an optional
     * bit array marking which items have a CRC, then one 4-byte CRC per defined item.
     *
     * The method never reads at or beyond $limit and never returns -1, so callers
     * that feed the result straight back into a `while ($cursor < $limit)` loop stay safe.
     * It assumes, like the rest of the parser, that $limit does not exceed the buffer length.
     *
     * @param int $cursor   offset of the all-defined flag byte
     * @param int $limit    exclusive end offset of the header region
     * @param int $numItems number of items the structure describes
     *
     * @return int offset after the structure; $limit when the bit array is truncated
     */
    private function skipBitVector(int $cursor, int $limit, int $numItems): int
    {
        // Nothing readable: leave the cursor where it is instead of indexing past the window.
        if ($cursor >= $limit) {
            return $cursor;
        }

        $allDefined = ord($this->data[$cursor]) !== 0;
        $cursor++;

        $definedCount = max(0, $numItems);

        if (! $allDefined) {
            // One bit per item, most significant bit first, padded to whole bytes.
            $bitmapBytes = intdiv($definedCount, 8) + ($definedCount % 8 === 0 ? 0 : 1);
            if ($bitmapBytes > $limit - $cursor) {
                return $limit; // truncated bit array
            }

            $bitmap = substr($this->data, $cursor, $bitmapBytes);
            $cursor += $bitmapBytes;

            // Only items whose bit is set carry a CRC, so count the set bits.
            $fullBytes = intdiv($definedCount, 8);
            $tailBits = $definedCount % 8;
            $definedCount = 0;
            for ($i = 0; $i < $bitmapBytes; $i++) {
                $byte = ord($bitmap[$i]);
                if ($i === $fullBytes) {
                    // Partial last byte: ignore the padding bits below the top $tailBits bits.
                    $byte &= (0xFF << (8 - $tailBits)) & 0xFF;
                }
                $definedCount += substr_count(decbin($byte), '1');
            }
        }

        // Skip as many whole 4-byte CRC values as are both defined and available.
        $available = intdiv($limit - $cursor, 4);

        return $cursor + 4 * min($definedCount, $available);
    }
946
947
    /**
     * Parse the files-info section: a file count followed by
     * (property id, payload size, payload) records terminated by K_END.
     *
     * Names are stored in $this->names (de-duplicated); times, attributes and
     * CRCs are stored in their respective properties. Empty-stream, empty-file
     * and anti-item vectors are skipped because nothing consumes them.
     *
     * @param  int  $cursor  Offset of the file-count number.
     * @param  int  $limit  Exclusive upper bound of the section.
     * @return int Offset where parsing stopped, or $limit when the file count
     *             is unreadable or outside the 0..100000 sanity range.
     */
    private function parseFilesInfo(int $cursor, int $limit): int
    {
        // Never index past the buffer, even if the caller's limit is too large.
        $end = min($limit, $this->len);

        $count = null;
        $afterCount = $cursor;
        if (! $this->readVIntAt($cursor, $count, $afterCount, $end)) {
            $count = null;
        }
        if ($count === null || $count < 0 || $count > 100000) { // sanity cap
            $this->lastError = 'Invalid number of files: '.($count ?? '');

            return $limit;
        }

        $cursor = $afterCount;
        $this->numFiles = $count;
        $names = [];

        // Property loop until K_END
        while ($cursor < $end) {
            $propId = ord($this->data[$cursor]);
            $cursor++;

            if ($propId === self::K_END) {
                break;
            }

            // Size of property data
            $propSize = 0;
            if (! $this->readVIntAt($cursor, $propSize, $cursor, $end)) {
                break;
            }
            if ($propSize < 0 || $propSize > ($end - $cursor)) {
                break;
            }

            switch ($propId) {
                case self::K_NAME:
                    $names = $this->parseNames($cursor, $propSize, $count);
                    break;

                case self::K_MTIME:
                    $this->mtimes = $this->parseFileTimes($cursor, $propSize, $count);
                    break;

                case self::K_CTIME:
                    $this->ctimes = $this->parseFileTimes($cursor, $propSize, $count);
                    break;

                case self::K_ATIME:
                    $this->atimes = $this->parseFileTimes($cursor, $propSize, $count);
                    break;

                case self::K_WIN_ATTRIBUTES:
                    $this->attributes = $this->parseAttributes($cursor, $propSize, $count);
                    break;

                case self::K_CRC:
                    $this->crcs = $this->parseCRCs($cursor, $propSize, $count);
                    break;

                default:
                    // K_EMPTY_STREAM, K_EMPTY_FILE, K_ANTI and unknown
                    // properties: the payload is not used, only skipped.
                    break;
            }

            // Every property is skipped by its declared size, regardless of
            // how much of the payload the sub-parser understood.
            $cursor += $propSize;
        }

        // Assign collected names
        if ($names !== []) {
            $this->names = array_values(array_unique($names));
        }

        return $cursor;
    }
1046
1047
    /**
     * Parse file names from a K_NAME property payload.
     *
     * The payload is one "external" byte followed by UTF-16LE strings, each
     * terminated by a 00 00 code unit. Terminators are searched on 2-byte
     * boundaries only, so a code unit ending in 00 followed by one starting
     * with 00 is not mistaken for a terminator.
     *
     * @param  int  $cursor  Offset of the external flag byte.
     * @param  int  $propSize  Size of the property payload in bytes.
     * @param  int  $numFiles  Maximum number of names to return.
     * @return array<int, string> Normalised UTF-8 names ('/' separators, no
     *                            leading './'); empty when the data is
     *                            external or unreadable.
     */
    private function parseNames(int $cursor, int $propSize, int $numFiles): array
    {
        $names = [];

        if ($propSize < 1 || $cursor < 0 || $cursor >= $this->len) {
            return $names;
        }

        if (ord($this->data[$cursor]) !== 0) { // External data not supported
            return $names;
        }

        $blob = substr($this->data, $cursor + 1, $propSize - 1);
        // Only whole UTF-16 code units are usable; ignore a dangling odd byte.
        $usable = strlen($blob) & ~1;

        $segments = [];
        $segmentStart = 0;
        for ($pos = 0; $pos < $usable; $pos += 2) {
            if ($blob[$pos] === "\0" && $blob[$pos + 1] === "\0") {
                $segments[] = substr($blob, $segmentStart, $pos - $segmentStart);
                $segmentStart = $pos + 2;
            }
        }
        if ($segmentStart < $usable) {
            // Trailing name without a terminator (truncated buffer).
            $segments[] = substr($blob, $segmentStart, $usable - $segmentStart);
        }

        foreach ($segments as $seg) {
            if (count($names) >= $numFiles) {
                break;
            }
            if ($seg === '') {
                continue;
            }

            $utf8 = @iconv('UTF-16LE', 'UTF-8//IGNORE', $seg);
            if ($utf8 === false) {
                continue;
            }

            $utf8 = trim($utf8);
            if ($utf8 === '') {
                continue;
            }

            // Normalize path separators and remove a leading './'
            $clean = str_replace('\\', '/', $utf8);
            if (str_starts_with($clean, './')) {
                $clean = substr($clean, 2);
            }

            if ($clean === '' || substr_count($clean, '/') > 16) { // excessive depth -> skip
                continue;
            }

            $names[] = $clean;
        }

        return $names;
    }
1113
1114
    /**
     * Parse a bit vector that is preceded by an "all defined" flag byte.
     *
     * A non-zero flag byte means every item is set. Otherwise the following
     * bytes hold one bit per item, most significant bit first. Bits that lie
     * outside the property payload or the buffer are reported as false.
     *
     * NOTE(review): 7zFormat.txt describes kEmptyStream/kEmptyFile/kAnti as
     * bare bit vectors without the leading flag byte - confirm that this
     * layout is what every caller expects.
     *
     * @param  int  $cursor  Offset of the flag byte.
     * @param  int  $propSize  Size of the property payload in bytes.
     * @param  int  $numItems  Number of bits to decode.
     * @return array<int, bool>
     */
    private function parseBitVector(int $cursor, int $propSize, int $numItems): array
    {
        $numItems = max(0, $numItems);
        $result = array_fill(0, $numItems, false);

        if ($propSize < 1 || $cursor < 0 || $cursor >= $this->len) {
            return $result;
        }

        if (ord($this->data[$cursor]) !== 0) {
            // All items are defined
            return array_fill(0, $numItems, true);
        }

        $bitsStart = $cursor + 1;
        $end = min($cursor + $propSize, $this->len);

        for ($i = 0; $i < $numItems; $i++) {
            $bytePos = $bitsStart + intdiv($i, 8);
            if ($bytePos >= $end) {
                break; // remaining bits are not present in the payload
            }
            $result[$i] = ((ord($this->data[$bytePos]) >> (7 - ($i % 8))) & 1) === 1;
        }

        return $result;
    }
1148
1149
    /**
     * Parse file times from a time property payload.
     *
     * Layout read here: all-defined flag, optional per-file bit vector,
     * external flag, then one 8-byte FILETIME per defined file. Values are
     * only read from inside the property payload.
     *
     * @param  int  $cursor  Offset of the all-defined byte.
     * @param  int  $propSize  Size of the property payload in bytes.
     * @param  int  $numFiles  Number of files the property describes.
     * @return array<int, int|null> Unix timestamps indexed by file; null for
     *                              files without a readable value. Empty when
     *                              the payload is external or unreadable.
     */
    private function parseFileTimes(int $cursor, int $propSize, int $numFiles): array
    {
        $times = [];

        if ($propSize < 1 || $cursor < 0 || $cursor >= $this->len) {
            return $times;
        }

        $end = min($cursor + $propSize, $this->len);

        // Which files have a time stored
        $definedBits = $this->parseBitVector($cursor, $propSize, $numFiles);
        $allDefined = ord($this->data[$cursor]) !== 0;
        // Flag byte, plus the bit vector when not every file is defined.
        $cursor += 1 + ($allDefined ? 0 : intdiv(max(0, $numFiles) + 7, 8));

        // External flag
        if ($cursor >= $end) {
            return $times;
        }
        $external = ord($this->data[$cursor]);
        $cursor++;

        if ($external !== 0) {
            return $times; // External data not supported
        }

        // FILETIME values, 8 bytes each, stored only for defined files
        for ($i = 0; $i < $numFiles; $i++) {
            if (! empty($definedBits[$i]) && $cursor + 8 <= $end) {
                $times[$i] = $this->filetimeToUnix($this->readUInt64LE($cursor));
                $cursor += 8;
            } else {
                $times[$i] = null;
            }
        }

        return $times;
    }
1210
1211
    /**
     * Parse Windows attributes from an attributes property payload.
     *
     * Layout read here: all-defined flag, optional per-file bit vector,
     * external flag, then one 32-bit little-endian value per defined file.
     * Values are only read from inside the property payload.
     *
     * @param  int  $cursor  Offset of the all-defined byte.
     * @param  int  $propSize  Size of the property payload in bytes.
     * @param  int  $numFiles  Number of files the property describes.
     * @return array<int, int|null> Attribute words indexed by file; null for
     *                              files without a readable value. Empty when
     *                              the payload is external or unreadable.
     */
    private function parseAttributes(int $cursor, int $propSize, int $numFiles): array
    {
        $attrs = [];

        if ($propSize < 1 || $cursor < 0 || $cursor >= $this->len) {
            return $attrs;
        }

        $end = min($cursor + $propSize, $this->len);

        // Which files have attributes stored
        $definedBits = $this->parseBitVector($cursor, $propSize, $numFiles);
        $allDefined = ord($this->data[$cursor]) !== 0;
        // Flag byte, plus the bit vector when not every file is defined.
        $cursor += 1 + ($allDefined ? 0 : intdiv(max(0, $numFiles) + 7, 8));

        // External flag
        if ($cursor >= $end) {
            return $attrs;
        }
        $external = ord($this->data[$cursor]);
        $cursor++;

        if ($external !== 0) {
            return $attrs; // External data not supported
        }

        // Attribute words, 4 bytes each, stored only for defined files
        for ($i = 0; $i < $numFiles; $i++) {
            if (! empty($definedBits[$i]) && $cursor + 4 <= $end) {
                $attrs[$i] = $this->readUInt32LE($cursor);
                $cursor += 4;
            } else {
                $attrs[$i] = null;
            }
        }

        return $attrs;
    }
1269
1270
    /**
     * Parse CRC values from a CRC property payload.
     *
     * Layout read here: all-defined flag, optional per-file bit vector, then
     * one 32-bit little-endian CRC per defined file. Values are only read
     * from inside the property payload.
     *
     * @param  int  $cursor  Offset of the all-defined byte.
     * @param  int  $propSize  Size of the property payload in bytes.
     * @param  int  $numFiles  Number of files the property describes.
     * @return array<int, string|null> Upper-case 8-digit hex CRCs indexed by
     *                                 file; null for files without a readable
     *                                 value. Empty when the payload is
     *                                 unreadable.
     */
    private function parseCRCs(int $cursor, int $propSize, int $numFiles): array
    {
        $crcs = [];

        if ($propSize < 1 || $cursor < 0 || $cursor >= $this->len) {
            return $crcs;
        }

        $end = min($cursor + $propSize, $this->len);

        // Which files have a CRC stored
        $definedBits = $this->parseBitVector($cursor, $propSize, $numFiles);
        $allDefined = ord($this->data[$cursor]) !== 0;
        // Flag byte, plus the bit vector when not every file is defined.
        $cursor += 1 + ($allDefined ? 0 : intdiv(max(0, $numFiles) + 7, 8));

        // CRC values, 4 bytes each, stored only for defined files
        for ($i = 0; $i < $numFiles; $i++) {
            if (! empty($definedBits[$i]) && $cursor + 4 <= $end) {
                $crcs[$i] = sprintf('%08X', $this->readUInt32LE($cursor));
                $cursor += 4;
            } else {
                $crcs[$i] = null;
            }
        }

        return $crcs;
    }
1318
1319
    /**
     * Merge the per-file metadata arrays into $this->files.
     *
     * One entry is written for every index up to the larger of the number of
     * collected names and the declared file count; metadata that is missing
     * for an index is stored as null.
     */
    private function buildFileInfo(): void
    {
        $total = max(count($this->names), $this->numFiles);
        $index = 0;

        while ($index < $total) {
            $attributeWord = $this->attributes[$index] ?? null;

            $this->files[$index] = [
                'name' => $this->names[$index] ?? null,
                'size' => $this->sizes[$index] ?? null,
                'packed_size' => $this->packedSizes[$index] ?? null,
                'crc' => $this->crcs[$index] ?? null,
                'attributes' => $attributeWord,
                // Directory status is only known when attributes were read.
                'is_dir' => $attributeWord !== null
                    && ($attributeWord & self::FILE_ATTRIBUTE_DIRECTORY) !== 0,
                'mtime' => $this->mtimes[$index] ?? null,
                'ctime' => $this->ctimes[$index] ?? null,
                'atime' => $this->atimes[$index] ?? null,
            ];

            $index++;
        }
    }
1345
1346
    /**
     * Search a byte range of the buffer for known method signatures and
     * record a label for each one found. An AES signature also marks the
     * archive as encrypted.
     */
    private function scanForCompressionMethods(int $start, int $end): void
    {
        $window = substr($this->data, $start, $end - $start);

        // Encryption is checked first so its label precedes the codecs.
        if (strpos($window, self::METHOD_AES) !== false) {
            $this->encrypted = true;
            $this->compressionMethods[] = 'AES-256';
        }

        // Signature => label, in the order labels are recorded.
        $codecs = [
            [self::METHOD_LZMA2, 'LZMA2'],
            [self::METHOD_LZMA, 'LZMA'],
            [self::METHOD_PPMD, 'PPMd'],
            [self::METHOD_BZIP2, 'BZip2'],
            [self::METHOD_DEFLATE, 'Deflate'],
        ];

        foreach ($codecs as [$signature, $label]) {
            if (strpos($window, $signature) === false) {
                continue;
            }
            $this->compressionMethods[] = $label;
        }
    }
1374
1375
    /**
     * Heuristic name recovery used when no names were parsed.
     *
     * Searches the raw buffer for well-known file extensions encoded as
     * UTF-16LE, walks backwards from each hit to the start of the name and
     * keeps candidates that pass isValidFilename(). Does nothing when names
     * are already present.
     */
    private function fallbackScanForFilenames(): void
    {
        if ($this->names !== []) {
            return; // Already have names
        }

        $extensions = [
            '.avi', '.mkv', '.mp4', '.wmv', '.mov',
            '.mp3', '.flac', '.wav', '.ogg',
            '.rar', '.zip', '.exe', '.dll',
            '.nfo', '.txt', '.pdf', '.doc',
            '.jpg', '.png', '.gif', '.bmp',
            '.iso', '.bin', '.img', '.nrg',
        ];

        $candidates = [];

        foreach ($extensions as $extension) {
            $needle = @iconv('UTF-8', 'UTF-16LE', $extension);
            if ($needle === false) {
                continue;
            }
            $needleLen = strlen($needle);

            // Visit every occurrence; the search resumes just past each hit.
            for (
                $hit = strpos($this->data, $needle, 0);
                $hit !== false;
                $hit = strpos($this->data, $needle, $hit + $needleLen)
            ) {
                $begin = $this->findFilenameStart($hit);
                if ($begin === false || $begin >= $hit) {
                    continue;
                }

                $raw = substr($this->data, $begin, ($hit - $begin) + $needleLen);
                $rawLen = strlen($raw);
                // Reasonable length only
                if ($rawLen <= 2 || $rawLen >= 512) {
                    continue;
                }

                $decoded = @iconv('UTF-16LE', 'UTF-8//IGNORE', $raw);
                if ($decoded === false || ! $this->isValidFilename($decoded)) {
                    continue;
                }

                $candidates[] = str_replace('\\', '/', $decoded);
            }
        }

        // NFO files get a dedicated pass as well
        $this->scanForNfoFiles($candidates);

        if ($candidates !== []) {
            $this->names = array_values(array_unique($candidates));
        }
    }
1432
1433
    /**
     * Walk backwards from an extension hit, two bytes at a time, to locate
     * where the UTF-16LE name begins.
     *
     * The walk covers at most 256 code units and stops at the first code unit
     * whose high byte is zero and whose low byte is below 32 (this includes
     * the 00 00 terminator); the name then starts right after it.
     *
     * @return int|false Start offset, or false when it would be negative.
     */
    private function findFilenameStart(int $extensionPos): int|false
    {
        $begin = $extensionPos;
        $lowest = max(0, $extensionPos - 256 * 2);
        $idx = $extensionPos - 2;

        while ($idx >= $lowest) {
            // Positions whose second byte lies outside the buffer are skipped.
            if ($idx + 1 < $this->len) {
                $low = ord($this->data[$idx]);
                $high = ord($this->data[$idx + 1]);

                if ($high === 0 && $low < 32) {
                    $begin = $idx + 2;
                    break;
                }

                $begin = $idx;
            }

            $idx -= 2;
        }

        return $begin < 0 ? false : $begin;
    }
1470
1471
    /**
     * Heuristic check that a decoded string looks like a file name.
     *
     * Accepts names that, after trimming, are non-empty, at most 260 bytes
     * long, contain at least one ASCII letter or digit, and contain no more
     * than five bytes outside word characters, whitespace, '.', '-', '_',
     * '/' and '\'.
     */
    private function isValidFilename(string $name): bool
    {
        $name = trim($name);

        // Compare against '' explicitly: empty() would also reject "0".
        if ($name === '' || strlen($name) > 260) {
            return false;
        }

        // Must have at least one ASCII letter or digit
        if (! preg_match('/[a-zA-Z0-9]/', $name)) {
            return false;
        }

        // Should not have too many special characters
        $specialCount = preg_match_all('/[^\w\s.\-_\/\\\\]/', $name);
        if ($specialCount > 5) {
            return false;
        }

        return true;
    }
1495
1496
    /**
     * Look for UTF-16LE ".nfo" names in the raw buffer and append every
     * plausible one to $found.
     *
     * @param  array  $found  Candidate list, extended in place.
     */
    private function scanForNfoFiles(array &$found): void
    {
        $needle = @iconv('UTF-8', 'UTF-16LE', '.nfo');
        if ($needle === false) {
            return;
        }
        $needleLen = strlen($needle);

        // Visit every occurrence; the search resumes just past each hit.
        for (
            $hit = strpos($this->data, $needle, 0);
            $hit !== false;
            $hit = strpos($this->data, $needle, $hit + $needleLen)
        ) {
            $begin = $this->findFilenameStart($hit);
            if ($begin === false || $begin >= $hit) {
                continue;
            }

            $raw = substr($this->data, $begin, ($hit - $begin) + $needleLen);
            $rawLen = strlen($raw);
            if ($rawLen <= 2 || $rawLen >= 256) {
                continue;
            }

            $decoded = @iconv('UTF-16LE', 'UTF-8//IGNORE', $raw);
            if ($decoded === false || ! $this->isValidFilename($decoded)) {
                continue;
            }

            $found[] = str_replace('\\', '/', $decoded);
        }
    }
1524
1525
    /**
     * Convert a Windows FILETIME to a Unix timestamp in whole seconds.
     *
     * FILETIME counts 100-nanosecond intervals since 1601-01-01; the Unix
     * epoch lies 116444736000000000 such intervals later.
     *
     * @param  int  $filetime  Raw FILETIME value.
     * @return int Seconds since the Unix epoch, or 0 for values at or before
     *             the epoch (including values read as negative).
     */
    private function filetimeToUnix(int $filetime): int
    {
        $unixEpochDiff = 116444736000000000;

        if ($filetime <= $unixEpochDiff) {
            return 0;
        }

        // Integer division: a float division would first round the operand
        // (values here exceed 2^53) and can land on the next second.
        return intdiv($filetime - $unixEpochDiff, 10000000);
    }
1541
1542
    /**
     * Skip (id, size, payload) records until a K_END id is consumed.
     *
     * @return int Offset just past K_END, or -1 when the limit is reached
     *             first or a record size is unreadable or out of range.
     */
    private function skipUntilEnd(int $cursor, int $limit): int
    {
        for ($pos = $cursor; $pos < $limit;) {
            $recordId = ord($this->data[$pos]);
            $pos++;

            if ($recordId === self::K_END) {
                return $pos;
            }

            // Property-like record: a size, then that many payload bytes.
            $sizeOk = $this->readVIntAt($pos, $payloadSize, $pos, $limit);
            if (! $sizeOk) {
                return -1;
            }
            if ($payloadSize < 0 || $payloadSize > ($limit - $pos)) {
                return -1;
            }

            $pos += $payloadSize;
        }

        return -1;
    }
1562
1563
    /**
     * Read a 7z variable-length number at $offset.
     *
     * Encoding (7zFormat.txt, "UINT64"): the count of leading 1 bits in the
     * first byte gives the number of extra bytes (0-8). The extra bytes hold
     * the low part of the value in little-endian order, and the remaining low
     * bits of the first byte form the high part.
     *
     * @param  int  $offset  Offset of the first byte.
     * @param  int|null  $value  Receives the decoded value (0 on failure). A
     *                           value above PHP_INT_MAX wraps negative, which
     *                           callers reject with their "< 0" checks.
     * @param  int|null  $newOffset  Receives the offset just past the number;
     *                               left untouched on failure.
     * @param  int  $limit  Exclusive upper bound for reads.
     * @return bool True when a complete number was read.
     */
    private function readVIntAt(int $offset, ?int &$value, ?int &$newOffset, int $limit): bool
    {
        $value = 0;
        $limit = min($limit, $this->len);

        if ($offset < 0 || $offset >= $limit) {
            return false;
        }

        $firstByte = ord($this->data[$offset]);
        $pos = $offset + 1;
        $mask = 0x80;
        $result = 0;

        for ($i = 0; $i < 8; $i++) {
            if (($firstByte & $mask) === 0) {
                // No more extra bytes: the first byte's low bits are the high part.
                $result |= ($firstByte & ($mask - 1)) << (8 * $i);
                $value = $result;
                $newOffset = $pos;

                return true;
            }

            if ($pos >= $limit) {
                return false; // number is truncated
            }

            $result |= ord($this->data[$pos]) << (8 * $i);
            $pos++;
            $mask >>= 1;
        }

        // First byte was 0xFF: all eight extra bytes form the value.
        $value = $result;
        $newOffset = $pos;

        return true;
    }
1585
1586
    /**
     * Read a 64-bit little-endian integer.
     *
     * PHP integers are signed, so a value with the top bit set is returned as
     * a negative number. No mask is applied: the literal 0xFFFFFFFFFFFFFFFF
     * is a float (2^64) that converts to 0 in a bitwise AND, which would zero
     * every result.
     *
     * @param  int  $offset  Offset of the least significant byte.
     * @return int The value, or 0 when fewer than 8 bytes are available.
     */
    private function readUInt64LE(int $offset): int
    {
        if ($offset + 8 > $this->len) {
            return 0;
        }

        $value = 0;
        for ($i = 0; $i < 8; $i++) {
            $value |= ord($this->data[$offset + $i]) << ($i * 8);
        }

        return $value;
    }
1599
1600
    /**
     * Read a 32-bit little-endian integer.
     *
     * @param  int  $offset  Offset of the least significant byte.
     * @return int The value, or 0 when fewer than 4 bytes are available.
     */
    private function readUInt32LE(int $offset): int
    {
        if ($offset + 4 > $this->len) {
            return 0;
        }

        $word = 0;
        // Byte position => bit shift, least significant byte first.
        foreach ([0, 8, 16, 24] as $byteIndex => $shift) {
            $word |= ord($this->data[$offset + $byteIndex]) << $shift;
        }

        return $word;
    }
1614
1615
    /**
     * Return an overview of the archive as an associative array: signature
     * validity, entry counts, size totals, compression details, encryption
     * flags and the last recorded error. Triggers parsing when it has not
     * happened yet.
     */
    public function getSummary(): array
    {
        if (! $this->parsed) {
            $this->parse();
        }

        $directoryCount = 0;
        foreach ($this->files as $entry) {
            $directoryCount += $entry['is_dir'] ? 1 : 0;
        }
        // Every entry that is not a directory counts as a regular file.
        $regularCount = count($this->files) - $directoryCount;

        $summary = [];
        $summary['valid_signature'] = $this->isValid7zSignature();
        $summary['file_count'] = $this->numFiles;
        $summary['directory_count'] = $directoryCount;
        $summary['regular_file_count'] = $regularCount;
        $summary['total_unpacked_size'] = $this->totalUnpackedSize;
        $summary['total_packed_size'] = $this->totalPackedSize;
        $summary['compression_ratio'] = $this->getCompressionRatio();
        $summary['compression_methods'] = $this->getCompressionMethods();
        $summary['encrypted'] = $this->encrypted;
        $summary['header_encrypted'] = $this->headerEncrypted;
        $summary['encoded_header'] = $this->encodedHeader;
        $summary['solid_archive'] = $this->solidArchive;
        $summary['last_error'] = $this->lastError;

        return $summary;
    }
1652
1653
    /**
     * Return the entries whose name ends in the given extension.
     *
     * The comparison is case-insensitive and a leading dot on $extension is
     * optional. Entries without a name never match. Original indexes are
     * preserved as keys. Triggers parsing when it has not happened yet.
     *
     * @return array<int, array>
     */
    public function getFilesByExtension(string $extension): array
    {
        if (! $this->parsed) {
            $this->parse();
        }

        $wanted = ltrim(strtolower($extension), '.');

        return array_filter(
            $this->files,
            static function ($entry) use ($wanted) {
                if ($entry['name'] === null) {
                    return false;
                }

                return strtolower(pathinfo($entry['name'], PATHINFO_EXTENSION)) === $wanted;
            }
        );
    }
1680
1681
    /**
     * Tell whether at least one entry has the given extension, using the same
     * matching rules as getFilesByExtension().
     */
    public function hasFileWithExtension(string $extension): bool
    {
        $matching = $this->getFilesByExtension($extension);

        return $matching !== [];
    }
1688
1689
    /**
     * Return the entries flagged as directories, keyed by their original
     * index. Triggers parsing when it has not happened yet.
     *
     * @return array<int, array>
     */
    public function getDirectories(): array
    {
        if (! $this->parsed) {
            $this->parse();
        }

        $directories = [];
        foreach ($this->files as $index => $entry) {
            if ($entry['is_dir']) {
                $directories[$index] = $entry;
            }
        }

        return $directories;
    }
1702
1703
    /**
     * Return the non-directory entry with the greatest known size, or null
     * when no such entry exists. On ties the earliest entry wins. Triggers
     * parsing when it has not happened yet.
     */
    public function getLargestFile(): ?array
    {
        if (! $this->parsed) {
            $this->parse();
        }

        $winner = null;
        $winnerSize = -1;

        foreach ($this->files as $entry) {
            $size = $entry['size'];

            // Skip entries of unknown size, entries that are not strictly
            // larger than the current best, and directories.
            if ($size === null || ! ($size > $winnerSize) || $entry['is_dir']) {
                continue;
            }

            $winnerSize = $size;
            $winner = $entry;
        }

        return $winner;
    }
1724
}
1725
1726