1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
namespace CHMLib\Section; |
4
|
|
|
|
5
|
|
|
use Exception; |
6
|
|
|
use CHMLib\CHM; |
7
|
|
|
use CHMLib\Reader\StringReader; |
8
|
|
|
use CHMLib\Reader\BitReader; |
9
|
|
|
use CHMLib\Header\LZXC; |
10
|
|
|
use CHMLib\LZX\Inflater; |
11
|
|
|
use CHMLib\LZX\LRUCache; |
12
|
|
|
|
13
|
|
|
/**
 * Represent a LZX-compressed section of data in a CHM file.
 */
class MSCompressedSection extends Section
{
    /**
     * The LZX reset interval.
     *
     * @var int
     */
    protected $resetInterval;

    /**
     * The window size.
     *
     * @var int
     */
    protected $windowSize;

    /**
     * The size of the uncompressed data.
     *
     * @var int
     */
    protected $uncompressedLength;

    /**
     * The size of the compressed data.
     *
     * @var int
     */
    protected $compressedLength;

    /**
     * The block size.
     *
     * @var int
     */
    protected $blockSize;

    /**
     * The address table (one offset per block, relative to the start of the compressed content).
     *
     * @var int[]
     */
    protected $addressTable;

    /**
     * The currently cached blocks.
     *
     * @var LRUCache
     */
    protected $cachedBlocks;

    /**
     * Initializes the instance.
     *
     * Reads the LZX control data and the reset table from the parent CHM file,
     * then locates the compressed content entry.
     *
     * @param CHM $chm The parent CHM file.
     *
     * @throws Exception Throws an Exception in case of errors (missing entries, entries stored in the wrong section, unsupported reset table version, compressed size mismatch).
     */
    public function __construct(CHM $chm)
    {
        parent::__construct($chm);

        // Control data: LZX parameters.
        $controlDataEntry = $chm->getEntryByPath('::DataSpace/Storage/MSCompressed/ControlData');
        if ($controlDataEntry === null) {
            throw new Exception("Missing required entry: '::DataSpace/Storage/MSCompressed/ControlData'");
        }
        if ($controlDataEntry->getContentSectionIndex() !== 0) {
            throw new Exception("The content of the entry '{$controlDataEntry->getPath()}' should be in section 0, but it's in section {$controlDataEntry->getContentSectionIndex()}");
        }
        $controlDataReader = new StringReader($controlDataEntry->getContents());
        $lzxc = new LZXC($controlDataReader);
        $this->resetInterval = $lzxc->getResetInterval();
        // The LZXC header value is scaled by 32768 to obtain the window size used by the inflater.
        $this->windowSize = $lzxc->getWindowSize() * 32768;
        $this->cachedBlocks = new LRUCache((1 + $lzxc->getCacheSize()) << 2);

        // Reset table: sizes and per-block offsets.
        $resetTableEntry = $chm->getEntryByPath('::DataSpace/Storage/MSCompressed/Transform/{7FC28940-9D31-11D0-9B27-00A0C91E9C7C}/InstanceData/ResetTable');
        if ($resetTableEntry === null) {
            throw new Exception("Missing required entry: '::DataSpace/Storage/MSCompressed/Transform/{7FC28940-9D31-11D0-9B27-00A0C91E9C7C}/InstanceData/ResetTable'");
        }
        if ($resetTableEntry->getContentSectionIndex() !== 0) {
            throw new Exception("The content of the entry '{$resetTableEntry->getPath()}' should be in section 0, but it's in section {$resetTableEntry->getContentSectionIndex()}");
        }
        $resetTableReader = new StringReader($resetTableEntry->getContents());
        $resetTableVersion = $resetTableReader->readUInt32();
        if ($resetTableVersion !== 2) {
            throw new Exception("Unsupported LZX Reset Table version: $resetTableVersion");
        }
        $addressTableSize = $resetTableReader->readUInt32();
        /* Size of table entry (8) */ $resetTableReader->readUInt32();
        /* Header length (40) */ $resetTableReader->readUInt32();
        $this->uncompressedLength = $resetTableReader->readUInt64();
        $this->compressedLength = $resetTableReader->readUInt64();
        $this->blockSize = $resetTableReader->readUInt64(); // We do not support block sizes bigger than 32-bit integers
        $this->addressTable = array();
        for ($i = 0; $i < $addressTableSize; ++$i) {
            $this->addressTable[$i] = $resetTableReader->readUInt64();
        }

        // Compressed content: must match the size declared in the reset table.
        $contentEntry = $chm->getEntryByPath('::DataSpace/Storage/MSCompressed/Content');
        if ($contentEntry === null) {
            throw new Exception("Missing required entry: '::DataSpace/Storage/MSCompressed/Content'");
        }
        if ($this->compressedLength !== $contentEntry->getLength()) {
            throw new Exception("Compressed section size should be {$this->compressedLength}, but it's {$contentEntry->getLength()}");
        }
        $this->sectionOffset = $chm->getITSF()->getContentOffset() + $contentEntry->getOffset();
    }

    /**
     * {@inheritdoc}
     *
     * Blocks are inflated in groups of $resetInterval blocks; every inflated group is
     * stored in the LRU cache so that later reads of the same group are served from memory.
     *
     * @param int $offset The offset of the first byte to read, relative to the uncompressed data.
     * @param int $length The number of bytes to read (an empty string is returned when it is not greater than zero).
     *
     * @throws Exception Throws an Exception if a read past the last requested block is attempted.
     *
     * @return string
     *
     * @see Section::getContents()
     */
    public function getContents($offset, $length)
    {
        $result = '';
        if ($length > 0) {
            $startBlockNo = (int) ($offset / $this->blockSize);
            $startOffset = $offset % $this->blockSize;
            $endBlockNo = (int) (($offset + $length) / $this->blockSize);
            $endOffset = (int) (($offset + $length) % $this->blockSize);
            if ($endOffset === 0 && $endBlockNo > $startBlockNo) {
                // The range ends exactly on a block boundary: the last block is the previous one, read in full.
                $endOffset = $this->blockSize;
                --$endBlockNo;
            }
            // Start from the first block of the group (of $resetInterval blocks) containing the start block.
            $blockNo = $startBlockNo - $startBlockNo % $this->resetInterval;
            $inflater = new Inflater($this->windowSize);
            $pos = 0;
            $bytesLeft = 0;
            $reader = $this->chm->getReader();
            $numBlocks = count($this->addressTable);
            while ($bytesLeft > 0 || $blockNo <= $endBlockNo) {
                $data = '';
                while ($bytesLeft <= 0) {
                    // Read block
                    if ($blockNo > $endBlockNo) {
                        throw new Exception('Read after last data block');
                    }
                    $cacheNo = (int) ($blockNo / $this->resetInterval);
                    $cache = $this->cachedBlocks->get($cacheNo);
                    if ($cache === null) {
                        // Cache miss: inflate the whole group this block belongs to.
                        $this->cachedBlocks->prune();
                        $cache = array();
                        $resetBlockNo = $blockNo - $blockNo % $this->resetInterval;
                        for ($i = 0; $i < $this->resetInterval && $resetBlockNo + $i < $numBlocks; ++$i) {
                            $thisBlockNo = $resetBlockNo + $i;
                            // The compressed length of a block is the distance to the next block's
                            // address (or to the end of the compressed data for the last block).
                            $len = ($thisBlockNo + 1 < $numBlocks) ?
                                ($this->addressTable[$thisBlockNo + 1] - $this->addressTable[$thisBlockNo])
                                :
                                ($this->compressedLength - $this->addressTable[$thisBlockNo]);
                            $reader->setPosition($this->sectionOffset + $this->addressTable[$thisBlockNo]);
                            $bitReader = new BitReader($reader->readString($len));
                            // The first argument is true only for the first block of the group
                            // (presumably asking the inflater to reset its state - see Inflater::inflate()).
                            $cache[$i] = $inflater->inflate(
                                $i === 0,
                                $bitReader,
                                $this->blockSize
                            );
                        }
                        $this->cachedBlocks->put($cacheNo, $cache);
                    }
                    $data = $cache[$blockNo % $this->resetInterval];
                    // the start block has special pos value
                    $pos = ($blockNo === $startBlockNo) ? $startOffset : 0;
                    // the end block has special length
                    $bytesLeft = ($blockNo < $startBlockNo) ? 0 : (($blockNo < $endBlockNo) ? $this->blockSize : $endOffset);
                    $bytesLeft -= $pos;
                    ++$blockNo;
                }
                $togo = $bytesLeft;
                $result .= substr($data, $pos, $togo);
                $pos += $togo;
                $bytesLeft -= $togo;
            }
        }

        return $result;
    }
}
192
|
|
|
|
Sometimes obsolete code just ends up commented out instead of removed. In this case it is better to remove the code once you have checked you do not need it.
The code might also have been commented out for debugging purposes. In this case it is vital that someone uncomments it again or your project may behave in very unexpected ways in production.
This check looks for comments that seem to be mostly valid code and reports them.