1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
namespace CHMLib; |
4
|
|
|
|
5
|
|
|
use Exception; |
6
|
|
|
use CHMLib\Reader\Reader; |
7
|
|
|
use CHMLib\Reader\StringReader; |
8
|
|
|
use CHMLib\Exception\UnexpectedHeaderException; |
9
|
|
|
use CHMLib\Reader\FileReader; |
10
|
|
|
|
11
|
|
|
/**
 * Handle the contents of a CHM file.
 *
 * On construction the instance parses the ITSF and ITSP headers, builds the
 * list of entries (keyed by their full path) and the list of data sections.
 * The TOC and the index are parsed lazily, the first time they are requested.
 */
class CHM
{
    /**
     * The reader that provides the data.
     *
     * @var Reader
     */
    protected $reader;

    /**
     * The CHM initial header.
     *
     * @var Header\ITSF
     */
    protected $itsf;

    /**
     * The directory listing header.
     *
     * @var Header\ITSP
     */
    protected $itsp;

    /**
     * The entries found in this CHM (array keys are the entry full paths).
     *
     * @var Entry[]
     */
    protected $entries;

    /**
     * The data sections (array keys are the section indexes; index 0 is always the uncompressed section).
     *
     * @var Section\Section[]
     */
    protected $sections;

    /**
     * The TOC (null: not loaded yet; false: this CHM has no TOC).
     *
     * @var TOCIndex\Tree|null|false
     */
    protected $toc;

    /**
     * The index (null: not loaded yet; false: this CHM has no index).
     *
     * @var TOCIndex\Tree|null|false
     */
    protected $index;

    /**
     * Initializes the instance.
     *
     * @param Reader $reader The reader that provides the data.
     *
     * @throws Exception Throws an Exception in case of errors.
     */
    public function __construct(Reader $reader)
    {
        $this->reader = $reader;
        $reader->setPosition(0);
        $this->itsf = new Header\ITSF($reader);

        // 16 bytes are consumed below (4 + 4 + 8). The original author noted that the section length is normally 24.
        if ($this->itsf->getSectionOffset() >= 0 && $this->itsf->getSectionLength() >= 16) {
            $reader->setPosition($this->itsf->getSectionOffset());
            // Skip two 32-bit fields whose meaning is unknown (values observed so far: 510 and 0).
            $reader->readUInt32();
            $reader->readUInt32();
            $totalLength = $reader->readUInt64();
            if ($totalLength !== $reader->getLength()) {
                throw new Exception("Invalid CHM size: expected length $totalLength, current length {$reader->getLength()}");
            }
        }

        $reader->setPosition($this->itsf->getDirectoryOffset());
        $this->itsp = new Header\ITSP($reader);

        // The directory size declared in the ITSF header must match the one described by the ITSP header.
        $expectedDirectoryLength = $this->itsf->getDirectoryLength();
        $calculatedDirectoryLength = $this->itsp->getHeaderLength() + $this->itsp->getNumberOfDirectoryChunks() * $this->itsp->getDirectoryChunkSize();
        if ($expectedDirectoryLength !== $calculatedDirectoryLength) {
            throw new Exception("Unexpected directory list size (expected: $expectedDirectoryLength, calculated: $calculatedDirectoryLength)");
        }

        $this->sections = array();
        $this->sections[0] = new Section\UncompressedSection($this);

        // Entries must be available before the sections: the section list is itself stored in an entry.
        $this->entries = $this->retrieveEntryList();

        $this->retrieveSectionList();

        $this->toc = null;
        $this->index = null;
    }

    /**
     * Destruct the instance.
     */
    public function __destruct()
    {
        unset($this->reader);
    }

    /**
     * Create a new CHM instance reading a file.
     *
     * @param string $filename
     *
     * @throws Exception Throws an Exception in case of errors.
     *
     * @return static
     */
    public static function fromFile($filename)
    {
        $reader = new FileReader($filename);

        return new static($reader);
    }

    /**
     * Get the reader that provides the data.
     *
     * @return Reader
     */
    public function getReader()
    {
        return $this->reader;
    }

    /**
     * Get the CHM initial header.
     *
     * @return Header\ITSF
     */
    public function getITSF()
    {
        return $this->itsf;
    }

    /**
     * Get the directory listing header.
     *
     * @return Header\ITSP
     */
    public function getITSP()
    {
        return $this->itsp;
    }

    /**
     * Get an entry given its full path.
     *
     * @param string $path The full path (case sensitive) of the entry to look for.
     *
     * @return Entry|null
     */
    public function getEntryByPath($path)
    {
        return isset($this->entries[$path]) ? $this->entries[$path] : null;
    }

    /**
     * Get the entries contained in this CHM.
     *
     * @param int|null $type One or more Entry::TYPE_... values (defaults to Entry::TYPE_FILE | Entry::TYPE_DIRECTORY if null).
     *
     * @return Entry[] The matching entries, as a list indexed from 0 (not keyed by path).
     */
    public function getEntries($type = null)
    {
        if ($type === null) {
            $type = Entry::TYPE_FILE | Entry::TYPE_DIRECTORY;
        }
        $result = array();
        foreach ($this->entries as $entry) {
            // Keep the entry if it has at least one of the requested type bits.
            if (($entry->getType() & $type) !== 0) {
                $result[] = $entry;
            }
        }

        return $result;
    }

    /**
     * Return a section given its index.
     *
     * @param int $i
     *
     * @return Section\Section|null
     */
    public function getSectionByIndex($i)
    {
        return isset($this->sections[$i]) ? $this->sections[$i] : null;
    }

    /**
     * Retrieve the list of the entries contained in this CHM.
     *
     * @throws Exception Throws an Exception in case of errors.
     *
     * @return Entry[] The entries, keyed by their full path.
     */
    protected function retrieveEntryList()
    {
        $result = array();
        $chunkOffset = $this->itsf->getDirectoryOffset() + $this->itsp->getHeaderLength();
        $chunkSize = $this->itsp->getDirectoryChunkSize();
        for ($i = $this->itsp->getFirstPMGLChunkNumber(), $l = $this->itsp->getLastPMGLChunkNumber(); $i <= $l; ++$i) {
            $offset = $chunkOffset + $i * $chunkSize;
            $this->reader->setPosition($offset);
            try {
                $pmgl = new Header\PMGL($this->reader);
            } catch (UnexpectedHeaderException $x) {
                if ($x->getFoundHeader() !== 'PMGI') {
                    throw $x;
                }
                // This is a PMGI chunk: re-read it as such (so that errors raised while
                // reading its header still surface), but it contributes no entries.
                $this->reader->setPosition($offset);
                new Header\PMGI($this->reader);
                continue;
            }
            // The entries end where the free space of the chunk begins.
            $end = $offset + $chunkSize - $pmgl->getFreeSpace();
            $cur = $this->reader->getPosition();
            while ($cur < $end) {
                $this->reader->setPosition($cur);
                // The Entry constructor consumes its own data from the reader of this CHM.
                $entry = new Entry($this);
                $result[$entry->getPath()] = $entry;
                $cur = $this->reader->getPosition();
            }
        }

        return $result;
    }

    /**
     * Retrieve the list of the data sections contained in this CHM.
     *
     * Section 0 is created by the constructor; this method adds the compressed sections.
     *
     * @throws Exception Throws an Exception in case of errors.
     */
    protected function retrieveSectionList()
    {
        $nameList = $this->getEntryByPath('::DataSpace/NameList');

        if ($nameList === null) {
            throw new Exception("Missing required entry: '::DataSpace/NameList'");
        }
        if ($nameList->getContentSectionIndex() !== 0) {
            throw new Exception("The content of the entry '{$nameList->getPath()}' should be in section 0, but it's in section {$nameList->getContentSectionIndex()}");
        }

        $nameListReader = new StringReader($nameList->getContents());
        // Skip the leading length field.
        $nameListReader->readUInt16();
        $numSections = $nameListReader->readUInt16();
        if ($numSections === 0) {
            throw new Exception('No content section defined.');
        }
        for ($i = 0; $i < $numSections; ++$i) {
            // Names are stored as UTF-16LE: the length is expressed in 16-bit units.
            $nameLength = $nameListReader->readUInt16();
            $utf16name = $nameListReader->readString($nameLength * 2);
            // Skip the 16-bit value that follows the name (presumably its NUL terminator).
            $nameListReader->readUInt16();
            $name = iconv('UTF-16LE', 'UTF-8', $utf16name);
            if ($name === false) {
                throw new Exception("Unable to decode the name of the data section $i");
            }
            switch ($name) {
                case 'Uncompressed':
                    break;
                case 'MSCompressed':
                    if ($i === 0) {
                        throw new Exception('First data section should be Uncompressed');
                    }
                    $this->sections[$i] = new Section\MSCompressedSection($this);
                    break;
                default:
                    throw new Exception("Unknown data section: $name");
            }
        }
    }

    /**
     * Get the TOC of this CHM file (if available).
     *
     * The TOC is parsed from the first file entry whose path ends with ".hhc" (case insensitive),
     * and the result is cached for subsequent calls.
     *
     * @return TOCIndex\Tree|null
     */
    public function getTOC()
    {
        if ($this->toc === null) {
            $this->toc = $this->findTreeByExtension('.hhc');
        }

        return ($this->toc === false) ? null : $this->toc;
    }

    /**
     * Get the index of this CHM file (if available).
     *
     * The index is parsed from the first file entry whose path ends with ".hhk" (case insensitive),
     * and the result is cached for subsequent calls.
     *
     * @return TOCIndex\Tree|null
     */
    public function getIndex()
    {
        if ($this->index === null) {
            $this->index = $this->findTreeByExtension('.hhk');
        }

        return ($this->index === false) ? null : $this->index;
    }

    /**
     * Parse the tree stored in the first file entry whose path ends with a given extension.
     *
     * @param string $extension The extension to look for, dot included (the comparison is case insensitive).
     *
     * @return TOCIndex\Tree|false Returns false if no file entry has the requested extension.
     */
    private function findTreeByExtension($extension)
    {
        $extensionLength = strlen($extension);
        foreach ($this->entries as $entry) {
            if ($entry->isFile() && strcasecmp(substr($entry->getPath(), -$extensionLength), $extension) === 0) {
                return TOCIndex\Tree::fromString($this, $entry->getContents());
            }
        }

        return false;
    }
}
327
|
|
|
|
Sometimes obsolete code just ends up commented out instead of removed. In this case it is better to remove the code once you have checked you do not need it.
The code might also have been commented out for debugging purposes. In this case it is vital that someone uncomments it again or your project may behave in very unexpected ways in production.
This check looks for comments that seem to be mostly valid code and reports them.