1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
namespace PhpOffice\PhpSpreadsheet\Reader; |
4
|
|
|
|
5
|
|
|
use DOMAttr; |
6
|
|
|
use DOMDocument; |
7
|
|
|
use DOMElement; |
8
|
|
|
use DOMNode; |
9
|
|
|
use DOMText; |
10
|
|
|
use PhpOffice\PhpSpreadsheet\Cell\Coordinate; |
11
|
|
|
use PhpOffice\PhpSpreadsheet\Cell\DataType; |
12
|
|
|
use PhpOffice\PhpSpreadsheet\Helper\Dimension as HelperDimension; |
13
|
|
|
use PhpOffice\PhpSpreadsheet\Reader\Ods\AutoFilter; |
14
|
|
|
use PhpOffice\PhpSpreadsheet\Reader\Ods\DefinedNames; |
15
|
|
|
use PhpOffice\PhpSpreadsheet\Reader\Ods\FormulaTranslator; |
16
|
|
|
use PhpOffice\PhpSpreadsheet\Reader\Ods\PageSettings; |
17
|
|
|
use PhpOffice\PhpSpreadsheet\Reader\Ods\Properties as DocumentProperties; |
18
|
|
|
use PhpOffice\PhpSpreadsheet\Reader\Security\XmlScanner; |
19
|
|
|
use PhpOffice\PhpSpreadsheet\RichText\RichText; |
20
|
|
|
use PhpOffice\PhpSpreadsheet\Shared\Date; |
21
|
|
|
use PhpOffice\PhpSpreadsheet\Shared\File; |
22
|
|
|
use PhpOffice\PhpSpreadsheet\Spreadsheet; |
23
|
|
|
use PhpOffice\PhpSpreadsheet\Style\NumberFormat; |
24
|
|
|
use PhpOffice\PhpSpreadsheet\Worksheet\Worksheet; |
25
|
|
|
use Throwable; |
26
|
|
|
use XMLReader; |
27
|
|
|
use ZipArchive; |
28
|
|
|
|
29
|
|
|
class Ods extends BaseReader |
30
|
|
|
{ |
31
|
|
|
const INITIAL_FILE = 'content.xml'; |
32
|
|
|
|
33
|
|
|
/** |
34
|
|
|
* Create a new Ods Reader instance. |
35
|
|
|
*/ |
36
|
93 |
|
public function __construct() |
37
|
|
|
{ |
38
|
93 |
|
parent::__construct(); |
39
|
93 |
|
$this->securityScanner = XmlScanner::getInstance($this); |
40
|
|
|
} |
41
|
|
|
|
42
|
|
|
/** |
43
|
|
|
* Can the current IReader read the file? |
44
|
|
|
*/ |
45
|
17 |
|
public function canRead(string $filename): bool |
46
|
|
|
{ |
47
|
17 |
|
$mimeType = 'UNKNOWN'; |
48
|
|
|
|
49
|
|
|
// Load file |
50
|
|
|
|
51
|
17 |
|
if (File::testFileNoThrow($filename, '')) { |
52
|
3 |
|
$zip = new ZipArchive(); |
53
|
3 |
|
if ($zip->open($filename) === true) { |
54
|
|
|
// check if it is an OOXML archive |
55
|
3 |
|
$stat = $zip->statName('mimetype'); |
56
|
3 |
|
if (!empty($stat) && ($stat['size'] <= 255)) { |
57
|
2 |
|
$mimeType = $zip->getFromName($stat['name']); |
58
|
1 |
|
} elseif ($zip->statName('META-INF/manifest.xml')) { |
59
|
1 |
|
$xml = simplexml_load_string( |
60
|
1 |
|
$this->getSecurityScannerOrThrow() |
61
|
1 |
|
->scan( |
62
|
1 |
|
$zip->getFromName( |
63
|
1 |
|
'META-INF/manifest.xml' |
64
|
1 |
|
) |
65
|
1 |
|
) |
66
|
1 |
|
); |
67
|
1 |
|
if ($xml !== false) { |
68
|
1 |
|
$namespacesContent = $xml->getNamespaces(true); |
69
|
1 |
|
if (isset($namespacesContent['manifest'])) { |
70
|
1 |
|
$manifest = $xml->children($namespacesContent['manifest']); |
71
|
1 |
|
foreach ($manifest as $manifestDataSet) { |
72
|
1 |
|
$manifestAttributes = $manifestDataSet->attributes($namespacesContent['manifest']); |
73
|
1 |
|
if ($manifestAttributes && $manifestAttributes->{'full-path'} == '/') { |
74
|
1 |
|
$mimeType = (string) $manifestAttributes->{'media-type'}; |
75
|
|
|
|
76
|
1 |
|
break; |
77
|
|
|
} |
78
|
|
|
} |
79
|
|
|
} |
80
|
|
|
} |
81
|
|
|
} |
82
|
|
|
|
83
|
3 |
|
$zip->close(); |
84
|
|
|
} |
85
|
|
|
} |
86
|
|
|
|
87
|
17 |
|
return $mimeType === 'application/vnd.oasis.opendocument.spreadsheet'; |
88
|
|
|
} |
89
|
|
|
|
90
|
|
|
/** |
91
|
|
|
* Reads names of the worksheets from a file, without parsing the whole file to a PhpSpreadsheet object. |
92
|
|
|
* |
93
|
|
|
* @return string[] |
94
|
|
|
*/ |
95
|
6 |
|
public function listWorksheetNames(string $filename): array |
96
|
|
|
{ |
97
|
6 |
|
File::assertFile($filename, self::INITIAL_FILE); |
98
|
|
|
|
99
|
2 |
|
$worksheetNames = []; |
100
|
|
|
|
101
|
2 |
|
$xml = new XMLReader(); |
102
|
2 |
|
$xml->xml( |
103
|
2 |
|
$this->getSecurityScannerOrThrow() |
104
|
2 |
|
->scanFile( |
105
|
2 |
|
'zip://' . realpath($filename) . '#' . self::INITIAL_FILE |
106
|
2 |
|
) |
107
|
2 |
|
); |
108
|
2 |
|
$xml->setParserProperty(2, true); |
109
|
|
|
|
110
|
|
|
// Step into the first level of content of the XML |
111
|
2 |
|
$xml->read(); |
112
|
2 |
|
while ($xml->read()) { |
113
|
|
|
// Quickly jump through to the office:body node |
114
|
2 |
|
while (self::getXmlName($xml) !== 'office:body') { |
115
|
2 |
|
if ($xml->isEmptyElement) { |
116
|
2 |
|
$xml->read(); |
117
|
|
|
} else { |
118
|
2 |
|
$xml->next(); |
119
|
|
|
} |
120
|
|
|
} |
121
|
|
|
// Now read each node until we find our first table:table node |
122
|
2 |
|
while ($xml->read()) { |
123
|
2 |
|
$xmlName = self::getXmlName($xml); |
124
|
2 |
|
if ($xmlName == 'table:table' && $xml->nodeType == XMLReader::ELEMENT) { |
125
|
|
|
// Loop through each table:table node reading the table:name attribute for each worksheet name |
126
|
|
|
do { |
127
|
2 |
|
$worksheetName = $xml->getAttribute('table:name'); |
128
|
2 |
|
if (!empty($worksheetName)) { |
129
|
2 |
|
$worksheetNames[] = $worksheetName; |
130
|
|
|
} |
131
|
2 |
|
$xml->next(); |
132
|
2 |
|
} while (self::getXmlName($xml) == 'table:table' && $xml->nodeType == XMLReader::ELEMENT); |
133
|
|
|
} |
134
|
|
|
} |
135
|
|
|
} |
136
|
|
|
|
137
|
2 |
|
return $worksheetNames; |
138
|
|
|
} |
139
|
|
|
|
140
|
|
|
/** |
141
|
|
|
* Return worksheet info (Name, Last Column Letter, Last Column Index, Total Rows, Total Columns). |
142
|
|
|
*/ |
143
|
5 |
|
public function listWorksheetInfo(string $filename): array |
144
|
|
|
{ |
145
|
5 |
|
File::assertFile($filename, self::INITIAL_FILE); |
146
|
|
|
|
147
|
1 |
|
$worksheetInfo = []; |
148
|
|
|
|
149
|
1 |
|
$xml = new XMLReader(); |
150
|
1 |
|
$xml->xml( |
151
|
1 |
|
$this->getSecurityScannerOrThrow() |
152
|
1 |
|
->scanFile( |
153
|
1 |
|
'zip://' . realpath($filename) . '#' . self::INITIAL_FILE |
154
|
1 |
|
) |
155
|
1 |
|
); |
156
|
1 |
|
$xml->setParserProperty(2, true); |
157
|
|
|
|
158
|
|
|
// Step into the first level of content of the XML |
159
|
1 |
|
$xml->read(); |
160
|
1 |
|
while ($xml->read()) { |
161
|
|
|
// Quickly jump through to the office:body node |
162
|
1 |
|
while (self::getXmlName($xml) !== 'office:body') { |
163
|
1 |
|
if ($xml->isEmptyElement) { |
164
|
1 |
|
$xml->read(); |
165
|
|
|
} else { |
166
|
1 |
|
$xml->next(); |
167
|
|
|
} |
168
|
|
|
} |
169
|
|
|
// Now read each node until we find our first table:table node |
170
|
1 |
|
while ($xml->read()) { |
171
|
1 |
|
if (self::getXmlName($xml) == 'table:table' && $xml->nodeType == XMLReader::ELEMENT) { |
172
|
1 |
|
$worksheetNames[] = $xml->getAttribute('table:name'); |
173
|
|
|
|
174
|
1 |
|
$tmpInfo = [ |
175
|
1 |
|
'worksheetName' => $xml->getAttribute('table:name'), |
176
|
1 |
|
'lastColumnLetter' => 'A', |
177
|
1 |
|
'lastColumnIndex' => 0, |
178
|
1 |
|
'totalRows' => 0, |
179
|
1 |
|
'totalColumns' => 0, |
180
|
1 |
|
]; |
181
|
|
|
|
182
|
|
|
// Loop through each child node of the table:table element reading |
183
|
1 |
|
$currCells = 0; |
184
|
|
|
do { |
185
|
1 |
|
$xml->read(); |
186
|
1 |
|
if (self::getXmlName($xml) == 'table:table-row' && $xml->nodeType == XMLReader::ELEMENT) { |
187
|
1 |
|
$rowspan = $xml->getAttribute('table:number-rows-repeated'); |
188
|
1 |
|
$rowspan = empty($rowspan) ? 1 : $rowspan; |
189
|
1 |
|
$tmpInfo['totalRows'] += $rowspan; |
190
|
1 |
|
$tmpInfo['totalColumns'] = max($tmpInfo['totalColumns'], $currCells); |
191
|
1 |
|
$currCells = 0; |
192
|
|
|
// Step into the row |
193
|
1 |
|
$xml->read(); |
194
|
|
|
do { |
195
|
1 |
|
$doread = true; |
196
|
1 |
|
if (self::getXmlName($xml) == 'table:table-cell' && $xml->nodeType == XMLReader::ELEMENT) { |
197
|
1 |
|
if (!$xml->isEmptyElement) { |
198
|
1 |
|
++$currCells; |
199
|
1 |
|
$xml->next(); |
200
|
1 |
|
$doread = false; |
201
|
|
|
} |
202
|
1 |
|
} elseif (self::getXmlName($xml) == 'table:covered-table-cell' && $xml->nodeType == XMLReader::ELEMENT) { |
203
|
1 |
|
$mergeSize = $xml->getAttribute('table:number-columns-repeated'); |
204
|
1 |
|
$currCells += (int) $mergeSize; |
205
|
|
|
} |
206
|
1 |
|
if ($doread) { |
207
|
1 |
|
$xml->read(); |
208
|
|
|
} |
209
|
1 |
|
} while (self::getXmlName($xml) != 'table:table-row'); |
210
|
|
|
} |
211
|
1 |
|
} while (self::getXmlName($xml) != 'table:table'); |
212
|
|
|
|
213
|
1 |
|
$tmpInfo['totalColumns'] = max($tmpInfo['totalColumns'], $currCells); |
214
|
1 |
|
$tmpInfo['lastColumnIndex'] = $tmpInfo['totalColumns'] - 1; |
215
|
1 |
|
$tmpInfo['lastColumnLetter'] = Coordinate::stringFromColumnIndex($tmpInfo['lastColumnIndex'] + 1); |
216
|
1 |
|
$worksheetInfo[] = $tmpInfo; |
217
|
|
|
} |
218
|
|
|
} |
219
|
|
|
} |
220
|
|
|
|
221
|
1 |
|
return $worksheetInfo; |
222
|
|
|
} |
223
|
|
|
|
224
|
|
|
/** |
225
|
|
|
* Counteract Phpstan caching. |
226
|
|
|
* |
227
|
|
|
* @phpstan-impure |
228
|
|
|
*/ |
229
|
3 |
|
private static function getXmlName(XMLReader $xml): string |
230
|
|
|
{ |
231
|
3 |
|
return $xml->name; |
232
|
|
|
} |
233
|
|
|
|
234
|
|
|
/** |
235
|
|
|
* Loads PhpSpreadsheet from file. |
236
|
|
|
*/ |
237
|
63 |
|
protected function loadSpreadsheetFromFile(string $filename): Spreadsheet |
238
|
|
|
{ |
239
|
|
|
// Create new Spreadsheet |
240
|
63 |
|
$spreadsheet = new Spreadsheet(); |
241
|
63 |
|
$spreadsheet->setValueBinder($this->valueBinder); |
242
|
63 |
|
$spreadsheet->removeSheetByIndex(0); |
243
|
|
|
|
244
|
|
|
// Load into this instance |
245
|
63 |
|
return $this->loadIntoExisting($filename, $spreadsheet); |
246
|
|
|
} |
247
|
|
|
|
248
|
|
|
/** |
249
|
|
|
* Loads PhpSpreadsheet from file into PhpSpreadsheet instance. |
250
|
|
|
*/ |
251
|
67 |
|
public function loadIntoExisting(string $filename, Spreadsheet $spreadsheet): Spreadsheet |
252
|
|
|
{ |
253
|
67 |
|
File::assertFile($filename, self::INITIAL_FILE); |
254
|
|
|
|
255
|
63 |
|
$zip = new ZipArchive(); |
256
|
63 |
|
$zip->open($filename); |
257
|
|
|
|
258
|
|
|
// Meta |
259
|
|
|
|
260
|
63 |
|
$xml = @simplexml_load_string( |
261
|
63 |
|
$this->getSecurityScannerOrThrow() |
262
|
63 |
|
->scan($zip->getFromName('meta.xml')) |
263
|
63 |
|
); |
264
|
63 |
|
if ($xml === false) { |
265
|
1 |
|
throw new Exception('Unable to read data from {$pFilename}'); |
266
|
|
|
} |
267
|
|
|
|
268
|
62 |
|
$namespacesMeta = $xml->getNamespaces(true); |
269
|
|
|
|
270
|
62 |
|
(new DocumentProperties($spreadsheet))->load($xml, $namespacesMeta); |
271
|
|
|
|
272
|
|
|
// Styles |
273
|
|
|
|
274
|
62 |
|
$dom = new DOMDocument('1.01', 'UTF-8'); |
275
|
62 |
|
$dom->loadXML( |
276
|
62 |
|
$this->getSecurityScannerOrThrow() |
277
|
62 |
|
->scan($zip->getFromName('styles.xml')) |
278
|
62 |
|
); |
279
|
|
|
|
280
|
62 |
|
$pageSettings = new PageSettings($dom); |
281
|
|
|
|
282
|
|
|
// Main Content |
283
|
|
|
|
284
|
62 |
|
$dom = new DOMDocument('1.01', 'UTF-8'); |
285
|
62 |
|
$dom->loadXML( |
286
|
62 |
|
$this->getSecurityScannerOrThrow() |
287
|
62 |
|
->scan($zip->getFromName(self::INITIAL_FILE)) |
288
|
62 |
|
); |
289
|
|
|
|
290
|
62 |
|
$officeNs = (string) $dom->lookupNamespaceUri('office'); |
291
|
62 |
|
$tableNs = (string) $dom->lookupNamespaceUri('table'); |
292
|
62 |
|
$textNs = (string) $dom->lookupNamespaceUri('text'); |
293
|
62 |
|
$xlinkNs = (string) $dom->lookupNamespaceUri('xlink'); |
294
|
62 |
|
$styleNs = (string) $dom->lookupNamespaceUri('style'); |
295
|
|
|
|
296
|
62 |
|
$pageSettings->readStyleCrossReferences($dom); |
297
|
|
|
|
298
|
62 |
|
$autoFilterReader = new AutoFilter($spreadsheet, $tableNs); |
299
|
62 |
|
$definedNameReader = new DefinedNames($spreadsheet, $tableNs); |
300
|
62 |
|
$columnWidths = []; |
301
|
62 |
|
$automaticStyle0 = $dom->getElementsByTagNameNS($officeNs, 'automatic-styles')->item(0); |
302
|
62 |
|
$automaticStyles = ($automaticStyle0 === null) ? [] : $automaticStyle0->getElementsByTagNameNS($styleNs, 'style'); |
303
|
62 |
|
foreach ($automaticStyles as $automaticStyle) { |
304
|
62 |
|
$styleName = $automaticStyle->getAttributeNS($styleNs, 'name'); |
305
|
62 |
|
$styleFamily = $automaticStyle->getAttributeNS($styleNs, 'family'); |
306
|
62 |
|
if ($styleFamily === 'table-column') { |
307
|
39 |
|
$tcprops = $automaticStyle->getElementsByTagNameNS($styleNs, 'table-column-properties'); |
308
|
39 |
|
if ($tcprops !== null) { |
309
|
39 |
|
$tcprop = $tcprops->item(0); |
310
|
39 |
|
if ($tcprop !== null) { |
311
|
39 |
|
$columnWidth = $tcprop->getAttributeNs($styleNs, 'column-width'); |
312
|
39 |
|
$columnWidths[$styleName] = $columnWidth; |
313
|
|
|
} |
314
|
|
|
} |
315
|
|
|
} |
316
|
|
|
} |
317
|
|
|
|
318
|
|
|
// Content |
319
|
62 |
|
$item0 = $dom->getElementsByTagNameNS($officeNs, 'body')->item(0); |
320
|
62 |
|
$spreadsheets = ($item0 === null) ? [] : $item0->getElementsByTagNameNS($officeNs, 'spreadsheet'); |
321
|
|
|
|
322
|
62 |
|
foreach ($spreadsheets as $workbookData) { |
323
|
|
|
/** @var DOMElement $workbookData */ |
324
|
62 |
|
$tables = $workbookData->getElementsByTagNameNS($tableNs, 'table'); |
325
|
|
|
|
326
|
62 |
|
$worksheetID = 0; |
327
|
62 |
|
foreach ($tables as $worksheetDataSet) { |
328
|
|
|
/** @var DOMElement $worksheetDataSet */ |
329
|
62 |
|
$worksheetName = $worksheetDataSet->getAttributeNS($tableNs, 'name'); |
330
|
|
|
|
331
|
|
|
// Check loadSheetsOnly |
332
|
|
|
if ( |
333
|
62 |
|
$this->loadSheetsOnly !== null |
334
|
|
|
&& $worksheetName |
335
|
62 |
|
&& !in_array($worksheetName, $this->loadSheetsOnly) |
336
|
|
|
) { |
337
|
3 |
|
continue; |
338
|
|
|
} |
339
|
|
|
|
340
|
61 |
|
$worksheetStyleName = $worksheetDataSet->getAttributeNS($tableNs, 'style-name'); |
341
|
|
|
|
342
|
|
|
// Create sheet |
343
|
61 |
|
$spreadsheet->createSheet(); |
344
|
61 |
|
$spreadsheet->setActiveSheetIndex($worksheetID); |
345
|
|
|
|
346
|
61 |
|
if ($worksheetName || is_numeric($worksheetName)) { |
347
|
|
|
// Use false for $updateFormulaCellReferences to prevent adjustment of worksheet references in |
348
|
|
|
// formula cells... during the load, all formulae should be correct, and we're simply |
349
|
|
|
// bringing the worksheet name in line with the formula, not the reverse |
350
|
61 |
|
$spreadsheet->getActiveSheet()->setTitle((string) $worksheetName, false, false); |
351
|
|
|
} |
352
|
|
|
|
353
|
|
|
// Go through every child of table element |
354
|
61 |
|
$rowID = 1; |
355
|
61 |
|
$tableColumnIndex = 1; |
356
|
61 |
|
foreach ($worksheetDataSet->childNodes as $childNode) { |
357
|
|
|
/** @var DOMElement $childNode */ |
358
|
|
|
|
359
|
|
|
// Filter elements which are not under the "table" ns |
360
|
61 |
|
if ($childNode->namespaceURI != $tableNs) { |
361
|
34 |
|
continue; |
362
|
|
|
} |
363
|
|
|
|
364
|
61 |
|
$key = $childNode->nodeName; |
365
|
|
|
|
366
|
|
|
// Remove ns from node name |
367
|
61 |
|
if (str_contains($key, ':')) { |
368
|
61 |
|
$keyChunks = explode(':', $key); |
369
|
61 |
|
$key = array_pop($keyChunks); |
370
|
|
|
} |
371
|
|
|
|
372
|
|
|
switch ($key) { |
373
|
61 |
|
case 'table-header-rows': |
374
|
|
|
/// TODO :: Figure this out. This is only a partial implementation I guess. |
375
|
|
|
// ($rowData it's not used at all and I'm not sure that PHPExcel |
376
|
|
|
// has an API for this) |
377
|
|
|
|
378
|
|
|
// foreach ($rowData as $keyRowData => $cellData) { |
379
|
|
|
// $rowData = $cellData; |
380
|
|
|
// break; |
381
|
|
|
// } |
382
|
|
|
break; |
383
|
61 |
|
case 'table-column': |
384
|
38 |
|
if ($childNode->hasAttributeNS($tableNs, 'number-columns-repeated')) { |
385
|
35 |
|
$rowRepeats = (int) $childNode->getAttributeNS($tableNs, 'number-columns-repeated'); |
386
|
|
|
} else { |
387
|
14 |
|
$rowRepeats = 1; |
388
|
|
|
} |
389
|
38 |
|
$tableStyleName = $childNode->getAttributeNS($tableNs, 'style-name'); |
390
|
38 |
|
if (isset($columnWidths[$tableStyleName])) { |
391
|
38 |
|
$columnWidth = new HelperDimension($columnWidths[$tableStyleName]); |
392
|
38 |
|
$tableColumnString = Coordinate::stringFromColumnIndex($tableColumnIndex); |
393
|
38 |
|
for ($rowRepeats2 = $rowRepeats; $rowRepeats2 > 0; --$rowRepeats2) { |
394
|
38 |
|
$spreadsheet->getActiveSheet() |
395
|
38 |
|
->getColumnDimension($tableColumnString) |
396
|
38 |
|
->setWidth($columnWidth->toUnit('cm'), 'cm'); |
397
|
38 |
|
++$tableColumnString; |
398
|
|
|
} |
399
|
|
|
} |
400
|
38 |
|
$tableColumnIndex += $rowRepeats; |
401
|
|
|
|
402
|
38 |
|
break; |
403
|
61 |
|
case 'table-row': |
404
|
61 |
|
if ($childNode->hasAttributeNS($tableNs, 'number-rows-repeated')) { |
405
|
18 |
|
$rowRepeats = (int) $childNode->getAttributeNS($tableNs, 'number-rows-repeated'); |
406
|
|
|
} else { |
407
|
60 |
|
$rowRepeats = 1; |
408
|
|
|
} |
409
|
|
|
|
410
|
61 |
|
$columnID = 'A'; |
411
|
|
|
/** @var DOMElement|DOMText $cellData */ |
412
|
61 |
|
foreach ($childNode->childNodes as $cellData) { |
413
|
61 |
|
if ($cellData instanceof DOMText) { |
414
|
1 |
|
continue; // should just be whitespace |
415
|
|
|
} |
416
|
61 |
|
if ($this->getReadFilter() !== null) { |
417
|
61 |
|
if (!$this->getReadFilter()->readCell($columnID, $rowID, $worksheetName)) { |
418
|
2 |
|
if ($cellData->hasAttributeNS($tableNs, 'number-columns-repeated')) { |
419
|
2 |
|
$colRepeats = (int) $cellData->getAttributeNS($tableNs, 'number-columns-repeated'); |
420
|
|
|
} else { |
421
|
2 |
|
$colRepeats = 1; |
422
|
|
|
} |
423
|
|
|
|
424
|
2 |
|
for ($i = 0; $i < $colRepeats; ++$i) { |
425
|
2 |
|
++$columnID; |
426
|
|
|
} |
427
|
|
|
|
428
|
2 |
|
continue; |
429
|
|
|
} |
430
|
|
|
} |
431
|
|
|
|
432
|
|
|
// Initialize variables |
433
|
61 |
|
$formatting = $hyperlink = null; |
434
|
61 |
|
$hasCalculatedValue = false; |
435
|
61 |
|
$cellDataFormula = ''; |
436
|
61 |
|
$cellDataType = ''; |
437
|
61 |
|
$cellDataRef = ''; |
438
|
|
|
|
439
|
61 |
|
if ($cellData->hasAttributeNS($tableNs, 'formula')) { |
440
|
23 |
|
$cellDataFormula = $cellData->getAttributeNS($tableNs, 'formula'); |
441
|
23 |
|
$hasCalculatedValue = true; |
442
|
|
|
} |
443
|
61 |
|
if ($cellData->hasAttributeNS($tableNs, 'number-matrix-columns-spanned')) { |
444
|
10 |
|
if ($cellData->hasAttributeNS($tableNs, 'number-matrix-rows-spanned')) { |
445
|
10 |
|
$cellDataType = 'array'; |
446
|
10 |
|
$arrayRow = (int) $cellData->getAttributeNS($tableNs, 'number-matrix-rows-spanned'); |
447
|
10 |
|
$arrayCol = (int) $cellData->getAttributeNS($tableNs, 'number-matrix-columns-spanned'); |
448
|
10 |
|
$lastRow = $rowID + $arrayRow - 1; |
449
|
10 |
|
$lastCol = $columnID; |
450
|
10 |
|
while ($arrayCol > 1) { |
451
|
7 |
|
++$lastCol; |
452
|
7 |
|
--$arrayCol; |
453
|
|
|
} |
454
|
10 |
|
$cellDataRef = "$columnID$rowID:$lastCol$lastRow"; |
455
|
|
|
} |
456
|
|
|
} |
457
|
|
|
|
458
|
|
|
// Annotations |
459
|
61 |
|
$annotation = $cellData->getElementsByTagNameNS($officeNs, 'annotation'); |
460
|
|
|
|
461
|
61 |
|
if ($annotation->length > 0 && $annotation->item(0) !== null) { |
462
|
11 |
|
$textNode = $annotation->item(0)->getElementsByTagNameNS($textNs, 'p'); |
463
|
11 |
|
$textNodeLength = $textNode->length; |
464
|
11 |
|
$newLineOwed = false; |
465
|
11 |
|
for ($textNodeIndex = 0; $textNodeIndex < $textNodeLength; ++$textNodeIndex) { |
466
|
11 |
|
$textNodeItem = $textNode->item($textNodeIndex); |
467
|
11 |
|
if ($textNodeItem !== null) { |
468
|
11 |
|
$text = $this->scanElementForText($textNodeItem); |
469
|
11 |
|
if ($newLineOwed) { |
470
|
1 |
|
$spreadsheet->getActiveSheet() |
471
|
1 |
|
->getComment($columnID . $rowID) |
472
|
1 |
|
->getText() |
473
|
1 |
|
->createText("\n"); |
474
|
|
|
} |
475
|
11 |
|
$newLineOwed = true; |
476
|
|
|
|
477
|
11 |
|
$spreadsheet->getActiveSheet() |
478
|
11 |
|
->getComment($columnID . $rowID) |
479
|
11 |
|
->getText() |
480
|
11 |
|
->createText($this->parseRichText($text)); |
481
|
|
|
} |
482
|
|
|
} |
483
|
|
|
} |
484
|
|
|
|
485
|
|
|
// Content |
486
|
|
|
|
487
|
|
|
/** @var DOMElement[] $paragraphs */ |
488
|
61 |
|
$paragraphs = []; |
489
|
|
|
|
490
|
61 |
|
foreach ($cellData->childNodes as $item) { |
491
|
|
|
/** @var DOMElement $item */ |
492
|
|
|
|
493
|
|
|
// Filter text:p elements |
494
|
59 |
|
if ($item->nodeName == 'text:p') { |
495
|
59 |
|
$paragraphs[] = $item; |
496
|
|
|
} |
497
|
|
|
} |
498
|
|
|
|
499
|
61 |
|
if (count($paragraphs) > 0) { |
500
|
|
|
// Consolidate if there are multiple p records (maybe with spans as well) |
501
|
59 |
|
$dataArray = []; |
502
|
|
|
|
503
|
|
|
// Text can have multiple text:p and within those, multiple text:span. |
504
|
|
|
// text:p newlines, but text:span does not. |
505
|
|
|
// Also, here we assume there is no text data is span fields are specified, since |
506
|
|
|
// we have no way of knowing proper positioning anyway. |
507
|
|
|
|
508
|
59 |
|
foreach ($paragraphs as $pData) { |
509
|
59 |
|
$dataArray[] = $this->scanElementForText($pData); |
510
|
|
|
} |
511
|
59 |
|
$allCellDataText = implode("\n", $dataArray); |
512
|
|
|
|
513
|
59 |
|
$type = $cellData->getAttributeNS($officeNs, 'value-type'); |
514
|
|
|
|
515
|
|
|
switch ($type) { |
516
|
59 |
|
case 'string': |
517
|
37 |
|
$type = DataType::TYPE_STRING; |
518
|
37 |
|
$dataValue = $allCellDataText; |
519
|
|
|
|
520
|
37 |
|
foreach ($paragraphs as $paragraph) { |
521
|
37 |
|
$link = $paragraph->getElementsByTagNameNS($textNs, 'a'); |
522
|
37 |
|
if ($link->length > 0 && $link->item(0) !== null) { |
523
|
7 |
|
$hyperlink = $link->item(0)->getAttributeNS($xlinkNs, 'href'); |
524
|
|
|
} |
525
|
|
|
} |
526
|
|
|
|
527
|
37 |
|
break; |
528
|
40 |
|
case 'boolean': |
529
|
8 |
|
$type = DataType::TYPE_BOOL; |
530
|
8 |
|
$dataValue = ($cellData->getAttributeNS($officeNs, 'boolean-value') === 'true') ? true : false; |
531
|
|
|
|
532
|
8 |
|
break; |
533
|
38 |
|
case 'percentage': |
534
|
4 |
|
$type = DataType::TYPE_NUMERIC; |
535
|
4 |
|
$dataValue = (float) $cellData->getAttributeNS($officeNs, 'value'); |
536
|
|
|
|
537
|
|
|
// percentage should always be float |
538
|
|
|
//if (floor($dataValue) == $dataValue) { |
539
|
|
|
// $dataValue = (int) $dataValue; |
540
|
|
|
//} |
541
|
4 |
|
$formatting = NumberFormat::FORMAT_PERCENTAGE_00; |
542
|
|
|
|
543
|
4 |
|
break; |
544
|
38 |
|
case 'currency': |
545
|
4 |
|
$type = DataType::TYPE_NUMERIC; |
546
|
4 |
|
$dataValue = (float) $cellData->getAttributeNS($officeNs, 'value'); |
547
|
|
|
|
548
|
4 |
|
if (floor($dataValue) == $dataValue) { |
549
|
4 |
|
$dataValue = (int) $dataValue; |
550
|
|
|
} |
551
|
4 |
|
$formatting = NumberFormat::FORMAT_CURRENCY_USD_INTEGER; |
552
|
|
|
|
553
|
4 |
|
break; |
554
|
34 |
|
case 'float': |
555
|
34 |
|
$type = DataType::TYPE_NUMERIC; |
556
|
34 |
|
$dataValue = (float) $cellData->getAttributeNS($officeNs, 'value'); |
557
|
|
|
|
558
|
34 |
|
if (floor($dataValue) == $dataValue) { |
559
|
32 |
|
if ($dataValue == (int) $dataValue) { |
560
|
32 |
|
$dataValue = (int) $dataValue; |
561
|
|
|
} |
562
|
|
|
} |
563
|
|
|
|
564
|
34 |
|
break; |
565
|
7 |
|
case 'date': |
566
|
7 |
|
$type = DataType::TYPE_NUMERIC; |
567
|
7 |
|
$value = $cellData->getAttributeNS($officeNs, 'date-value'); |
568
|
7 |
|
$dataValue = Date::convertIsoDate($value); |
569
|
|
|
|
570
|
7 |
|
if ($dataValue != floor($dataValue)) { |
571
|
6 |
|
$formatting = NumberFormat::FORMAT_DATE_XLSX15 |
572
|
6 |
|
. ' ' |
573
|
6 |
|
. NumberFormat::FORMAT_DATE_TIME4; |
574
|
|
|
} else { |
575
|
7 |
|
$formatting = NumberFormat::FORMAT_DATE_XLSX15; |
576
|
|
|
} |
577
|
|
|
|
578
|
7 |
|
break; |
579
|
6 |
|
case 'time': |
580
|
6 |
|
$type = DataType::TYPE_NUMERIC; |
581
|
|
|
|
582
|
6 |
|
$timeValue = $cellData->getAttributeNS($officeNs, 'time-value'); |
583
|
|
|
|
584
|
6 |
|
$dataValue = Date::PHPToExcel( |
585
|
6 |
|
strtotime( |
586
|
6 |
|
'01-01-1970 ' . implode(':', sscanf($timeValue, 'PT%dH%dM%dS') ?? []) |
587
|
6 |
|
) |
588
|
6 |
|
); |
589
|
6 |
|
$formatting = NumberFormat::FORMAT_DATE_TIME4; |
590
|
|
|
|
591
|
6 |
|
break; |
592
|
|
|
default: |
593
|
|
|
$dataValue = null; |
594
|
|
|
} |
595
|
|
|
} else { |
596
|
55 |
|
$type = DataType::TYPE_NULL; |
597
|
55 |
|
$dataValue = null; |
598
|
|
|
} |
599
|
|
|
|
600
|
61 |
|
if ($hasCalculatedValue) { |
601
|
23 |
|
$type = DataType::TYPE_FORMULA; |
602
|
23 |
|
$cellDataFormula = substr($cellDataFormula, strpos($cellDataFormula, ':=') + 1); |
603
|
23 |
|
$cellDataFormula = FormulaTranslator::convertToExcelFormulaValue($cellDataFormula); |
604
|
|
|
} |
605
|
|
|
|
606
|
61 |
|
if ($cellData->hasAttributeNS($tableNs, 'number-columns-repeated')) { |
607
|
53 |
|
$colRepeats = (int) $cellData->getAttributeNS($tableNs, 'number-columns-repeated'); |
608
|
|
|
} else { |
609
|
60 |
|
$colRepeats = 1; |
610
|
|
|
} |
611
|
|
|
|
612
|
61 |
|
if ($type !== null) { |
613
|
61 |
|
for ($i = 0; $i < $colRepeats; ++$i) { |
614
|
61 |
|
if ($i > 0) { |
615
|
53 |
|
++$columnID; |
616
|
|
|
} |
617
|
|
|
|
618
|
61 |
|
if ($type !== DataType::TYPE_NULL) { |
619
|
59 |
|
for ($rowAdjust = 0; $rowAdjust < $rowRepeats; ++$rowAdjust) { |
620
|
59 |
|
$rID = $rowID + $rowAdjust; |
621
|
|
|
|
622
|
59 |
|
$cell = $spreadsheet->getActiveSheet() |
623
|
59 |
|
->getCell($columnID . $rID); |
624
|
|
|
|
625
|
|
|
// Set value |
626
|
59 |
|
if ($hasCalculatedValue) { |
627
|
23 |
|
$cell->setValueExplicit($cellDataFormula, $type); |
628
|
23 |
|
if ($cellDataType === 'array') { |
629
|
10 |
|
$cell->setFormulaAttributes(['t' => 'array', 'ref' => $cellDataRef]); |
630
|
|
|
} |
631
|
|
|
} else { |
632
|
58 |
|
$cell->setValueExplicit($dataValue, $type); |
633
|
|
|
} |
634
|
|
|
|
635
|
59 |
|
if ($hasCalculatedValue) { |
636
|
23 |
|
$cell->setCalculatedValue($dataValue, $type === DataType::TYPE_NUMERIC); |
637
|
|
|
} |
638
|
|
|
|
639
|
|
|
// Set other properties |
640
|
59 |
|
if ($formatting !== null) { |
641
|
11 |
|
$spreadsheet->getActiveSheet() |
642
|
11 |
|
->getStyle($columnID . $rID) |
643
|
11 |
|
->getNumberFormat() |
644
|
11 |
|
->setFormatCode($formatting); |
645
|
|
|
} else { |
646
|
59 |
|
$spreadsheet->getActiveSheet() |
647
|
59 |
|
->getStyle($columnID . $rID) |
648
|
59 |
|
->getNumberFormat() |
649
|
59 |
|
->setFormatCode(NumberFormat::FORMAT_GENERAL); |
650
|
|
|
} |
651
|
|
|
|
652
|
59 |
|
if ($hyperlink !== null) { |
653
|
7 |
|
if ($hyperlink[0] === '#') { |
654
|
1 |
|
$hyperlink = 'sheet://' . substr($hyperlink, 1); |
655
|
|
|
} |
656
|
7 |
|
$cell->getHyperlink() |
657
|
7 |
|
->setUrl($hyperlink); |
658
|
|
|
} |
659
|
|
|
} |
660
|
|
|
} |
661
|
|
|
} |
662
|
|
|
} |
663
|
|
|
|
664
|
|
|
// Merged cells |
665
|
61 |
|
$this->processMergedCells($cellData, $tableNs, $type, $columnID, $rowID, $spreadsheet); |
666
|
|
|
|
667
|
61 |
|
++$columnID; |
668
|
|
|
} |
669
|
61 |
|
$rowID += $rowRepeats; |
670
|
|
|
|
671
|
61 |
|
break; |
672
|
|
|
} |
673
|
|
|
} |
674
|
61 |
|
$pageSettings->setVisibilityForWorksheet($spreadsheet->getActiveSheet(), $worksheetStyleName); |
675
|
61 |
|
$pageSettings->setPrintSettingsForWorksheet($spreadsheet->getActiveSheet(), $worksheetStyleName); |
676
|
61 |
|
++$worksheetID; |
677
|
|
|
} |
678
|
|
|
|
679
|
62 |
|
$autoFilterReader->read($workbookData); |
680
|
62 |
|
$definedNameReader->read($workbookData); |
681
|
|
|
} |
682
|
62 |
|
$spreadsheet->setActiveSheetIndex(0); |
683
|
|
|
|
684
|
61 |
|
if ($zip->locateName('settings.xml') !== false) { |
685
|
55 |
|
$this->processSettings($zip, $spreadsheet); |
686
|
|
|
} |
687
|
|
|
|
688
|
|
|
// Return |
689
|
61 |
|
return $spreadsheet; |
690
|
|
|
} |
691
|
|
|
|
692
|
55 |
|
private function processSettings(ZipArchive $zip, Spreadsheet $spreadsheet): void |
693
|
|
|
{ |
694
|
55 |
|
$dom = new DOMDocument('1.01', 'UTF-8'); |
695
|
55 |
|
$dom->loadXML( |
696
|
55 |
|
$this->getSecurityScannerOrThrow() |
697
|
55 |
|
->scan($zip->getFromName('settings.xml')) |
698
|
55 |
|
); |
699
|
|
|
//$xlinkNs = $dom->lookupNamespaceUri('xlink'); |
700
|
55 |
|
$configNs = (string) $dom->lookupNamespaceUri('config'); |
701
|
|
|
//$oooNs = $dom->lookupNamespaceUri('ooo'); |
702
|
55 |
|
$officeNs = (string) $dom->lookupNamespaceUri('office'); |
703
|
55 |
|
$settings = $dom->getElementsByTagNameNS($officeNs, 'settings') |
704
|
55 |
|
->item(0); |
705
|
55 |
|
if ($settings !== null) { |
706
|
55 |
|
$this->lookForActiveSheet($settings, $spreadsheet, $configNs); |
707
|
55 |
|
$this->lookForSelectedCells($settings, $spreadsheet, $configNs); |
708
|
|
|
} |
709
|
|
|
} |
710
|
|
|
|
711
|
55 |
|
private function lookForActiveSheet(DOMElement $settings, Spreadsheet $spreadsheet, string $configNs): void |
712
|
|
|
{ |
713
|
|
|
/** @var DOMElement $t */ |
714
|
55 |
|
foreach ($settings->getElementsByTagNameNS($configNs, 'config-item') as $t) { |
715
|
54 |
|
if ($t->getAttributeNs($configNs, 'name') === 'ActiveTable') { |
716
|
|
|
try { |
717
|
54 |
|
$spreadsheet->setActiveSheetIndexByName($t->nodeValue ?? ''); |
718
|
1 |
|
} catch (Throwable) { |
719
|
|
|
// do nothing |
720
|
|
|
} |
721
|
|
|
|
722
|
54 |
|
break; |
723
|
|
|
} |
724
|
|
|
} |
725
|
|
|
} |
726
|
|
|
|
727
|
55 |
|
private function lookForSelectedCells(DOMElement $settings, Spreadsheet $spreadsheet, string $configNs): void |
728
|
|
|
{ |
729
|
|
|
/** @var DOMElement $t */ |
730
|
55 |
|
foreach ($settings->getElementsByTagNameNS($configNs, 'config-item-map-named') as $t) { |
731
|
54 |
|
if ($t->getAttributeNs($configNs, 'name') === 'Tables') { |
732
|
54 |
|
foreach ($t->getElementsByTagNameNS($configNs, 'config-item-map-entry') as $ws) { |
733
|
54 |
|
$setRow = $setCol = ''; |
734
|
54 |
|
$wsname = $ws->getAttributeNs($configNs, 'name'); |
735
|
54 |
|
foreach ($ws->getElementsByTagNameNS($configNs, 'config-item') as $configItem) { |
736
|
54 |
|
$attrName = $configItem->getAttributeNs($configNs, 'name'); |
737
|
54 |
|
if ($attrName === 'CursorPositionX') { |
738
|
54 |
|
$setCol = $configItem->nodeValue; |
739
|
|
|
} |
740
|
54 |
|
if ($attrName === 'CursorPositionY') { |
741
|
54 |
|
$setRow = $configItem->nodeValue; |
742
|
|
|
} |
743
|
|
|
} |
744
|
54 |
|
$this->setSelected($spreadsheet, $wsname, "$setCol", "$setRow"); |
745
|
|
|
} |
746
|
|
|
|
747
|
54 |
|
break; |
748
|
|
|
} |
749
|
|
|
} |
750
|
|
|
} |
751
|
|
|
|
752
|
54 |
|
private function setSelected(Spreadsheet $spreadsheet, string $wsname, string $setCol, string $setRow): void |
753
|
|
|
{ |
754
|
54 |
|
if (is_numeric($setCol) && is_numeric($setRow)) { |
755
|
54 |
|
$sheet = $spreadsheet->getSheetByName($wsname); |
756
|
54 |
|
if ($sheet !== null) { |
757
|
54 |
|
$sheet->setSelectedCells([(int) $setCol + 1, (int) $setRow + 1]); |
758
|
|
|
} |
759
|
|
|
} |
760
|
|
|
} |
761
|
|
|
|
762
|
|
|
/** |
763
|
|
|
* Recursively scan element. |
764
|
|
|
*/ |
765
|
59 |
|
protected function scanElementForText(DOMNode $element): string |
766
|
|
|
{ |
767
|
59 |
|
$str = ''; |
768
|
59 |
|
foreach ($element->childNodes as $child) { |
769
|
|
|
/** @var DOMNode $child */ |
770
|
59 |
|
if ($child->nodeType == XML_TEXT_NODE) { |
771
|
59 |
|
$str .= $child->nodeValue; |
772
|
16 |
|
} elseif ($child->nodeType == XML_ELEMENT_NODE && $child->nodeName == 'text:line-break') { |
773
|
1 |
|
$str .= "\n"; |
774
|
16 |
|
} elseif ($child->nodeType == XML_ELEMENT_NODE && $child->nodeName == 'text:s') { |
775
|
|
|
// It's a space |
776
|
|
|
|
777
|
|
|
// Multiple spaces? |
778
|
4 |
|
$attributes = $child->attributes; |
779
|
|
|
/** @var ?DOMAttr $cAttr */ |
780
|
4 |
|
$cAttr = ($attributes === null) ? null : $attributes->getNamedItem('c'); |
781
|
4 |
|
$multiplier = self::getMultiplier($cAttr); |
782
|
4 |
|
$str .= str_repeat(' ', $multiplier); |
783
|
|
|
} |
784
|
|
|
|
785
|
59 |
|
if ($child->hasChildNodes()) { |
786
|
16 |
|
$str .= $this->scanElementForText($child); |
787
|
|
|
} |
788
|
|
|
} |
789
|
|
|
|
790
|
59 |
|
return $str; |
791
|
|
|
} |
792
|
|
|
|
793
|
4 |
|
private static function getMultiplier(?DOMAttr $cAttr): int |
794
|
|
|
{ |
795
|
4 |
|
if ($cAttr) { |
796
|
4 |
|
$multiplier = (int) $cAttr->nodeValue; |
797
|
|
|
} else { |
798
|
4 |
|
$multiplier = 1; |
799
|
|
|
} |
800
|
|
|
|
801
|
4 |
|
return $multiplier; |
802
|
|
|
} |
803
|
|
|
|
804
|
11 |
|
private function parseRichText(string $is): RichText |
805
|
|
|
{ |
806
|
11 |
|
$value = new RichText(); |
807
|
11 |
|
$value->createText($is); |
808
|
|
|
|
809
|
11 |
|
return $value; |
810
|
|
|
} |
811
|
|
|
|
812
|
61 |
|
private function processMergedCells( |
813
|
|
|
DOMElement $cellData, |
814
|
|
|
string $tableNs, |
815
|
|
|
string $type, |
816
|
|
|
string $columnID, |
817
|
|
|
int $rowID, |
818
|
|
|
Spreadsheet $spreadsheet |
819
|
|
|
): void { |
820
|
|
|
if ( |
821
|
61 |
|
$cellData->hasAttributeNS($tableNs, 'number-columns-spanned') |
822
|
61 |
|
|| $cellData->hasAttributeNS($tableNs, 'number-rows-spanned') |
823
|
|
|
) { |
824
|
16 |
|
if (($type !== DataType::TYPE_NULL) || ($this->readDataOnly === false)) { |
825
|
16 |
|
$columnTo = $columnID; |
826
|
|
|
|
827
|
16 |
|
if ($cellData->hasAttributeNS($tableNs, 'number-columns-spanned')) { |
828
|
15 |
|
$columnIndex = Coordinate::columnIndexFromString($columnID); |
829
|
15 |
|
$columnIndex += (int) $cellData->getAttributeNS($tableNs, 'number-columns-spanned'); |
830
|
15 |
|
$columnIndex -= 2; |
831
|
|
|
|
832
|
15 |
|
$columnTo = Coordinate::stringFromColumnIndex($columnIndex + 1); |
833
|
|
|
} |
834
|
|
|
|
835
|
16 |
|
$rowTo = $rowID; |
836
|
|
|
|
837
|
16 |
|
if ($cellData->hasAttributeNS($tableNs, 'number-rows-spanned')) { |
838
|
16 |
|
$rowTo = $rowTo + (int) $cellData->getAttributeNS($tableNs, 'number-rows-spanned') - 1; |
839
|
|
|
} |
840
|
|
|
|
841
|
16 |
|
$cellRange = $columnID . $rowID . ':' . $columnTo . $rowTo; |
842
|
16 |
|
$spreadsheet->getActiveSheet()->mergeCells($cellRange, Worksheet::MERGE_CELL_CONTENT_HIDE); |
843
|
|
|
} |
844
|
|
|
} |
845
|
|
|
} |
846
|
|
|
} |
847
|
|
|
|