1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
namespace PhpOffice\PhpSpreadsheet\Reader; |
4
|
|
|
|
5
|
|
|
use DOMAttr; |
6
|
|
|
use DOMDocument; |
7
|
|
|
use DOMElement; |
8
|
|
|
use DOMNode; |
9
|
|
|
use PhpOffice\PhpSpreadsheet\Cell\Coordinate; |
10
|
|
|
use PhpOffice\PhpSpreadsheet\Cell\DataType; |
11
|
|
|
use PhpOffice\PhpSpreadsheet\Helper\Dimension as HelperDimension; |
12
|
|
|
use PhpOffice\PhpSpreadsheet\Reader\Ods\AutoFilter; |
13
|
|
|
use PhpOffice\PhpSpreadsheet\Reader\Ods\DefinedNames; |
14
|
|
|
use PhpOffice\PhpSpreadsheet\Reader\Ods\FormulaTranslator; |
15
|
|
|
use PhpOffice\PhpSpreadsheet\Reader\Ods\PageSettings; |
16
|
|
|
use PhpOffice\PhpSpreadsheet\Reader\Ods\Properties as DocumentProperties; |
17
|
|
|
use PhpOffice\PhpSpreadsheet\Reader\Security\XmlScanner; |
18
|
|
|
use PhpOffice\PhpSpreadsheet\RichText\RichText; |
19
|
|
|
use PhpOffice\PhpSpreadsheet\Settings; |
20
|
|
|
use PhpOffice\PhpSpreadsheet\Shared\Date; |
21
|
|
|
use PhpOffice\PhpSpreadsheet\Shared\File; |
22
|
|
|
use PhpOffice\PhpSpreadsheet\Spreadsheet; |
23
|
|
|
use PhpOffice\PhpSpreadsheet\Style\NumberFormat; |
24
|
|
|
use PhpOffice\PhpSpreadsheet\Worksheet\Worksheet; |
25
|
|
|
use Throwable; |
26
|
|
|
use XMLReader; |
27
|
|
|
use ZipArchive; |
28
|
|
|
|
29
|
|
|
class Ods extends BaseReader |
30
|
|
|
{ |
31
|
|
|
const INITIAL_FILE = 'content.xml'; |
32
|
|
|
|
33
|
|
|
/** |
34
|
|
|
* Create a new Ods Reader instance. |
35
|
|
|
*/ |
36
|
68 |
|
public function __construct() |
37
|
|
|
{ |
38
|
68 |
|
parent::__construct(); |
39
|
68 |
|
$this->securityScanner = XmlScanner::getInstance($this); |
40
|
|
|
} |
41
|
|
|
|
42
|
|
|
/** |
43
|
|
|
* Can the current IReader read the file? |
44
|
|
|
*/ |
45
|
14 |
|
public function canRead(string $filename): bool |
46
|
|
|
{ |
47
|
14 |
|
$mimeType = 'UNKNOWN'; |
48
|
|
|
|
49
|
|
|
// Load file |
50
|
|
|
|
51
|
14 |
|
if (File::testFileNoThrow($filename, '')) { |
52
|
3 |
|
$zip = new ZipArchive(); |
53
|
3 |
|
if ($zip->open($filename) === true) { |
54
|
|
|
// check if it is an OOXML archive |
55
|
3 |
|
$stat = $zip->statName('mimetype'); |
56
|
3 |
|
if (!empty($stat) && ($stat['size'] <= 255)) { |
57
|
2 |
|
$mimeType = $zip->getFromName($stat['name']); |
58
|
1 |
|
} elseif ($zip->statName('META-INF/manifest.xml')) { |
59
|
1 |
|
$xml = simplexml_load_string( |
60
|
1 |
|
$this->getSecurityScannerOrThrow()->scan($zip->getFromName('META-INF/manifest.xml')), |
61
|
1 |
|
'SimpleXMLElement', |
62
|
1 |
|
Settings::getLibXmlLoaderOptions() |
63
|
1 |
|
); |
64
|
1 |
|
if ($xml !== false) { |
65
|
1 |
|
$namespacesContent = $xml->getNamespaces(true); |
66
|
1 |
|
if (isset($namespacesContent['manifest'])) { |
67
|
1 |
|
$manifest = $xml->children($namespacesContent['manifest']); |
68
|
1 |
|
foreach ($manifest as $manifestDataSet) { |
69
|
|
|
/** @scrutinizer ignore-call */ |
70
|
1 |
|
$manifestAttributes = $manifestDataSet->attributes($namespacesContent['manifest']); |
71
|
1 |
|
if ($manifestAttributes && $manifestAttributes->{'full-path'} == '/') { |
72
|
1 |
|
$mimeType = (string) $manifestAttributes->{'media-type'}; |
73
|
|
|
|
74
|
1 |
|
break; |
75
|
|
|
} |
76
|
|
|
} |
77
|
|
|
} |
78
|
|
|
} |
79
|
|
|
} |
80
|
|
|
|
81
|
3 |
|
$zip->close(); |
82
|
|
|
} |
83
|
|
|
} |
84
|
|
|
|
85
|
14 |
|
return $mimeType === 'application/vnd.oasis.opendocument.spreadsheet'; |
86
|
|
|
} |
87
|
|
|
|
88
|
|
|
/** |
89
|
|
|
* Reads names of the worksheets from a file, without parsing the whole file to a PhpSpreadsheet object. |
90
|
|
|
* |
91
|
|
|
* @param string $filename |
92
|
|
|
* |
93
|
|
|
* @return string[] |
94
|
|
|
*/ |
95
|
5 |
|
public function listWorksheetNames($filename) |
96
|
|
|
{ |
97
|
5 |
|
File::assertFile($filename, self::INITIAL_FILE); |
98
|
|
|
|
99
|
1 |
|
$worksheetNames = []; |
100
|
|
|
|
101
|
1 |
|
$xml = new XMLReader(); |
102
|
1 |
|
$xml->xml( |
103
|
1 |
|
$this->getSecurityScannerOrThrow()->scanFile('zip://' . realpath($filename) . '#' . self::INITIAL_FILE), |
104
|
1 |
|
null, |
105
|
1 |
|
Settings::getLibXmlLoaderOptions() |
106
|
1 |
|
); |
107
|
1 |
|
$xml->setParserProperty(2, true); |
108
|
|
|
|
109
|
|
|
// Step into the first level of content of the XML |
110
|
1 |
|
$xml->read(); |
111
|
1 |
|
while ($xml->read()) { |
112
|
|
|
// Quickly jump through to the office:body node |
113
|
1 |
|
while (self::getXmlName($xml) !== 'office:body') { |
114
|
1 |
|
if ($xml->isEmptyElement) { |
115
|
1 |
|
$xml->read(); |
116
|
|
|
} else { |
117
|
1 |
|
$xml->next(); |
118
|
|
|
} |
119
|
|
|
} |
120
|
|
|
// Now read each node until we find our first table:table node |
121
|
1 |
|
while ($xml->read()) { |
122
|
1 |
|
$xmlName = self::getXmlName($xml); |
123
|
1 |
|
if ($xmlName == 'table:table' && $xml->nodeType == XMLReader::ELEMENT) { |
124
|
|
|
// Loop through each table:table node reading the table:name attribute for each worksheet name |
125
|
|
|
do { |
126
|
1 |
|
$worksheetName = $xml->getAttribute('table:name'); |
127
|
1 |
|
if (!empty($worksheetName)) { |
128
|
1 |
|
$worksheetNames[] = $worksheetName; |
129
|
|
|
} |
130
|
1 |
|
$xml->next(); |
131
|
1 |
|
} while (self::getXmlName($xml) == 'table:table' && $xml->nodeType == XMLReader::ELEMENT); |
132
|
|
|
} |
133
|
|
|
} |
134
|
|
|
} |
135
|
|
|
|
136
|
1 |
|
return $worksheetNames; |
137
|
|
|
} |
138
|
|
|
|
139
|
|
|
/** |
140
|
|
|
* Return worksheet info (Name, Last Column Letter, Last Column Index, Total Rows, Total Columns). |
141
|
|
|
* |
142
|
|
|
* @param string $filename |
143
|
|
|
* |
144
|
|
|
* @return array |
145
|
|
|
*/ |
146
|
5 |
|
public function listWorksheetInfo($filename) |
147
|
|
|
{ |
148
|
5 |
|
File::assertFile($filename, self::INITIAL_FILE); |
149
|
|
|
|
150
|
1 |
|
$worksheetInfo = []; |
151
|
|
|
|
152
|
1 |
|
$xml = new XMLReader(); |
153
|
1 |
|
$xml->xml( |
154
|
1 |
|
$this->getSecurityScannerOrThrow()->scanFile('zip://' . realpath($filename) . '#' . self::INITIAL_FILE), |
155
|
1 |
|
null, |
156
|
1 |
|
Settings::getLibXmlLoaderOptions() |
157
|
1 |
|
); |
158
|
1 |
|
$xml->setParserProperty(2, true); |
159
|
|
|
|
160
|
|
|
// Step into the first level of content of the XML |
161
|
1 |
|
$xml->read(); |
162
|
1 |
|
while ($xml->read()) { |
163
|
|
|
// Quickly jump through to the office:body node |
164
|
1 |
|
while (self::getXmlName($xml) !== 'office:body') { |
165
|
1 |
|
if ($xml->isEmptyElement) { |
166
|
1 |
|
$xml->read(); |
167
|
|
|
} else { |
168
|
1 |
|
$xml->next(); |
169
|
|
|
} |
170
|
|
|
} |
171
|
|
|
// Now read each node until we find our first table:table node |
172
|
1 |
|
while ($xml->read()) { |
173
|
1 |
|
if (self::getXmlName($xml) == 'table:table' && $xml->nodeType == XMLReader::ELEMENT) { |
174
|
1 |
|
$worksheetNames[] = $xml->getAttribute('table:name'); |
175
|
|
|
|
176
|
1 |
|
$tmpInfo = [ |
177
|
1 |
|
'worksheetName' => $xml->getAttribute('table:name'), |
178
|
1 |
|
'lastColumnLetter' => 'A', |
179
|
1 |
|
'lastColumnIndex' => 0, |
180
|
1 |
|
'totalRows' => 0, |
181
|
1 |
|
'totalColumns' => 0, |
182
|
1 |
|
]; |
183
|
|
|
|
184
|
|
|
// Loop through each child node of the table:table element reading |
185
|
1 |
|
$currCells = 0; |
186
|
|
|
do { |
187
|
1 |
|
$xml->read(); |
188
|
1 |
|
if (self::getXmlName($xml) == 'table:table-row' && $xml->nodeType == XMLReader::ELEMENT) { |
189
|
1 |
|
$rowspan = $xml->getAttribute('table:number-rows-repeated'); |
190
|
1 |
|
$rowspan = empty($rowspan) ? 1 : $rowspan; |
191
|
1 |
|
$tmpInfo['totalRows'] += $rowspan; |
192
|
1 |
|
$tmpInfo['totalColumns'] = max($tmpInfo['totalColumns'], $currCells); |
193
|
1 |
|
$currCells = 0; |
194
|
|
|
// Step into the row |
195
|
1 |
|
$xml->read(); |
196
|
|
|
do { |
197
|
1 |
|
$doread = true; |
198
|
1 |
|
if (self::getXmlName($xml) == 'table:table-cell' && $xml->nodeType == XMLReader::ELEMENT) { |
199
|
1 |
|
if (!$xml->isEmptyElement) { |
200
|
1 |
|
++$currCells; |
201
|
1 |
|
$xml->next(); |
202
|
1 |
|
$doread = false; |
203
|
|
|
} |
204
|
1 |
|
} elseif (self::getXmlName($xml) == 'table:covered-table-cell' && $xml->nodeType == XMLReader::ELEMENT) { |
205
|
1 |
|
$mergeSize = $xml->getAttribute('table:number-columns-repeated'); |
206
|
1 |
|
$currCells += (int) $mergeSize; |
207
|
|
|
} |
208
|
1 |
|
if ($doread) { |
209
|
1 |
|
$xml->read(); |
210
|
|
|
} |
211
|
1 |
|
} while (self::getXmlName($xml) != 'table:table-row'); |
212
|
|
|
} |
213
|
1 |
|
} while (self::getXmlName($xml) != 'table:table'); |
214
|
|
|
|
215
|
1 |
|
$tmpInfo['totalColumns'] = max($tmpInfo['totalColumns'], $currCells); |
216
|
1 |
|
$tmpInfo['lastColumnIndex'] = $tmpInfo['totalColumns'] - 1; |
217
|
1 |
|
$tmpInfo['lastColumnLetter'] = Coordinate::stringFromColumnIndex($tmpInfo['lastColumnIndex'] + 1); |
218
|
1 |
|
$worksheetInfo[] = $tmpInfo; |
219
|
|
|
} |
220
|
|
|
} |
221
|
|
|
} |
222
|
|
|
|
223
|
1 |
|
return $worksheetInfo; |
224
|
|
|
} |
225
|
|
|
|
226
|
|
|
/** |
227
|
|
|
* Counteract Phpstan caching. |
228
|
|
|
* |
229
|
|
|
* @phpstan-impure |
230
|
|
|
*/ |
231
|
2 |
|
private static function getXmlName(XMLReader $xml): string |
232
|
|
|
{ |
233
|
2 |
|
return $xml->name; |
234
|
|
|
} |
235
|
|
|
|
236
|
|
|
/** |
237
|
|
|
* Loads PhpSpreadsheet from file. |
238
|
|
|
*/ |
239
|
41 |
|
protected function loadSpreadsheetFromFile(string $filename): Spreadsheet |
240
|
|
|
{ |
241
|
|
|
// Create new Spreadsheet |
242
|
41 |
|
$spreadsheet = new Spreadsheet(); |
243
|
|
|
|
244
|
|
|
// Load into this instance |
245
|
41 |
|
return $this->loadIntoExisting($filename, $spreadsheet); |
246
|
|
|
} |
247
|
|
|
|
248
|
|
|
/** |
249
|
|
|
* Loads PhpSpreadsheet from file into PhpSpreadsheet instance. |
250
|
|
|
* |
251
|
|
|
* @param string $filename |
252
|
|
|
* |
253
|
|
|
* @return Spreadsheet |
254
|
|
|
*/ |
255
|
45 |
|
public function loadIntoExisting($filename, Spreadsheet $spreadsheet) |
256
|
|
|
{ |
257
|
45 |
|
File::assertFile($filename, self::INITIAL_FILE); |
258
|
|
|
|
259
|
41 |
|
$zip = new ZipArchive(); |
260
|
41 |
|
$zip->open($filename); |
261
|
|
|
|
262
|
|
|
// Meta |
263
|
|
|
|
264
|
41 |
|
$xml = @simplexml_load_string( |
265
|
41 |
|
$this->getSecurityScannerOrThrow()->scan($zip->getFromName('meta.xml')), |
266
|
41 |
|
'SimpleXMLElement', |
267
|
41 |
|
Settings::getLibXmlLoaderOptions() |
268
|
41 |
|
); |
269
|
41 |
|
if ($xml === false) { |
270
|
1 |
|
throw new Exception('Unable to read data from {$pFilename}'); |
271
|
|
|
} |
272
|
|
|
|
273
|
40 |
|
$namespacesMeta = $xml->getNamespaces(true); |
274
|
|
|
|
275
|
40 |
|
(new DocumentProperties($spreadsheet))->load($xml, $namespacesMeta); |
276
|
|
|
|
277
|
|
|
// Styles |
278
|
|
|
|
279
|
40 |
|
$dom = new DOMDocument('1.01', 'UTF-8'); |
280
|
40 |
|
$dom->loadXML( |
281
|
40 |
|
$this->getSecurityScannerOrThrow()->scan($zip->getFromName('styles.xml')), |
282
|
40 |
|
Settings::getLibXmlLoaderOptions() |
283
|
40 |
|
); |
284
|
|
|
|
285
|
40 |
|
$pageSettings = new PageSettings($dom); |
286
|
|
|
|
287
|
|
|
// Main Content |
288
|
|
|
|
289
|
40 |
|
$dom = new DOMDocument('1.01', 'UTF-8'); |
290
|
40 |
|
$dom->loadXML( |
291
|
40 |
|
$this->getSecurityScannerOrThrow()->scan($zip->getFromName(self::INITIAL_FILE)), |
292
|
40 |
|
Settings::getLibXmlLoaderOptions() |
293
|
40 |
|
); |
294
|
|
|
|
295
|
40 |
|
$officeNs = $dom->lookupNamespaceUri('office'); |
296
|
40 |
|
$tableNs = $dom->lookupNamespaceUri('table'); |
297
|
40 |
|
$textNs = $dom->lookupNamespaceUri('text'); |
298
|
40 |
|
$xlinkNs = $dom->lookupNamespaceUri('xlink'); |
299
|
40 |
|
$styleNs = $dom->lookupNamespaceUri('style'); |
300
|
|
|
|
301
|
40 |
|
$pageSettings->readStyleCrossReferences($dom); |
302
|
|
|
|
303
|
40 |
|
$autoFilterReader = new AutoFilter($spreadsheet, $tableNs); |
304
|
40 |
|
$definedNameReader = new DefinedNames($spreadsheet, $tableNs); |
305
|
40 |
|
$columnWidths = []; |
306
|
40 |
|
$automaticStyle0 = $dom->getElementsByTagNameNS($officeNs, 'automatic-styles')->item(0); |
307
|
40 |
|
$automaticStyles = ($automaticStyle0 === null) ? [] : $automaticStyle0->getElementsByTagNameNS($styleNs, 'style'); |
308
|
40 |
|
foreach ($automaticStyles as $automaticStyle) { |
309
|
40 |
|
$styleName = $automaticStyle->getAttributeNS($styleNs, 'name'); |
310
|
40 |
|
$styleFamily = $automaticStyle->getAttributeNS($styleNs, 'family'); |
311
|
40 |
|
if ($styleFamily === 'table-column') { |
312
|
26 |
|
$tcprops = $automaticStyle->getElementsByTagNameNS($styleNs, 'table-column-properties'); |
313
|
26 |
|
if ($tcprops !== null) { |
314
|
26 |
|
$tcprop = $tcprops->item(0); |
315
|
26 |
|
if ($tcprop !== null) { |
316
|
26 |
|
$columnWidth = $tcprop->getAttributeNs($styleNs, 'column-width'); |
317
|
26 |
|
$columnWidths[$styleName] = $columnWidth; |
318
|
|
|
} |
319
|
|
|
} |
320
|
|
|
} |
321
|
|
|
} |
322
|
|
|
|
323
|
|
|
// Content |
324
|
40 |
|
$item0 = $dom->getElementsByTagNameNS($officeNs, 'body')->item(0); |
325
|
40 |
|
$spreadsheets = ($item0 === null) ? [] : $item0->getElementsByTagNameNS($officeNs, 'spreadsheet'); |
326
|
|
|
|
327
|
40 |
|
foreach ($spreadsheets as $workbookData) { |
328
|
|
|
/** @var DOMElement $workbookData */ |
329
|
40 |
|
$tables = $workbookData->getElementsByTagNameNS($tableNs, 'table'); |
330
|
|
|
|
331
|
40 |
|
$worksheetID = 0; |
332
|
40 |
|
foreach ($tables as $worksheetDataSet) { |
333
|
|
|
/** @var DOMElement $worksheetDataSet */ |
334
|
40 |
|
$worksheetName = $worksheetDataSet->getAttributeNS($tableNs, 'name'); |
335
|
|
|
|
336
|
|
|
// Check loadSheetsOnly |
337
|
|
|
if ( |
338
|
40 |
|
$this->loadSheetsOnly !== null |
339
|
|
|
&& $worksheetName |
340
|
40 |
|
&& !in_array($worksheetName, $this->loadSheetsOnly) |
341
|
|
|
) { |
342
|
2 |
|
continue; |
343
|
|
|
} |
344
|
|
|
|
345
|
40 |
|
$worksheetStyleName = $worksheetDataSet->getAttributeNS($tableNs, 'style-name'); |
346
|
|
|
|
347
|
|
|
// Create sheet |
348
|
40 |
|
if ($worksheetID > 0) { |
349
|
14 |
|
$spreadsheet->createSheet(); // First sheet is added by default |
350
|
|
|
} |
351
|
40 |
|
$spreadsheet->setActiveSheetIndex($worksheetID); |
352
|
|
|
|
353
|
40 |
|
if ($worksheetName || is_numeric($worksheetName)) { |
354
|
|
|
// Use false for $updateFormulaCellReferences to prevent adjustment of worksheet references in |
355
|
|
|
// formula cells... during the load, all formulae should be correct, and we're simply |
356
|
|
|
// bringing the worksheet name in line with the formula, not the reverse |
357
|
40 |
|
$spreadsheet->getActiveSheet()->setTitle((string) $worksheetName, false, false); |
358
|
|
|
} |
359
|
|
|
|
360
|
|
|
// Go through every child of table element |
361
|
40 |
|
$rowID = 1; |
362
|
40 |
|
$tableColumnIndex = 1; |
363
|
40 |
|
foreach ($worksheetDataSet->childNodes as $childNode) { |
364
|
|
|
/** @var DOMElement $childNode */ |
365
|
|
|
|
366
|
|
|
// Filter elements which are not under the "table" ns |
367
|
40 |
|
if ($childNode->namespaceURI != $tableNs) { |
368
|
22 |
|
continue; |
369
|
|
|
} |
370
|
|
|
|
371
|
40 |
|
$key = $childNode->nodeName; |
372
|
|
|
|
373
|
|
|
// Remove ns from node name |
374
|
40 |
|
if (strpos($key, ':') !== false) { |
375
|
40 |
|
$keyChunks = explode(':', $key); |
376
|
40 |
|
$key = array_pop($keyChunks); |
377
|
|
|
} |
378
|
|
|
|
379
|
|
|
switch ($key) { |
380
|
40 |
|
case 'table-header-rows': |
381
|
|
|
/// TODO :: Figure this out. This is only a partial implementation I guess. |
382
|
|
|
// ($rowData it's not used at all and I'm not sure that PHPExcel |
383
|
|
|
// has an API for this) |
384
|
|
|
|
385
|
|
|
// foreach ($rowData as $keyRowData => $cellData) { |
386
|
|
|
// $rowData = $cellData; |
387
|
|
|
// break; |
388
|
|
|
// } |
389
|
|
|
break; |
390
|
40 |
|
case 'table-column': |
391
|
26 |
|
if ($childNode->hasAttributeNS($tableNs, 'number-columns-repeated')) { |
392
|
25 |
|
$rowRepeats = (int) $childNode->getAttributeNS($tableNs, 'number-columns-repeated'); |
393
|
|
|
} else { |
394
|
11 |
|
$rowRepeats = 1; |
395
|
|
|
} |
396
|
26 |
|
$tableStyleName = $childNode->getAttributeNS($tableNs, 'style-name'); |
397
|
26 |
|
if (isset($columnWidths[$tableStyleName])) { |
398
|
26 |
|
$columnWidth = new HelperDimension($columnWidths[$tableStyleName]); |
399
|
26 |
|
$tableColumnString = Coordinate::stringFromColumnIndex($tableColumnIndex); |
400
|
26 |
|
for ($rowRepeats2 = $rowRepeats; $rowRepeats2 > 0; --$rowRepeats2) { |
401
|
26 |
|
$spreadsheet->getActiveSheet() |
402
|
26 |
|
->getColumnDimension($tableColumnString) |
403
|
26 |
|
->setWidth($columnWidth->toUnit('cm'), 'cm'); |
404
|
26 |
|
++$tableColumnString; |
405
|
|
|
} |
406
|
|
|
} |
407
|
26 |
|
$tableColumnIndex += $rowRepeats; |
408
|
|
|
|
409
|
26 |
|
break; |
410
|
40 |
|
case 'table-row': |
411
|
40 |
|
if ($childNode->hasAttributeNS($tableNs, 'number-rows-repeated')) { |
412
|
15 |
|
$rowRepeats = (int) $childNode->getAttributeNS($tableNs, 'number-rows-repeated'); |
413
|
|
|
} else { |
414
|
39 |
|
$rowRepeats = 1; |
415
|
|
|
} |
416
|
|
|
|
417
|
40 |
|
$columnID = 'A'; |
418
|
|
|
/** @var DOMElement $cellData */ |
419
|
40 |
|
foreach ($childNode->childNodes as $cellData) { |
420
|
40 |
|
if ($this->getReadFilter() !== null) { |
421
|
40 |
|
if (!$this->getReadFilter()->readCell($columnID, $rowID, $worksheetName)) { |
422
|
2 |
|
if ($cellData->hasAttributeNS($tableNs, 'number-columns-repeated')) { |
423
|
2 |
|
$colRepeats = (int) $cellData->getAttributeNS($tableNs, 'number-columns-repeated'); |
424
|
|
|
} else { |
425
|
2 |
|
$colRepeats = 1; |
426
|
|
|
} |
427
|
|
|
|
428
|
2 |
|
for ($i = 0; $i < $colRepeats; ++$i) { |
429
|
2 |
|
++$columnID; |
430
|
|
|
} |
431
|
|
|
|
432
|
2 |
|
continue; |
433
|
|
|
} |
434
|
|
|
} |
435
|
|
|
|
436
|
|
|
// Initialize variables |
437
|
40 |
|
$formatting = $hyperlink = null; |
438
|
40 |
|
$hasCalculatedValue = false; |
439
|
40 |
|
$cellDataFormula = ''; |
440
|
|
|
|
441
|
40 |
|
if ($cellData->hasAttributeNS($tableNs, 'formula')) { |
442
|
13 |
|
$cellDataFormula = $cellData->getAttributeNS($tableNs, 'formula'); |
443
|
13 |
|
$hasCalculatedValue = true; |
444
|
|
|
} |
445
|
|
|
|
446
|
|
|
// Annotations |
447
|
40 |
|
$annotation = $cellData->getElementsByTagNameNS($officeNs, 'annotation'); |
448
|
|
|
|
449
|
40 |
|
if ($annotation->length > 0 && $annotation->item(0) !== null) { |
450
|
9 |
|
$textNode = $annotation->item(0)->getElementsByTagNameNS($textNs, 'p'); |
451
|
|
|
|
452
|
9 |
|
if ($textNode->length > 0 && $textNode->item(0) !== null) { |
453
|
9 |
|
$text = $this->scanElementForText($textNode->item(0)); |
454
|
|
|
|
455
|
9 |
|
$spreadsheet->getActiveSheet() |
456
|
9 |
|
->getComment($columnID . $rowID) |
457
|
9 |
|
->setText($this->parseRichText($text)); |
458
|
|
|
// ->setAuthor( $author ) |
459
|
|
|
} |
460
|
|
|
} |
461
|
|
|
|
462
|
|
|
// Content |
463
|
|
|
|
464
|
|
|
/** @var DOMElement[] $paragraphs */ |
465
|
40 |
|
$paragraphs = []; |
466
|
|
|
|
467
|
40 |
|
foreach ($cellData->childNodes as $item) { |
468
|
|
|
/** @var DOMElement $item */ |
469
|
|
|
|
470
|
|
|
// Filter text:p elements |
471
|
38 |
|
if ($item->nodeName == 'text:p') { |
472
|
38 |
|
$paragraphs[] = $item; |
473
|
|
|
} |
474
|
|
|
} |
475
|
|
|
|
476
|
40 |
|
if (count($paragraphs) > 0) { |
477
|
|
|
// Consolidate if there are multiple p records (maybe with spans as well) |
478
|
38 |
|
$dataArray = []; |
479
|
|
|
|
480
|
|
|
// Text can have multiple text:p and within those, multiple text:span. |
481
|
|
|
// text:p newlines, but text:span does not. |
482
|
|
|
// Also, here we assume there is no text data is span fields are specified, since |
483
|
|
|
// we have no way of knowing proper positioning anyway. |
484
|
|
|
|
485
|
38 |
|
foreach ($paragraphs as $pData) { |
486
|
38 |
|
$dataArray[] = $this->scanElementForText($pData); |
487
|
|
|
} |
488
|
38 |
|
$allCellDataText = implode("\n", $dataArray); |
489
|
|
|
|
490
|
38 |
|
$type = $cellData->getAttributeNS($officeNs, 'value-type'); |
491
|
|
|
|
492
|
|
|
switch ($type) { |
493
|
38 |
|
case 'string': |
494
|
27 |
|
$type = DataType::TYPE_STRING; |
495
|
27 |
|
$dataValue = $allCellDataText; |
496
|
|
|
|
497
|
27 |
|
foreach ($paragraphs as $paragraph) { |
498
|
27 |
|
$link = $paragraph->getElementsByTagNameNS($textNs, 'a'); |
499
|
27 |
|
if ($link->length > 0 && $link->item(0) !== null) { |
500
|
6 |
|
$hyperlink = $link->item(0)->getAttributeNS($xlinkNs, 'href'); |
501
|
|
|
} |
502
|
|
|
} |
503
|
|
|
|
504
|
27 |
|
break; |
505
|
26 |
|
case 'boolean': |
506
|
6 |
|
$type = DataType::TYPE_BOOL; |
507
|
6 |
|
$dataValue = ($allCellDataText == 'TRUE') ? true : false; |
508
|
|
|
|
509
|
6 |
|
break; |
510
|
26 |
|
case 'percentage': |
511
|
4 |
|
$type = DataType::TYPE_NUMERIC; |
512
|
4 |
|
$dataValue = (float) $cellData->getAttributeNS($officeNs, 'value'); |
513
|
|
|
|
514
|
|
|
// percentage should always be float |
515
|
|
|
//if (floor($dataValue) == $dataValue) { |
516
|
|
|
// $dataValue = (int) $dataValue; |
517
|
|
|
//} |
518
|
4 |
|
$formatting = NumberFormat::FORMAT_PERCENTAGE_00; |
519
|
|
|
|
520
|
4 |
|
break; |
521
|
26 |
|
case 'currency': |
522
|
4 |
|
$type = DataType::TYPE_NUMERIC; |
523
|
4 |
|
$dataValue = (float) $cellData->getAttributeNS($officeNs, 'value'); |
524
|
|
|
|
525
|
4 |
|
if (floor($dataValue) == $dataValue) { |
526
|
4 |
|
$dataValue = (int) $dataValue; |
527
|
|
|
} |
528
|
4 |
|
$formatting = NumberFormat::FORMAT_CURRENCY_USD_INTEGER; |
529
|
|
|
|
530
|
4 |
|
break; |
531
|
22 |
|
case 'float': |
532
|
22 |
|
$type = DataType::TYPE_NUMERIC; |
533
|
22 |
|
$dataValue = (float) $cellData->getAttributeNS($officeNs, 'value'); |
534
|
|
|
|
535
|
22 |
|
if (floor($dataValue) == $dataValue) { |
536
|
21 |
|
if ($dataValue == (int) $dataValue) { |
537
|
21 |
|
$dataValue = (int) $dataValue; |
538
|
|
|
} |
539
|
|
|
} |
540
|
|
|
|
541
|
22 |
|
break; |
542
|
6 |
|
case 'date': |
543
|
6 |
|
$type = DataType::TYPE_NUMERIC; |
544
|
6 |
|
$value = $cellData->getAttributeNS($officeNs, 'date-value'); |
545
|
6 |
|
$dataValue = Date::convertIsoDate($value); |
546
|
|
|
|
547
|
6 |
|
if ($dataValue != floor($dataValue)) { |
548
|
6 |
|
$formatting = NumberFormat::FORMAT_DATE_XLSX15 |
549
|
6 |
|
. ' ' |
550
|
6 |
|
. NumberFormat::FORMAT_DATE_TIME4; |
551
|
|
|
} else { |
552
|
6 |
|
$formatting = NumberFormat::FORMAT_DATE_XLSX15; |
553
|
|
|
} |
554
|
|
|
|
555
|
6 |
|
break; |
556
|
6 |
|
case 'time': |
557
|
6 |
|
$type = DataType::TYPE_NUMERIC; |
558
|
|
|
|
559
|
6 |
|
$timeValue = $cellData->getAttributeNS($officeNs, 'time-value'); |
560
|
|
|
|
561
|
6 |
|
$dataValue = Date::PHPToExcel( |
562
|
6 |
|
strtotime( |
563
|
6 |
|
'01-01-1970 ' . implode(':', /** @scrutinizer ignore-type */ sscanf($timeValue, 'PT%dH%dM%dS') ?? []) |
564
|
6 |
|
) |
565
|
6 |
|
); |
566
|
6 |
|
$formatting = NumberFormat::FORMAT_DATE_TIME4; |
567
|
|
|
|
568
|
6 |
|
break; |
569
|
|
|
default: |
570
|
38 |
|
$dataValue = null; |
571
|
|
|
} |
572
|
|
|
} else { |
573
|
36 |
|
$type = DataType::TYPE_NULL; |
574
|
36 |
|
$dataValue = null; |
575
|
|
|
} |
576
|
|
|
|
577
|
40 |
|
if ($hasCalculatedValue) { |
578
|
13 |
|
$type = DataType::TYPE_FORMULA; |
579
|
13 |
|
$cellDataFormula = substr($cellDataFormula, strpos($cellDataFormula, ':=') + 1); |
580
|
13 |
|
$cellDataFormula = FormulaTranslator::convertToExcelFormulaValue($cellDataFormula); |
581
|
|
|
} |
582
|
|
|
|
583
|
40 |
|
if ($cellData->hasAttributeNS($tableNs, 'number-columns-repeated')) { |
584
|
34 |
|
$colRepeats = (int) $cellData->getAttributeNS($tableNs, 'number-columns-repeated'); |
585
|
|
|
} else { |
586
|
39 |
|
$colRepeats = 1; |
587
|
|
|
} |
588
|
|
|
|
589
|
40 |
|
if ($type !== null) { |
590
|
40 |
|
for ($i = 0; $i < $colRepeats; ++$i) { |
591
|
40 |
|
if ($i > 0) { |
592
|
34 |
|
++$columnID; |
593
|
|
|
} |
594
|
|
|
|
595
|
40 |
|
if ($type !== DataType::TYPE_NULL) { |
596
|
38 |
|
for ($rowAdjust = 0; $rowAdjust < $rowRepeats; ++$rowAdjust) { |
597
|
38 |
|
$rID = $rowID + $rowAdjust; |
598
|
|
|
|
599
|
38 |
|
$cell = $spreadsheet->getActiveSheet() |
600
|
38 |
|
->getCell($columnID . $rID); |
601
|
|
|
|
602
|
|
|
// Set value |
603
|
38 |
|
if ($hasCalculatedValue) { |
604
|
13 |
|
$cell->setValueExplicit($cellDataFormula, $type); |
605
|
|
|
} else { |
606
|
37 |
|
$cell->setValueExplicit($dataValue, $type); |
607
|
|
|
} |
608
|
|
|
|
609
|
38 |
|
if ($hasCalculatedValue) { |
610
|
13 |
|
$cell->setCalculatedValue($dataValue); |
611
|
|
|
} |
612
|
|
|
|
613
|
|
|
// Set other properties |
614
|
38 |
|
if ($formatting !== null) { |
615
|
10 |
|
$spreadsheet->getActiveSheet() |
616
|
10 |
|
->getStyle($columnID . $rID) |
617
|
10 |
|
->getNumberFormat() |
618
|
10 |
|
->setFormatCode($formatting); |
619
|
|
|
} else { |
620
|
38 |
|
$spreadsheet->getActiveSheet() |
621
|
38 |
|
->getStyle($columnID . $rID) |
622
|
38 |
|
->getNumberFormat() |
623
|
38 |
|
->setFormatCode(NumberFormat::FORMAT_GENERAL); |
624
|
|
|
} |
625
|
|
|
|
626
|
38 |
|
if ($hyperlink !== null) { |
627
|
6 |
|
$cell->getHyperlink() |
628
|
6 |
|
->setUrl($hyperlink); |
629
|
|
|
} |
630
|
|
|
} |
631
|
|
|
} |
632
|
|
|
} |
633
|
|
|
} |
634
|
|
|
|
635
|
|
|
// Merged cells |
636
|
40 |
|
$this->processMergedCells($cellData, $tableNs, $type, $columnID, $rowID, $spreadsheet); |
637
|
|
|
|
638
|
40 |
|
++$columnID; |
639
|
|
|
} |
640
|
40 |
|
$rowID += $rowRepeats; |
641
|
|
|
|
642
|
40 |
|
break; |
643
|
|
|
} |
644
|
|
|
} |
645
|
40 |
|
$pageSettings->setVisibilityForWorksheet($spreadsheet->getActiveSheet(), $worksheetStyleName); |
646
|
40 |
|
$pageSettings->setPrintSettingsForWorksheet($spreadsheet->getActiveSheet(), $worksheetStyleName); |
647
|
40 |
|
++$worksheetID; |
648
|
|
|
} |
649
|
|
|
|
650
|
40 |
|
$autoFilterReader->read($workbookData); |
651
|
40 |
|
$definedNameReader->read($workbookData); |
652
|
|
|
} |
653
|
40 |
|
$spreadsheet->setActiveSheetIndex(0); |
654
|
|
|
|
655
|
40 |
|
if ($zip->locateName('settings.xml') !== false) { |
656
|
35 |
|
$this->processSettings($zip, $spreadsheet); |
657
|
|
|
} |
658
|
|
|
|
659
|
|
|
// Return |
660
|
40 |
|
return $spreadsheet; |
661
|
|
|
} |
662
|
|
|
|
663
|
35 |
|
private function processSettings(ZipArchive $zip, Spreadsheet $spreadsheet): void |
664
|
|
|
{ |
665
|
35 |
|
$dom = new DOMDocument('1.01', 'UTF-8'); |
666
|
35 |
|
$dom->loadXML( |
667
|
35 |
|
$this->getSecurityScannerOrThrow()->scan($zip->getFromName('settings.xml')), |
668
|
35 |
|
Settings::getLibXmlLoaderOptions() |
669
|
35 |
|
); |
670
|
|
|
//$xlinkNs = $dom->lookupNamespaceUri('xlink'); |
671
|
35 |
|
$configNs = $dom->lookupNamespaceUri('config'); |
672
|
|
|
//$oooNs = $dom->lookupNamespaceUri('ooo'); |
673
|
35 |
|
$officeNs = $dom->lookupNamespaceUri('office'); |
674
|
35 |
|
$settings = $dom->getElementsByTagNameNS($officeNs, 'settings') |
675
|
35 |
|
->item(0); |
676
|
35 |
|
if ($settings !== null) { |
677
|
35 |
|
$this->lookForActiveSheet($settings, $spreadsheet, $configNs); |
678
|
35 |
|
$this->lookForSelectedCells($settings, $spreadsheet, $configNs); |
679
|
|
|
} |
680
|
|
|
} |
681
|
|
|
|
682
|
35 |
|
private function lookForActiveSheet(DOMElement $settings, Spreadsheet $spreadsheet, string $configNs): void |
683
|
|
|
{ |
684
|
|
|
/** @var DOMElement $t */ |
685
|
35 |
|
foreach ($settings->getElementsByTagNameNS($configNs, 'config-item') as $t) { |
686
|
35 |
|
if ($t->getAttributeNs($configNs, 'name') === 'ActiveTable') { |
687
|
|
|
try { |
688
|
35 |
|
$spreadsheet->setActiveSheetIndexByName($t->nodeValue ?? ''); |
689
|
1 |
|
} catch (Throwable $e) { |
690
|
|
|
// do nothing |
691
|
|
|
} |
692
|
|
|
|
693
|
35 |
|
break; |
694
|
|
|
} |
695
|
|
|
} |
696
|
|
|
} |
697
|
|
|
|
698
|
35 |
|
private function lookForSelectedCells(DOMElement $settings, Spreadsheet $spreadsheet, string $configNs): void |
699
|
|
|
{ |
700
|
|
|
/** @var DOMElement $t */ |
701
|
35 |
|
foreach ($settings->getElementsByTagNameNS($configNs, 'config-item-map-named') as $t) { |
702
|
35 |
|
if ($t->getAttributeNs($configNs, 'name') === 'Tables') { |
703
|
35 |
|
foreach ($t->getElementsByTagNameNS($configNs, 'config-item-map-entry') as $ws) { |
704
|
35 |
|
$setRow = $setCol = ''; |
705
|
35 |
|
$wsname = $ws->getAttributeNs($configNs, 'name'); |
706
|
35 |
|
foreach ($ws->getElementsByTagNameNS($configNs, 'config-item') as $configItem) { |
707
|
35 |
|
$attrName = $configItem->getAttributeNs($configNs, 'name'); |
708
|
35 |
|
if ($attrName === 'CursorPositionX') { |
709
|
35 |
|
$setCol = $configItem->nodeValue; |
710
|
|
|
} |
711
|
35 |
|
if ($attrName === 'CursorPositionY') { |
712
|
35 |
|
$setRow = $configItem->nodeValue; |
713
|
|
|
} |
714
|
|
|
} |
715
|
35 |
|
$this->setSelected($spreadsheet, $wsname, "$setCol", "$setRow"); |
716
|
|
|
} |
717
|
|
|
|
718
|
35 |
|
break; |
719
|
|
|
} |
720
|
|
|
} |
721
|
|
|
} |
722
|
|
|
|
723
|
35 |
|
private function setSelected(Spreadsheet $spreadsheet, string $wsname, string $setCol, string $setRow): void |
724
|
|
|
{ |
725
|
35 |
|
if (is_numeric($setCol) && is_numeric($setRow)) { |
726
|
35 |
|
$sheet = $spreadsheet->getSheetByName($wsname); |
727
|
35 |
|
if ($sheet !== null) { |
728
|
35 |
|
$sheet->setSelectedCells([(int) $setCol + 1, (int) $setRow + 1]); |
729
|
|
|
} |
730
|
|
|
} |
731
|
|
|
} |
732
|
|
|
|
733
|
|
|
/** |
734
|
|
|
* Recursively scan element. |
735
|
|
|
* |
736
|
|
|
* @return string |
737
|
|
|
*/ |
738
|
38 |
|
protected function scanElementForText(DOMNode $element) |
739
|
|
|
{ |
740
|
38 |
|
$str = ''; |
741
|
38 |
|
foreach ($element->childNodes as $child) { |
742
|
|
|
/** @var DOMNode $child */ |
743
|
38 |
|
if ($child->nodeType == XML_TEXT_NODE) { |
744
|
38 |
|
$str .= $child->nodeValue; |
745
|
12 |
|
} elseif ($child->nodeType == XML_ELEMENT_NODE && $child->nodeName == 'text:s') { |
746
|
|
|
// It's a space |
747
|
|
|
|
748
|
|
|
// Multiple spaces? |
749
|
4 |
|
$attributes = $child->attributes; |
750
|
|
|
/** @var ?DOMAttr $cAttr */ |
751
|
4 |
|
$cAttr = ($attributes === null) ? null : $attributes->getNamedItem('c'); |
752
|
4 |
|
$multiplier = self::getMultiplier($cAttr); |
753
|
4 |
|
$str .= str_repeat(' ', $multiplier); |
754
|
|
|
} |
755
|
|
|
|
756
|
38 |
|
if ($child->hasChildNodes()) { |
757
|
12 |
|
$str .= $this->scanElementForText($child); |
758
|
|
|
} |
759
|
|
|
} |
760
|
|
|
|
761
|
38 |
|
return $str; |
762
|
|
|
} |
763
|
|
|
|
764
|
4 |
|
private static function getMultiplier(?DOMAttr $cAttr): int |
765
|
|
|
{ |
766
|
4 |
|
if ($cAttr) { |
767
|
4 |
|
$multiplier = (int) $cAttr->nodeValue; |
768
|
|
|
} else { |
769
|
4 |
|
$multiplier = 1; |
770
|
|
|
} |
771
|
|
|
|
772
|
4 |
|
return $multiplier; |
773
|
|
|
} |
774
|
|
|
|
775
|
|
|
/** |
776
|
|
|
* @param string $is |
777
|
|
|
* |
778
|
|
|
* @return RichText |
779
|
|
|
*/ |
780
|
9 |
|
private function parseRichText($is) |
781
|
|
|
{ |
782
|
9 |
|
$value = new RichText(); |
783
|
9 |
|
$value->createText($is); |
784
|
|
|
|
785
|
9 |
|
return $value; |
786
|
|
|
} |
787
|
|
|
|
788
|
40 |
|
private function processMergedCells( |
789
|
|
|
DOMElement $cellData, |
790
|
|
|
string $tableNs, |
791
|
|
|
string $type, |
792
|
|
|
string $columnID, |
793
|
|
|
int $rowID, |
794
|
|
|
Spreadsheet $spreadsheet |
795
|
|
|
): void { |
796
|
|
|
if ( |
797
|
40 |
|
$cellData->hasAttributeNS($tableNs, 'number-columns-spanned') |
798
|
40 |
|
|| $cellData->hasAttributeNS($tableNs, 'number-rows-spanned') |
799
|
|
|
) { |
800
|
15 |
|
if (($type !== DataType::TYPE_NULL) || ($this->readDataOnly === false)) { |
801
|
15 |
|
$columnTo = $columnID; |
802
|
|
|
|
803
|
15 |
|
if ($cellData->hasAttributeNS($tableNs, 'number-columns-spanned')) { |
804
|
15 |
|
$columnIndex = Coordinate::columnIndexFromString($columnID); |
805
|
15 |
|
$columnIndex += (int) $cellData->getAttributeNS($tableNs, 'number-columns-spanned'); |
806
|
15 |
|
$columnIndex -= 2; |
807
|
|
|
|
808
|
15 |
|
$columnTo = Coordinate::stringFromColumnIndex($columnIndex + 1); |
809
|
|
|
} |
810
|
|
|
|
811
|
15 |
|
$rowTo = $rowID; |
812
|
|
|
|
813
|
15 |
|
if ($cellData->hasAttributeNS($tableNs, 'number-rows-spanned')) { |
814
|
15 |
|
$rowTo = $rowTo + (int) $cellData->getAttributeNS($tableNs, 'number-rows-spanned') - 1; |
815
|
|
|
} |
816
|
|
|
|
817
|
15 |
|
$cellRange = $columnID . $rowID . ':' . $columnTo . $rowTo; |
818
|
15 |
|
$spreadsheet->getActiveSheet()->mergeCells($cellRange, Worksheet::MERGE_CELL_CONTENT_HIDE); |
819
|
|
|
} |
820
|
|
|
} |
821
|
|
|
} |
822
|
|
|
} |
823
|
|
|
|