1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
namespace PhpOffice\PhpSpreadsheet\Reader; |
4
|
|
|
|
5
|
|
|
use DOMAttr; |
6
|
|
|
use DOMDocument; |
7
|
|
|
use DOMElement; |
8
|
|
|
use DOMNode; |
9
|
|
|
use DOMText; |
10
|
|
|
use PhpOffice\PhpSpreadsheet\Cell\Coordinate; |
11
|
|
|
use PhpOffice\PhpSpreadsheet\Cell\DataType; |
12
|
|
|
use PhpOffice\PhpSpreadsheet\Helper\Dimension as HelperDimension; |
13
|
|
|
use PhpOffice\PhpSpreadsheet\Reader\Ods\AutoFilter; |
14
|
|
|
use PhpOffice\PhpSpreadsheet\Reader\Ods\DefinedNames; |
15
|
|
|
use PhpOffice\PhpSpreadsheet\Reader\Ods\FormulaTranslator; |
16
|
|
|
use PhpOffice\PhpSpreadsheet\Reader\Ods\PageSettings; |
17
|
|
|
use PhpOffice\PhpSpreadsheet\Reader\Ods\Properties as DocumentProperties; |
18
|
|
|
use PhpOffice\PhpSpreadsheet\Reader\Security\XmlScanner; |
19
|
|
|
use PhpOffice\PhpSpreadsheet\RichText\RichText; |
20
|
|
|
use PhpOffice\PhpSpreadsheet\Shared\Date; |
21
|
|
|
use PhpOffice\PhpSpreadsheet\Shared\File; |
22
|
|
|
use PhpOffice\PhpSpreadsheet\Spreadsheet; |
23
|
|
|
use PhpOffice\PhpSpreadsheet\Style\NumberFormat; |
24
|
|
|
use PhpOffice\PhpSpreadsheet\Worksheet\Worksheet; |
25
|
|
|
use Throwable; |
26
|
|
|
use XMLReader; |
27
|
|
|
use ZipArchive; |
28
|
|
|
|
29
|
|
|
class Ods extends BaseReader |
30
|
|
|
{ |
31
|
|
|
const INITIAL_FILE = 'content.xml'; |
32
|
|
|
|
33
|
|
|
/** |
34
|
|
|
* Create a new Ods Reader instance. |
35
|
|
|
*/ |
36
|
99 |
|
public function __construct() |
37
|
|
|
{ |
38
|
99 |
|
parent::__construct(); |
39
|
99 |
|
$this->securityScanner = XmlScanner::getInstance($this); |
40
|
|
|
} |
41
|
|
|
|
42
|
|
|
/** |
43
|
|
|
* Can the current IReader read the file? |
44
|
|
|
*/ |
45
|
17 |
|
public function canRead(string $filename): bool |
46
|
|
|
{ |
47
|
17 |
|
$mimeType = 'UNKNOWN'; |
48
|
|
|
|
49
|
|
|
// Load file |
50
|
|
|
|
51
|
17 |
|
if (File::testFileNoThrow($filename, '')) { |
52
|
3 |
|
$zip = new ZipArchive(); |
53
|
3 |
|
if ($zip->open($filename) === true) { |
54
|
|
|
// check if it is an OOXML archive |
55
|
3 |
|
$stat = $zip->statName('mimetype'); |
56
|
3 |
|
if (!empty($stat) && ($stat['size'] <= 255)) { |
57
|
2 |
|
$mimeType = $zip->getFromName($stat['name']); |
58
|
1 |
|
} elseif ($zip->statName('META-INF/manifest.xml')) { |
59
|
1 |
|
$xml = simplexml_load_string( |
60
|
1 |
|
$this->getSecurityScannerOrThrow() |
61
|
1 |
|
->scan( |
62
|
1 |
|
$zip->getFromName( |
63
|
1 |
|
'META-INF/manifest.xml' |
64
|
1 |
|
) |
65
|
1 |
|
) |
66
|
1 |
|
); |
67
|
1 |
|
if ($xml !== false) { |
68
|
1 |
|
$namespacesContent = $xml->getNamespaces(true); |
69
|
1 |
|
if (isset($namespacesContent['manifest'])) { |
70
|
1 |
|
$manifest = $xml->children($namespacesContent['manifest']); |
71
|
1 |
|
foreach ($manifest as $manifestDataSet) { |
72
|
1 |
|
$manifestAttributes = $manifestDataSet->attributes($namespacesContent['manifest']); |
73
|
1 |
|
if ($manifestAttributes && $manifestAttributes->{'full-path'} == '/') { |
74
|
1 |
|
$mimeType = (string) $manifestAttributes->{'media-type'}; |
75
|
|
|
|
76
|
1 |
|
break; |
77
|
|
|
} |
78
|
|
|
} |
79
|
|
|
} |
80
|
|
|
} |
81
|
|
|
} |
82
|
|
|
|
83
|
3 |
|
$zip->close(); |
84
|
|
|
} |
85
|
|
|
} |
86
|
|
|
|
87
|
17 |
|
return $mimeType === 'application/vnd.oasis.opendocument.spreadsheet'; |
88
|
|
|
} |
89
|
|
|
|
90
|
|
|
/** |
91
|
|
|
* Reads names of the worksheets from a file, without parsing the whole file to a PhpSpreadsheet object. |
92
|
|
|
* |
93
|
|
|
* @return string[] |
94
|
|
|
*/ |
95
|
6 |
|
public function listWorksheetNames(string $filename): array |
96
|
|
|
{ |
97
|
6 |
|
File::assertFile($filename, self::INITIAL_FILE); |
98
|
|
|
|
99
|
2 |
|
$worksheetNames = []; |
100
|
|
|
|
101
|
2 |
|
$xml = new XMLReader(); |
102
|
2 |
|
$xml->xml( |
103
|
2 |
|
$this->getSecurityScannerOrThrow() |
104
|
2 |
|
->scanFile( |
105
|
2 |
|
'zip://' . realpath($filename) . '#' . self::INITIAL_FILE |
106
|
2 |
|
) |
107
|
2 |
|
); |
108
|
2 |
|
$xml->setParserProperty(2, true); |
109
|
|
|
|
110
|
|
|
// Step into the first level of content of the XML |
111
|
2 |
|
$xml->read(); |
112
|
2 |
|
while ($xml->read()) { |
113
|
|
|
// Quickly jump through to the office:body node |
114
|
2 |
|
while ($xml->name !== 'office:body') { |
115
|
2 |
|
if ($xml->isEmptyElement) { |
116
|
2 |
|
$xml->read(); |
117
|
|
|
} else { |
118
|
2 |
|
$xml->next(); |
119
|
|
|
} |
120
|
|
|
} |
121
|
|
|
// Now read each node until we find our first table:table node |
122
|
2 |
|
while ($xml->read()) { |
123
|
2 |
|
$xmlName = $xml->name; |
124
|
2 |
|
if ($xmlName == 'table:table' && $xml->nodeType == XMLReader::ELEMENT) { |
125
|
|
|
// Loop through each table:table node reading the table:name attribute for each worksheet name |
126
|
|
|
do { |
127
|
2 |
|
$worksheetName = $xml->getAttribute('table:name'); |
128
|
2 |
|
if (!empty($worksheetName)) { |
129
|
2 |
|
$worksheetNames[] = $worksheetName; |
130
|
|
|
} |
131
|
2 |
|
$xml->next(); |
132
|
2 |
|
} while ($xml->name == 'table:table' && $xml->nodeType == XMLReader::ELEMENT); |
133
|
|
|
} |
134
|
|
|
} |
135
|
|
|
} |
136
|
|
|
|
137
|
2 |
|
return $worksheetNames; |
138
|
|
|
} |
139
|
|
|
|
140
|
|
|
/** |
141
|
|
|
* Return worksheet info (Name, Last Column Letter, Last Column Index, Total Rows, Total Columns). |
142
|
|
|
*/ |
143
|
6 |
|
public function listWorksheetInfo(string $filename): array |
144
|
|
|
{ |
145
|
6 |
|
File::assertFile($filename, self::INITIAL_FILE); |
146
|
|
|
|
147
|
2 |
|
$worksheetInfo = []; |
148
|
|
|
|
149
|
2 |
|
$xml = new XMLReader(); |
150
|
2 |
|
$xml->xml( |
151
|
2 |
|
$this->getSecurityScannerOrThrow() |
152
|
2 |
|
->scanFile( |
153
|
2 |
|
'zip://' . realpath($filename) . '#' . self::INITIAL_FILE |
154
|
2 |
|
) |
155
|
2 |
|
); |
156
|
2 |
|
$xml->setParserProperty(2, true); |
157
|
|
|
|
158
|
|
|
// Step into the first level of content of the XML |
159
|
2 |
|
$xml->read(); |
160
|
2 |
|
$tableVisibility = []; |
161
|
2 |
|
$lastTableStyle = ''; |
162
|
|
|
|
163
|
2 |
|
while ($xml->read()) { |
164
|
2 |
|
if ($xml->name === 'style:style') { |
165
|
2 |
|
$styleType = $xml->getAttribute('style:family'); |
166
|
2 |
|
if ($styleType === 'table') { |
167
|
2 |
|
$lastTableStyle = $xml->getAttribute('style:name'); |
168
|
|
|
} |
169
|
2 |
|
} elseif ($xml->name === 'style:table-properties') { |
170
|
2 |
|
$visibility = $xml->getAttribute('table:display'); |
171
|
2 |
|
$tableVisibility[$lastTableStyle] = ($visibility === 'false') ? Worksheet::SHEETSTATE_HIDDEN : Worksheet::SHEETSTATE_VISIBLE; |
172
|
2 |
|
} elseif ($xml->name == 'table:table' && $xml->nodeType == XMLReader::ELEMENT) { |
173
|
2 |
|
$worksheetNames[] = $xml->getAttribute('table:name'); |
174
|
|
|
|
175
|
2 |
|
$styleName = $xml->getAttribute('table:style-name') ?? ''; |
176
|
2 |
|
$visibility = $tableVisibility[$styleName] ?? ''; |
177
|
2 |
|
$tmpInfo = [ |
178
|
2 |
|
'worksheetName' => $xml->getAttribute('table:name'), |
179
|
2 |
|
'lastColumnLetter' => 'A', |
180
|
2 |
|
'lastColumnIndex' => 0, |
181
|
2 |
|
'totalRows' => 0, |
182
|
2 |
|
'totalColumns' => 0, |
183
|
2 |
|
'sheetState' => $visibility, |
184
|
2 |
|
]; |
185
|
|
|
|
186
|
|
|
// Loop through each child node of the table:table element reading |
187
|
2 |
|
$currCells = 0; |
188
|
|
|
do { |
189
|
2 |
|
$xml->read(); |
190
|
2 |
|
if ($xml->name == 'table:table-row' && $xml->nodeType == XMLReader::ELEMENT) { |
191
|
2 |
|
$rowspan = $xml->getAttribute('table:number-rows-repeated'); |
192
|
2 |
|
$rowspan = empty($rowspan) ? 1 : $rowspan; |
193
|
2 |
|
$tmpInfo['totalRows'] += $rowspan; |
194
|
2 |
|
$tmpInfo['totalColumns'] = max($tmpInfo['totalColumns'], $currCells); |
195
|
2 |
|
$currCells = 0; |
196
|
|
|
// Step into the row |
197
|
2 |
|
$xml->read(); |
198
|
|
|
do { |
199
|
2 |
|
$doread = true; |
200
|
2 |
|
if ($xml->name == 'table:table-cell' && $xml->nodeType == XMLReader::ELEMENT) { |
201
|
2 |
|
if (!$xml->isEmptyElement) { |
202
|
2 |
|
++$currCells; |
203
|
2 |
|
$xml->next(); |
204
|
2 |
|
$doread = false; |
205
|
|
|
} |
206
|
1 |
|
} elseif ($xml->name == 'table:covered-table-cell' && $xml->nodeType == XMLReader::ELEMENT) { |
207
|
1 |
|
$mergeSize = $xml->getAttribute('table:number-columns-repeated'); |
208
|
1 |
|
$currCells += (int) $mergeSize; |
209
|
|
|
} |
210
|
2 |
|
if ($doread) { |
211
|
1 |
|
$xml->read(); |
212
|
|
|
} |
213
|
2 |
|
} while ($xml->name != 'table:table-row'); |
214
|
|
|
} |
215
|
2 |
|
} while ($xml->name != 'table:table'); |
216
|
|
|
|
217
|
2 |
|
$tmpInfo['totalColumns'] = max($tmpInfo['totalColumns'], $currCells); |
218
|
2 |
|
$tmpInfo['lastColumnIndex'] = $tmpInfo['totalColumns'] - 1; |
219
|
2 |
|
$tmpInfo['lastColumnLetter'] = Coordinate::stringFromColumnIndex($tmpInfo['lastColumnIndex'] + 1); |
220
|
2 |
|
$worksheetInfo[] = $tmpInfo; |
221
|
|
|
} |
222
|
|
|
} |
223
|
|
|
|
224
|
2 |
|
return $worksheetInfo; |
225
|
|
|
} |
226
|
|
|
|
227
|
|
|
/** |
228
|
|
|
* Loads PhpSpreadsheet from file. |
229
|
|
|
*/ |
230
|
68 |
|
protected function loadSpreadsheetFromFile(string $filename): Spreadsheet |
231
|
|
|
{ |
232
|
68 |
|
$spreadsheet = $this->newSpreadsheet(); |
233
|
68 |
|
$spreadsheet->setValueBinder($this->valueBinder); |
234
|
68 |
|
$spreadsheet->removeSheetByIndex(0); |
235
|
|
|
|
236
|
|
|
// Load into this instance |
237
|
68 |
|
return $this->loadIntoExisting($filename, $spreadsheet); |
238
|
|
|
} |
239
|
|
|
|
240
|
|
|
/** |
241
|
|
|
* Loads PhpSpreadsheet from file into PhpSpreadsheet instance. |
242
|
|
|
*/ |
243
|
72 |
|
public function loadIntoExisting(string $filename, Spreadsheet $spreadsheet): Spreadsheet |
244
|
|
|
{ |
245
|
72 |
|
File::assertFile($filename, self::INITIAL_FILE); |
246
|
|
|
|
247
|
68 |
|
$zip = new ZipArchive(); |
248
|
68 |
|
$zip->open($filename); |
249
|
|
|
|
250
|
|
|
// Meta |
251
|
|
|
|
252
|
68 |
|
$xml = @simplexml_load_string( |
253
|
68 |
|
$this->getSecurityScannerOrThrow() |
254
|
68 |
|
->scan($zip->getFromName('meta.xml')) |
255
|
68 |
|
); |
256
|
68 |
|
if ($xml === false) { |
257
|
1 |
|
throw new Exception('Unable to read data from {$pFilename}'); |
258
|
|
|
} |
259
|
|
|
|
260
|
67 |
|
$namespacesMeta = $xml->getNamespaces(true); |
261
|
|
|
|
262
|
67 |
|
(new DocumentProperties($spreadsheet))->load($xml, $namespacesMeta); |
263
|
|
|
|
264
|
|
|
// Styles |
265
|
|
|
|
266
|
67 |
|
$dom = new DOMDocument('1.01', 'UTF-8'); |
267
|
67 |
|
$dom->loadXML( |
268
|
67 |
|
$this->getSecurityScannerOrThrow() |
269
|
67 |
|
->scan($zip->getFromName('styles.xml')) |
270
|
67 |
|
); |
271
|
|
|
|
272
|
67 |
|
$pageSettings = new PageSettings($dom); |
273
|
|
|
|
274
|
|
|
// Main Content |
275
|
|
|
|
276
|
67 |
|
$dom = new DOMDocument('1.01', 'UTF-8'); |
277
|
67 |
|
$dom->loadXML( |
278
|
67 |
|
$this->getSecurityScannerOrThrow() |
279
|
67 |
|
->scan($zip->getFromName(self::INITIAL_FILE)) |
280
|
67 |
|
); |
281
|
|
|
|
282
|
67 |
|
$officeNs = (string) $dom->lookupNamespaceUri('office'); |
283
|
67 |
|
$tableNs = (string) $dom->lookupNamespaceUri('table'); |
284
|
67 |
|
$textNs = (string) $dom->lookupNamespaceUri('text'); |
285
|
67 |
|
$xlinkNs = (string) $dom->lookupNamespaceUri('xlink'); |
286
|
67 |
|
$styleNs = (string) $dom->lookupNamespaceUri('style'); |
287
|
|
|
|
288
|
67 |
|
$pageSettings->readStyleCrossReferences($dom); |
289
|
|
|
|
290
|
67 |
|
$autoFilterReader = new AutoFilter($spreadsheet, $tableNs); |
291
|
67 |
|
$definedNameReader = new DefinedNames($spreadsheet, $tableNs); |
292
|
67 |
|
$columnWidths = []; |
293
|
67 |
|
$automaticStyle0 = $dom->getElementsByTagNameNS($officeNs, 'automatic-styles')->item(0); |
294
|
67 |
|
$automaticStyles = ($automaticStyle0 === null) ? [] : $automaticStyle0->getElementsByTagNameNS($styleNs, 'style'); |
295
|
67 |
|
foreach ($automaticStyles as $automaticStyle) { |
296
|
67 |
|
$styleName = $automaticStyle->getAttributeNS($styleNs, 'name'); |
297
|
67 |
|
$styleFamily = $automaticStyle->getAttributeNS($styleNs, 'family'); |
298
|
67 |
|
if ($styleFamily === 'table-column') { |
299
|
42 |
|
$tcprops = $automaticStyle->getElementsByTagNameNS($styleNs, 'table-column-properties'); |
300
|
42 |
|
$tcprop = $tcprops->item(0); |
301
|
42 |
|
if ($tcprop !== null) { |
302
|
42 |
|
$columnWidth = $tcprop->getAttributeNs($styleNs, 'column-width'); |
303
|
42 |
|
$columnWidths[$styleName] = $columnWidth; |
304
|
|
|
} |
305
|
|
|
} |
306
|
|
|
} |
307
|
|
|
|
308
|
|
|
// Content |
309
|
67 |
|
$item0 = $dom->getElementsByTagNameNS($officeNs, 'body')->item(0); |
310
|
67 |
|
$spreadsheets = ($item0 === null) ? [] : $item0->getElementsByTagNameNS($officeNs, 'spreadsheet'); |
311
|
|
|
|
312
|
67 |
|
foreach ($spreadsheets as $workbookData) { |
313
|
|
|
/** @var DOMElement $workbookData */ |
314
|
67 |
|
$tables = $workbookData->getElementsByTagNameNS($tableNs, 'table'); |
315
|
|
|
|
316
|
67 |
|
$worksheetID = 0; |
317
|
67 |
|
foreach ($tables as $worksheetDataSet) { |
318
|
|
|
/** @var DOMElement $worksheetDataSet */ |
319
|
67 |
|
$worksheetName = $worksheetDataSet->getAttributeNS($tableNs, 'name'); |
320
|
|
|
|
321
|
|
|
// Check loadSheetsOnly |
322
|
|
|
if ( |
323
|
67 |
|
$this->loadSheetsOnly !== null |
324
|
|
|
&& $worksheetName |
325
|
67 |
|
&& !in_array($worksheetName, $this->loadSheetsOnly) |
326
|
|
|
) { |
327
|
3 |
|
continue; |
328
|
|
|
} |
329
|
|
|
|
330
|
66 |
|
$worksheetStyleName = $worksheetDataSet->getAttributeNS($tableNs, 'style-name'); |
331
|
|
|
|
332
|
|
|
// Create sheet |
333
|
66 |
|
$spreadsheet->createSheet(); |
334
|
66 |
|
$spreadsheet->setActiveSheetIndex($worksheetID); |
335
|
|
|
|
336
|
66 |
|
if ($worksheetName || is_numeric($worksheetName)) { |
337
|
|
|
// Use false for $updateFormulaCellReferences to prevent adjustment of worksheet references in |
338
|
|
|
// formula cells... during the load, all formulae should be correct, and we're simply |
339
|
|
|
// bringing the worksheet name in line with the formula, not the reverse |
340
|
66 |
|
$spreadsheet->getActiveSheet()->setTitle((string) $worksheetName, false, false); |
341
|
|
|
} |
342
|
|
|
|
343
|
|
|
// Go through every child of table element |
344
|
66 |
|
$rowID = 1; |
345
|
66 |
|
$tableColumnIndex = 1; |
346
|
66 |
|
foreach ($worksheetDataSet->childNodes as $childNode) { |
347
|
|
|
/** @var DOMElement $childNode */ |
348
|
|
|
|
349
|
|
|
// Filter elements which are not under the "table" ns |
350
|
66 |
|
if ($childNode->namespaceURI != $tableNs) { |
351
|
37 |
|
continue; |
352
|
|
|
} |
353
|
|
|
|
354
|
66 |
|
$key = $childNode->nodeName; |
355
|
|
|
|
356
|
|
|
// Remove ns from node name |
357
|
66 |
|
if (str_contains($key, ':')) { |
358
|
66 |
|
$keyChunks = explode(':', $key); |
359
|
66 |
|
$key = array_pop($keyChunks); |
360
|
|
|
} |
361
|
|
|
|
362
|
|
|
switch ($key) { |
363
|
66 |
|
case 'table-header-rows': |
364
|
|
|
/// TODO :: Figure this out. This is only a partial implementation I guess. |
365
|
|
|
// ($rowData it's not used at all and I'm not sure that PHPExcel |
366
|
|
|
// has an API for this) |
367
|
|
|
|
368
|
|
|
// foreach ($rowData as $keyRowData => $cellData) { |
369
|
|
|
// $rowData = $cellData; |
370
|
|
|
// break; |
371
|
|
|
// } |
372
|
|
|
break; |
373
|
66 |
|
case 'table-column': |
374
|
41 |
|
if ($childNode->hasAttributeNS($tableNs, 'number-columns-repeated')) { |
375
|
36 |
|
$rowRepeats = (int) $childNode->getAttributeNS($tableNs, 'number-columns-repeated'); |
376
|
|
|
} else { |
377
|
16 |
|
$rowRepeats = 1; |
378
|
|
|
} |
379
|
41 |
|
$tableStyleName = $childNode->getAttributeNS($tableNs, 'style-name'); |
380
|
41 |
|
if (isset($columnWidths[$tableStyleName])) { |
381
|
41 |
|
$columnWidth = new HelperDimension($columnWidths[$tableStyleName]); |
382
|
41 |
|
$tableColumnString = Coordinate::stringFromColumnIndex($tableColumnIndex); |
383
|
41 |
|
for ($rowRepeats2 = $rowRepeats; $rowRepeats2 > 0; --$rowRepeats2) { |
384
|
41 |
|
$spreadsheet->getActiveSheet() |
385
|
41 |
|
->getColumnDimension($tableColumnString) |
386
|
41 |
|
->setWidth($columnWidth->toUnit('cm'), 'cm'); |
387
|
41 |
|
++$tableColumnString; |
388
|
|
|
} |
389
|
|
|
} |
390
|
41 |
|
$tableColumnIndex += $rowRepeats; |
391
|
|
|
|
392
|
41 |
|
break; |
393
|
65 |
|
case 'table-row': |
394
|
65 |
|
if ($childNode->hasAttributeNS($tableNs, 'number-rows-repeated')) { |
395
|
24 |
|
$rowRepeats = (int) $childNode->getAttributeNS($tableNs, 'number-rows-repeated'); |
396
|
|
|
} else { |
397
|
65 |
|
$rowRepeats = 1; |
398
|
|
|
} |
399
|
|
|
|
400
|
65 |
|
$columnID = 'A'; |
401
|
|
|
/** @var DOMElement|DOMText $cellData */ |
402
|
65 |
|
foreach ($childNode->childNodes as $cellData) { |
403
|
65 |
|
if ($cellData instanceof DOMText) { |
404
|
2 |
|
continue; // should just be whitespace |
405
|
|
|
} |
406
|
65 |
|
if (!$this->getReadFilter()->readCell($columnID, $rowID, $worksheetName)) { |
407
|
2 |
|
if ($cellData->hasAttributeNS($tableNs, 'number-columns-repeated')) { |
408
|
1 |
|
$colRepeats = (int) $cellData->getAttributeNS($tableNs, 'number-columns-repeated'); |
409
|
|
|
} else { |
410
|
2 |
|
$colRepeats = 1; |
411
|
|
|
} |
412
|
|
|
|
413
|
2 |
|
for ($i = 0; $i < $colRepeats; ++$i) { |
414
|
2 |
|
++$columnID; |
415
|
|
|
} |
416
|
|
|
|
417
|
2 |
|
continue; |
418
|
|
|
} |
419
|
|
|
|
420
|
|
|
// Initialize variables |
421
|
65 |
|
$formatting = $hyperlink = null; |
422
|
65 |
|
$hasCalculatedValue = false; |
423
|
65 |
|
$cellDataFormula = ''; |
424
|
65 |
|
$cellDataType = ''; |
425
|
65 |
|
$cellDataRef = ''; |
426
|
|
|
|
427
|
65 |
|
if ($cellData->hasAttributeNS($tableNs, 'formula')) { |
428
|
25 |
|
$cellDataFormula = $cellData->getAttributeNS($tableNs, 'formula'); |
429
|
25 |
|
$hasCalculatedValue = true; |
430
|
|
|
} |
431
|
65 |
|
if ($cellData->hasAttributeNS($tableNs, 'number-matrix-columns-spanned')) { |
432
|
12 |
|
if ($cellData->hasAttributeNS($tableNs, 'number-matrix-rows-spanned')) { |
433
|
12 |
|
$cellDataType = 'array'; |
434
|
12 |
|
$arrayRow = (int) $cellData->getAttributeNS($tableNs, 'number-matrix-rows-spanned'); |
435
|
12 |
|
$arrayCol = (int) $cellData->getAttributeNS($tableNs, 'number-matrix-columns-spanned'); |
436
|
12 |
|
$lastRow = $rowID + $arrayRow - 1; |
437
|
12 |
|
$lastCol = $columnID; |
438
|
12 |
|
while ($arrayCol > 1) { |
439
|
7 |
|
++$lastCol; |
440
|
7 |
|
--$arrayCol; |
441
|
|
|
} |
442
|
12 |
|
$cellDataRef = "$columnID$rowID:$lastCol$lastRow"; |
443
|
|
|
} |
444
|
|
|
} |
445
|
|
|
|
446
|
|
|
// Annotations |
447
|
65 |
|
$annotation = $cellData->getElementsByTagNameNS($officeNs, 'annotation'); |
448
|
|
|
|
449
|
65 |
|
if ($annotation->length > 0 && $annotation->item(0) !== null) { |
450
|
11 |
|
$textNode = $annotation->item(0)->getElementsByTagNameNS($textNs, 'p'); |
451
|
11 |
|
$textNodeLength = $textNode->length; |
452
|
11 |
|
$newLineOwed = false; |
453
|
11 |
|
for ($textNodeIndex = 0; $textNodeIndex < $textNodeLength; ++$textNodeIndex) { |
454
|
11 |
|
$textNodeItem = $textNode->item($textNodeIndex); |
455
|
11 |
|
if ($textNodeItem !== null) { |
456
|
11 |
|
$text = $this->scanElementForText($textNodeItem); |
457
|
11 |
|
if ($newLineOwed) { |
458
|
1 |
|
$spreadsheet->getActiveSheet() |
459
|
1 |
|
->getComment($columnID . $rowID) |
460
|
1 |
|
->getText() |
461
|
1 |
|
->createText("\n"); |
462
|
|
|
} |
463
|
11 |
|
$newLineOwed = true; |
464
|
|
|
|
465
|
11 |
|
$spreadsheet->getActiveSheet() |
466
|
11 |
|
->getComment($columnID . $rowID) |
467
|
11 |
|
->getText() |
468
|
11 |
|
->createText($this->parseRichText($text)); |
469
|
|
|
} |
470
|
|
|
} |
471
|
|
|
} |
472
|
|
|
|
473
|
|
|
// Content |
474
|
|
|
|
475
|
|
|
/** @var DOMElement[] $paragraphs */ |
476
|
65 |
|
$paragraphs = []; |
477
|
|
|
|
478
|
65 |
|
foreach ($cellData->childNodes as $item) { |
479
|
|
|
/** @var DOMElement $item */ |
480
|
|
|
|
481
|
|
|
// Filter text:p elements |
482
|
65 |
|
if ($item->nodeName == 'text:p') { |
483
|
65 |
|
$paragraphs[] = $item; |
484
|
|
|
} |
485
|
|
|
} |
486
|
|
|
|
487
|
65 |
|
if (count($paragraphs) > 0) { |
488
|
|
|
// Consolidate if there are multiple p records (maybe with spans as well) |
489
|
65 |
|
$dataArray = []; |
490
|
|
|
|
491
|
|
|
// Text can have multiple text:p and within those, multiple text:span. |
492
|
|
|
// text:p newlines, but text:span does not. |
493
|
|
|
// Also, here we assume there is no text data is span fields are specified, since |
494
|
|
|
// we have no way of knowing proper positioning anyway. |
495
|
|
|
|
496
|
65 |
|
foreach ($paragraphs as $pData) { |
497
|
65 |
|
$dataArray[] = $this->scanElementForText($pData); |
498
|
|
|
} |
499
|
65 |
|
$allCellDataText = implode("\n", $dataArray); |
500
|
|
|
|
501
|
65 |
|
$type = $cellData->getAttributeNS($officeNs, 'value-type'); |
502
|
|
|
|
503
|
|
|
switch ($type) { |
504
|
65 |
|
case 'string': |
505
|
42 |
|
$type = DataType::TYPE_STRING; |
506
|
42 |
|
$dataValue = $allCellDataText; |
507
|
|
|
|
508
|
42 |
|
foreach ($paragraphs as $paragraph) { |
509
|
42 |
|
$link = $paragraph->getElementsByTagNameNS($textNs, 'a'); |
510
|
42 |
|
if ($link->length > 0 && $link->item(0) !== null) { |
511
|
7 |
|
$hyperlink = $link->item(0)->getAttributeNS($xlinkNs, 'href'); |
512
|
|
|
} |
513
|
|
|
} |
514
|
|
|
|
515
|
42 |
|
break; |
516
|
44 |
|
case 'boolean': |
517
|
8 |
|
$type = DataType::TYPE_BOOL; |
518
|
8 |
|
$dataValue = ($cellData->getAttributeNS($officeNs, 'boolean-value') === 'true') ? true : false; |
519
|
|
|
|
520
|
8 |
|
break; |
521
|
42 |
|
case 'percentage': |
522
|
4 |
|
$type = DataType::TYPE_NUMERIC; |
523
|
4 |
|
$dataValue = (float) $cellData->getAttributeNS($officeNs, 'value'); |
524
|
|
|
|
525
|
|
|
// percentage should always be float |
526
|
|
|
//if (floor($dataValue) == $dataValue) { |
527
|
|
|
// $dataValue = (int) $dataValue; |
528
|
|
|
//} |
529
|
4 |
|
$formatting = NumberFormat::FORMAT_PERCENTAGE_00; |
530
|
|
|
|
531
|
4 |
|
break; |
532
|
42 |
|
case 'currency': |
533
|
4 |
|
$type = DataType::TYPE_NUMERIC; |
534
|
4 |
|
$dataValue = (float) $cellData->getAttributeNS($officeNs, 'value'); |
535
|
|
|
|
536
|
4 |
|
if (floor($dataValue) == $dataValue) { |
537
|
4 |
|
$dataValue = (int) $dataValue; |
538
|
|
|
} |
539
|
4 |
|
$formatting = NumberFormat::FORMAT_CURRENCY_USD_INTEGER; |
540
|
|
|
|
541
|
4 |
|
break; |
542
|
38 |
|
case 'float': |
543
|
38 |
|
$type = DataType::TYPE_NUMERIC; |
544
|
38 |
|
$dataValue = (float) $cellData->getAttributeNS($officeNs, 'value'); |
545
|
|
|
|
546
|
38 |
|
if (floor($dataValue) == $dataValue) { |
547
|
36 |
|
if ($dataValue == (int) $dataValue) { |
548
|
36 |
|
$dataValue = (int) $dataValue; |
549
|
|
|
} |
550
|
|
|
} |
551
|
|
|
|
552
|
38 |
|
break; |
553
|
8 |
|
case 'date': |
554
|
7 |
|
$type = DataType::TYPE_NUMERIC; |
555
|
7 |
|
$value = $cellData->getAttributeNS($officeNs, 'date-value'); |
556
|
7 |
|
$dataValue = Date::convertIsoDate($value); |
557
|
|
|
|
558
|
7 |
|
if ($dataValue != floor($dataValue)) { |
559
|
6 |
|
$formatting = NumberFormat::FORMAT_DATE_XLSX15 |
560
|
6 |
|
. ' ' |
561
|
6 |
|
. NumberFormat::FORMAT_DATE_TIME4; |
562
|
|
|
} else { |
563
|
7 |
|
$formatting = NumberFormat::FORMAT_DATE_XLSX15; |
564
|
|
|
} |
565
|
|
|
|
566
|
7 |
|
break; |
567
|
7 |
|
case 'time': |
568
|
6 |
|
$type = DataType::TYPE_NUMERIC; |
569
|
|
|
|
570
|
6 |
|
$timeValue = $cellData->getAttributeNS($officeNs, 'time-value'); |
571
|
|
|
|
572
|
6 |
|
$dataValue = Date::PHPToExcel( |
573
|
6 |
|
strtotime( |
574
|
6 |
|
'01-01-1970 ' . implode(':', sscanf($timeValue, 'PT%dH%dM%dS') ?? []) |
575
|
6 |
|
) |
576
|
6 |
|
); |
577
|
6 |
|
$formatting = NumberFormat::FORMAT_DATE_TIME4; |
578
|
|
|
|
579
|
6 |
|
break; |
580
|
|
|
default: |
581
|
1 |
|
$dataValue = null; |
582
|
|
|
} |
583
|
|
|
} else { |
584
|
39 |
|
$type = DataType::TYPE_NULL; |
585
|
39 |
|
$dataValue = null; |
586
|
|
|
} |
587
|
|
|
|
588
|
65 |
|
if ($hasCalculatedValue) { |
589
|
25 |
|
$type = DataType::TYPE_FORMULA; |
590
|
25 |
|
$cellDataFormula = substr($cellDataFormula, strpos($cellDataFormula, ':=') + 1); |
591
|
25 |
|
$cellDataFormula = FormulaTranslator::convertToExcelFormulaValue($cellDataFormula); |
592
|
|
|
} |
593
|
|
|
|
594
|
65 |
|
if ($cellData->hasAttributeNS($tableNs, 'number-columns-repeated')) { |
595
|
34 |
|
$colRepeats = (int) $cellData->getAttributeNS($tableNs, 'number-columns-repeated'); |
596
|
|
|
} else { |
597
|
65 |
|
$colRepeats = 1; |
598
|
|
|
} |
599
|
|
|
|
600
|
65 |
|
if ($type !== null) { // @phpstan-ignore-line |
601
|
65 |
|
for ($i = 0; $i < $colRepeats; ++$i) { |
602
|
65 |
|
if ($i > 0) { |
603
|
34 |
|
++$columnID; |
604
|
|
|
} |
605
|
|
|
|
606
|
65 |
|
if ($type !== DataType::TYPE_NULL) { |
607
|
65 |
|
for ($rowAdjust = 0; $rowAdjust < $rowRepeats; ++$rowAdjust) { |
608
|
65 |
|
$rID = $rowID + $rowAdjust; |
609
|
|
|
|
610
|
65 |
|
$cell = $spreadsheet->getActiveSheet() |
611
|
65 |
|
->getCell($columnID . $rID); |
612
|
|
|
|
613
|
|
|
// Set value |
614
|
65 |
|
if ($hasCalculatedValue) { |
615
|
25 |
|
$cell->setValueExplicit($cellDataFormula, $type); |
616
|
25 |
|
if ($cellDataType === 'array') { |
617
|
12 |
|
$cell->setFormulaAttributes(['t' => 'array', 'ref' => $cellDataRef]); |
618
|
|
|
} |
619
|
64 |
|
} elseif ($type !== '' || $dataValue !== null) { |
620
|
64 |
|
$cell->setValueExplicit($dataValue, $type); |
621
|
|
|
} |
622
|
|
|
|
623
|
65 |
|
if ($hasCalculatedValue) { |
624
|
25 |
|
$cell->setCalculatedValue($dataValue, $type === DataType::TYPE_NUMERIC); |
625
|
|
|
} |
626
|
|
|
|
627
|
|
|
// Set other properties |
628
|
65 |
|
if ($formatting !== null) { |
629
|
11 |
|
$spreadsheet->getActiveSheet() |
630
|
11 |
|
->getStyle($columnID . $rID) |
631
|
11 |
|
->getNumberFormat() |
632
|
11 |
|
->setFormatCode($formatting); |
633
|
|
|
} else { |
634
|
65 |
|
$spreadsheet->getActiveSheet() |
635
|
65 |
|
->getStyle($columnID . $rID) |
636
|
65 |
|
->getNumberFormat() |
637
|
65 |
|
->setFormatCode(NumberFormat::FORMAT_GENERAL); |
638
|
|
|
} |
639
|
|
|
|
640
|
65 |
|
if ($hyperlink !== null) { |
641
|
7 |
|
if ($hyperlink[0] === '#') { |
642
|
1 |
|
$hyperlink = 'sheet://' . substr($hyperlink, 1); |
643
|
|
|
} |
644
|
7 |
|
$cell->getHyperlink() |
645
|
7 |
|
->setUrl($hyperlink); |
646
|
|
|
} |
647
|
|
|
} |
648
|
|
|
} |
649
|
|
|
} |
650
|
|
|
} |
651
|
|
|
|
652
|
|
|
// Merged cells |
653
|
65 |
|
$this->processMergedCells($cellData, $tableNs, $type, $columnID, $rowID, $spreadsheet); |
654
|
|
|
|
655
|
65 |
|
++$columnID; |
656
|
|
|
} |
657
|
65 |
|
$rowID += $rowRepeats; |
658
|
|
|
|
659
|
65 |
|
break; |
660
|
|
|
} |
661
|
|
|
} |
662
|
66 |
|
$pageSettings->setVisibilityForWorksheet($spreadsheet->getActiveSheet(), $worksheetStyleName); |
663
|
66 |
|
$pageSettings->setPrintSettingsForWorksheet($spreadsheet->getActiveSheet(), $worksheetStyleName); |
664
|
66 |
|
++$worksheetID; |
665
|
|
|
} |
666
|
|
|
|
667
|
67 |
|
$autoFilterReader->read($workbookData); |
668
|
67 |
|
$definedNameReader->read($workbookData); |
669
|
|
|
} |
670
|
67 |
|
$spreadsheet->setActiveSheetIndex(0); |
671
|
|
|
|
672
|
66 |
|
if ($zip->locateName('settings.xml') !== false) { |
673
|
59 |
|
$this->processSettings($zip, $spreadsheet); |
674
|
|
|
} |
675
|
|
|
|
676
|
|
|
// Return |
677
|
66 |
|
return $spreadsheet; |
678
|
|
|
} |
679
|
|
|
|
680
|
59 |
|
private function processSettings(ZipArchive $zip, Spreadsheet $spreadsheet): void |
681
|
|
|
{ |
682
|
59 |
|
$dom = new DOMDocument('1.01', 'UTF-8'); |
683
|
59 |
|
$dom->loadXML( |
684
|
59 |
|
$this->getSecurityScannerOrThrow() |
685
|
59 |
|
->scan($zip->getFromName('settings.xml')) |
686
|
59 |
|
); |
687
|
|
|
//$xlinkNs = $dom->lookupNamespaceUri('xlink'); |
688
|
59 |
|
$configNs = (string) $dom->lookupNamespaceUri('config'); |
689
|
|
|
//$oooNs = $dom->lookupNamespaceUri('ooo'); |
690
|
59 |
|
$officeNs = (string) $dom->lookupNamespaceUri('office'); |
691
|
59 |
|
$settings = $dom->getElementsByTagNameNS($officeNs, 'settings') |
692
|
59 |
|
->item(0); |
693
|
59 |
|
if ($settings !== null) { |
694
|
59 |
|
$this->lookForActiveSheet($settings, $spreadsheet, $configNs); |
695
|
59 |
|
$this->lookForSelectedCells($settings, $spreadsheet, $configNs); |
696
|
|
|
} |
697
|
|
|
} |
698
|
|
|
|
699
|
59 |
|
private function lookForActiveSheet(DOMElement $settings, Spreadsheet $spreadsheet, string $configNs): void |
700
|
|
|
{ |
701
|
|
|
/** @var DOMElement $t */ |
702
|
59 |
|
foreach ($settings->getElementsByTagNameNS($configNs, 'config-item') as $t) { |
703
|
58 |
|
if ($t->getAttributeNs($configNs, 'name') === 'ActiveTable') { |
704
|
|
|
try { |
705
|
58 |
|
$spreadsheet->setActiveSheetIndexByName($t->nodeValue ?? ''); |
706
|
1 |
|
} catch (Throwable) { |
707
|
|
|
// do nothing |
708
|
|
|
} |
709
|
|
|
|
710
|
58 |
|
break; |
711
|
|
|
} |
712
|
|
|
} |
713
|
|
|
} |
714
|
|
|
|
715
|
59 |
|
private function lookForSelectedCells(DOMElement $settings, Spreadsheet $spreadsheet, string $configNs): void |
716
|
|
|
{ |
717
|
|
|
/** @var DOMElement $t */ |
718
|
59 |
|
foreach ($settings->getElementsByTagNameNS($configNs, 'config-item-map-named') as $t) { |
719
|
58 |
|
if ($t->getAttributeNs($configNs, 'name') === 'Tables') { |
720
|
58 |
|
foreach ($t->getElementsByTagNameNS($configNs, 'config-item-map-entry') as $ws) { |
721
|
58 |
|
$setRow = $setCol = ''; |
722
|
58 |
|
$wsname = $ws->getAttributeNs($configNs, 'name'); |
723
|
58 |
|
foreach ($ws->getElementsByTagNameNS($configNs, 'config-item') as $configItem) { |
724
|
58 |
|
$attrName = $configItem->getAttributeNs($configNs, 'name'); |
725
|
58 |
|
if ($attrName === 'CursorPositionX') { |
726
|
58 |
|
$setCol = $configItem->nodeValue; |
727
|
|
|
} |
728
|
58 |
|
if ($attrName === 'CursorPositionY') { |
729
|
58 |
|
$setRow = $configItem->nodeValue; |
730
|
|
|
} |
731
|
|
|
} |
732
|
58 |
|
$this->setSelected($spreadsheet, $wsname, "$setCol", "$setRow"); |
733
|
|
|
} |
734
|
|
|
|
735
|
58 |
|
break; |
736
|
|
|
} |
737
|
|
|
} |
738
|
|
|
} |
739
|
|
|
|
740
|
58 |
|
private function setSelected(Spreadsheet $spreadsheet, string $wsname, string $setCol, string $setRow): void |
741
|
|
|
{ |
742
|
58 |
|
if (is_numeric($setCol) && is_numeric($setRow)) { |
743
|
58 |
|
$sheet = $spreadsheet->getSheetByName($wsname); |
744
|
58 |
|
if ($sheet !== null) { |
745
|
58 |
|
$sheet->setSelectedCells([(int) $setCol + 1, (int) $setRow + 1]); |
746
|
|
|
} |
747
|
|
|
} |
748
|
|
|
} |
749
|
|
|
|
750
|
|
|
/** |
751
|
|
|
* Recursively scan element. |
752
|
|
|
*/ |
753
|
65 |
|
protected function scanElementForText(DOMNode $element): string |
754
|
|
|
{ |
755
|
65 |
|
$str = ''; |
756
|
65 |
|
foreach ($element->childNodes as $child) { |
757
|
|
|
/** @var DOMNode $child */ |
758
|
65 |
|
if ($child->nodeType == XML_TEXT_NODE) { |
759
|
65 |
|
$str .= $child->nodeValue; |
760
|
16 |
|
} elseif ($child->nodeType == XML_ELEMENT_NODE && $child->nodeName == 'text:line-break') { |
761
|
1 |
|
$str .= "\n"; |
762
|
16 |
|
} elseif ($child->nodeType == XML_ELEMENT_NODE && $child->nodeName == 'text:s') { |
763
|
|
|
// It's a space |
764
|
|
|
|
765
|
|
|
// Multiple spaces? |
766
|
4 |
|
$attributes = $child->attributes; |
767
|
|
|
/** @var ?DOMAttr $cAttr */ |
768
|
4 |
|
$cAttr = ($attributes === null) ? null : $attributes->getNamedItem('c'); |
769
|
4 |
|
$multiplier = self::getMultiplier($cAttr); |
770
|
4 |
|
$str .= str_repeat(' ', $multiplier); |
771
|
|
|
} |
772
|
|
|
|
773
|
65 |
|
if ($child->hasChildNodes()) { |
774
|
16 |
|
$str .= $this->scanElementForText($child); |
775
|
|
|
} |
776
|
|
|
} |
777
|
|
|
|
778
|
65 |
|
return $str; |
779
|
|
|
} |
780
|
|
|
|
781
|
4 |
|
private static function getMultiplier(?DOMAttr $cAttr): int |
782
|
|
|
{ |
783
|
4 |
|
if ($cAttr) { |
784
|
4 |
|
$multiplier = (int) $cAttr->nodeValue; |
785
|
|
|
} else { |
786
|
4 |
|
$multiplier = 1; |
787
|
|
|
} |
788
|
|
|
|
789
|
4 |
|
return $multiplier; |
790
|
|
|
} |
791
|
|
|
|
792
|
11 |
|
private function parseRichText(string $is): RichText |
793
|
|
|
{ |
794
|
11 |
|
$value = new RichText(); |
795
|
11 |
|
$value->createText($is); |
796
|
|
|
|
797
|
11 |
|
return $value; |
798
|
|
|
} |
799
|
|
|
|
800
|
65 |
|
private function processMergedCells( |
801
|
|
|
DOMElement $cellData, |
802
|
|
|
string $tableNs, |
803
|
|
|
string $type, |
804
|
|
|
string $columnID, |
805
|
|
|
int $rowID, |
806
|
|
|
Spreadsheet $spreadsheet |
807
|
|
|
): void { |
808
|
|
|
if ( |
809
|
65 |
|
$cellData->hasAttributeNS($tableNs, 'number-columns-spanned') |
810
|
65 |
|
|| $cellData->hasAttributeNS($tableNs, 'number-rows-spanned') |
811
|
|
|
) { |
812
|
17 |
|
if (($type !== DataType::TYPE_NULL) || ($this->readDataOnly === false)) { |
813
|
17 |
|
$columnTo = $columnID; |
814
|
|
|
|
815
|
17 |
|
if ($cellData->hasAttributeNS($tableNs, 'number-columns-spanned')) { |
816
|
16 |
|
$columnIndex = Coordinate::columnIndexFromString($columnID); |
817
|
16 |
|
$columnIndex += (int) $cellData->getAttributeNS($tableNs, 'number-columns-spanned'); |
818
|
16 |
|
$columnIndex -= 2; |
819
|
|
|
|
820
|
16 |
|
$columnTo = Coordinate::stringFromColumnIndex($columnIndex + 1); |
821
|
|
|
} |
822
|
|
|
|
823
|
17 |
|
$rowTo = $rowID; |
824
|
|
|
|
825
|
17 |
|
if ($cellData->hasAttributeNS($tableNs, 'number-rows-spanned')) { |
826
|
17 |
|
$rowTo = $rowTo + (int) $cellData->getAttributeNS($tableNs, 'number-rows-spanned') - 1; |
827
|
|
|
} |
828
|
|
|
|
829
|
17 |
|
$cellRange = $columnID . $rowID . ':' . $columnTo . $rowTo; |
830
|
17 |
|
$spreadsheet->getActiveSheet()->mergeCells($cellRange, Worksheet::MERGE_CELL_CONTENT_HIDE); |
831
|
|
|
} |
832
|
|
|
} |
833
|
|
|
} |
834
|
|
|
} |
835
|
|
|
|