1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
namespace PhpOffice\PhpSpreadsheet\Reader; |
4
|
|
|
|
5
|
|
|
use DOMAttr; |
6
|
|
|
use DOMDocument; |
7
|
|
|
use DOMElement; |
8
|
|
|
use DOMNode; |
9
|
|
|
use DOMText; |
10
|
|
|
use PhpOffice\PhpSpreadsheet\Cell\Coordinate; |
11
|
|
|
use PhpOffice\PhpSpreadsheet\Cell\DataType; |
12
|
|
|
use PhpOffice\PhpSpreadsheet\Helper\Dimension as HelperDimension; |
13
|
|
|
use PhpOffice\PhpSpreadsheet\Reader\Ods\AutoFilter; |
14
|
|
|
use PhpOffice\PhpSpreadsheet\Reader\Ods\DefinedNames; |
15
|
|
|
use PhpOffice\PhpSpreadsheet\Reader\Ods\FormulaTranslator; |
16
|
|
|
use PhpOffice\PhpSpreadsheet\Reader\Ods\PageSettings; |
17
|
|
|
use PhpOffice\PhpSpreadsheet\Reader\Ods\Properties as DocumentProperties; |
18
|
|
|
use PhpOffice\PhpSpreadsheet\Reader\Security\XmlScanner; |
19
|
|
|
use PhpOffice\PhpSpreadsheet\RichText\RichText; |
20
|
|
|
use PhpOffice\PhpSpreadsheet\Shared\Date; |
21
|
|
|
use PhpOffice\PhpSpreadsheet\Shared\File; |
22
|
|
|
use PhpOffice\PhpSpreadsheet\Spreadsheet; |
23
|
|
|
use PhpOffice\PhpSpreadsheet\Style\NumberFormat; |
24
|
|
|
use PhpOffice\PhpSpreadsheet\Worksheet\Worksheet; |
25
|
|
|
use Throwable; |
26
|
|
|
use XMLReader; |
27
|
|
|
use ZipArchive; |
28
|
|
|
|
29
|
|
|
class Ods extends BaseReader |
30
|
|
|
{ |
31
|
|
|
const INITIAL_FILE = 'content.xml'; |
32
|
|
|
|
33
|
|
|
/** |
34
|
|
|
* Create a new Ods Reader instance. |
35
|
|
|
*/ |
36
|
98 |
|
public function __construct() |
37
|
|
|
{ |
38
|
98 |
|
parent::__construct(); |
39
|
98 |
|
$this->securityScanner = XmlScanner::getInstance($this); |
40
|
|
|
} |
41
|
|
|
|
42
|
|
|
/** |
43
|
|
|
* Can the current IReader read the file? |
44
|
|
|
*/ |
45
|
17 |
|
public function canRead(string $filename): bool |
46
|
|
|
{ |
47
|
17 |
|
$mimeType = 'UNKNOWN'; |
48
|
|
|
|
49
|
|
|
// Load file |
50
|
|
|
|
51
|
17 |
|
if (File::testFileNoThrow($filename, '')) { |
52
|
3 |
|
$zip = new ZipArchive(); |
53
|
3 |
|
if ($zip->open($filename) === true) { |
54
|
|
|
// check if it is an OOXML archive |
55
|
3 |
|
$stat = $zip->statName('mimetype'); |
56
|
3 |
|
if (!empty($stat) && ($stat['size'] <= 255)) { |
57
|
2 |
|
$mimeType = $zip->getFromName($stat['name']); |
58
|
1 |
|
} elseif ($zip->statName('META-INF/manifest.xml')) { |
59
|
1 |
|
$xml = simplexml_load_string( |
60
|
1 |
|
$this->getSecurityScannerOrThrow() |
61
|
1 |
|
->scan( |
62
|
1 |
|
$zip->getFromName( |
63
|
1 |
|
'META-INF/manifest.xml' |
64
|
1 |
|
) |
65
|
1 |
|
) |
66
|
1 |
|
); |
67
|
1 |
|
if ($xml !== false) { |
68
|
1 |
|
$namespacesContent = $xml->getNamespaces(true); |
69
|
1 |
|
if (isset($namespacesContent['manifest'])) { |
70
|
1 |
|
$manifest = $xml->children($namespacesContent['manifest']); |
71
|
1 |
|
foreach ($manifest as $manifestDataSet) { |
72
|
1 |
|
$manifestAttributes = $manifestDataSet->attributes($namespacesContent['manifest']); |
73
|
1 |
|
if ($manifestAttributes && $manifestAttributes->{'full-path'} == '/') { |
74
|
1 |
|
$mimeType = (string) $manifestAttributes->{'media-type'}; |
75
|
|
|
|
76
|
1 |
|
break; |
77
|
|
|
} |
78
|
|
|
} |
79
|
|
|
} |
80
|
|
|
} |
81
|
|
|
} |
82
|
|
|
|
83
|
3 |
|
$zip->close(); |
84
|
|
|
} |
85
|
|
|
} |
86
|
|
|
|
87
|
17 |
|
return $mimeType === 'application/vnd.oasis.opendocument.spreadsheet'; |
88
|
|
|
} |
89
|
|
|
|
90
|
|
|
/** |
91
|
|
|
* Reads names of the worksheets from a file, without parsing the whole file to a PhpSpreadsheet object. |
92
|
|
|
* |
93
|
|
|
* @return string[] |
94
|
|
|
*/ |
95
|
6 |
|
public function listWorksheetNames(string $filename): array |
96
|
|
|
{ |
97
|
6 |
|
File::assertFile($filename, self::INITIAL_FILE); |
98
|
|
|
|
99
|
2 |
|
$worksheetNames = []; |
100
|
|
|
|
101
|
2 |
|
$xml = new XMLReader(); |
102
|
2 |
|
$xml->xml( |
103
|
2 |
|
$this->getSecurityScannerOrThrow() |
104
|
2 |
|
->scanFile( |
105
|
2 |
|
'zip://' . realpath($filename) . '#' . self::INITIAL_FILE |
106
|
2 |
|
) |
107
|
2 |
|
); |
108
|
2 |
|
$xml->setParserProperty(2, true); |
109
|
|
|
|
110
|
|
|
// Step into the first level of content of the XML |
111
|
2 |
|
$xml->read(); |
112
|
2 |
|
while ($xml->read()) { |
113
|
|
|
// Quickly jump through to the office:body node |
114
|
2 |
|
while ($xml->name !== 'office:body') { |
115
|
2 |
|
if ($xml->isEmptyElement) { |
116
|
2 |
|
$xml->read(); |
117
|
|
|
} else { |
118
|
2 |
|
$xml->next(); |
119
|
|
|
} |
120
|
|
|
} |
121
|
|
|
// Now read each node until we find our first table:table node |
122
|
2 |
|
while ($xml->read()) { |
123
|
2 |
|
$xmlName = $xml->name; |
124
|
2 |
|
if ($xmlName == 'table:table' && $xml->nodeType == XMLReader::ELEMENT) { |
125
|
|
|
// Loop through each table:table node reading the table:name attribute for each worksheet name |
126
|
|
|
do { |
127
|
2 |
|
$worksheetName = $xml->getAttribute('table:name'); |
128
|
2 |
|
if (!empty($worksheetName)) { |
129
|
2 |
|
$worksheetNames[] = $worksheetName; |
130
|
|
|
} |
131
|
2 |
|
$xml->next(); |
132
|
2 |
|
} while ($xml->name == 'table:table' && $xml->nodeType == XMLReader::ELEMENT); |
133
|
|
|
} |
134
|
|
|
} |
135
|
|
|
} |
136
|
|
|
|
137
|
2 |
|
return $worksheetNames; |
138
|
|
|
} |
139
|
|
|
|
140
|
|
|
/** |
141
|
|
|
* Return worksheet info (Name, Last Column Letter, Last Column Index, Total Rows, Total Columns). |
142
|
|
|
*/ |
143
|
6 |
|
public function listWorksheetInfo(string $filename): array |
144
|
|
|
{ |
145
|
6 |
|
File::assertFile($filename, self::INITIAL_FILE); |
146
|
|
|
|
147
|
2 |
|
$worksheetInfo = []; |
148
|
|
|
|
149
|
2 |
|
$xml = new XMLReader(); |
150
|
2 |
|
$xml->xml( |
151
|
2 |
|
$this->getSecurityScannerOrThrow() |
152
|
2 |
|
->scanFile( |
153
|
2 |
|
'zip://' . realpath($filename) . '#' . self::INITIAL_FILE |
154
|
2 |
|
) |
155
|
2 |
|
); |
156
|
2 |
|
$xml->setParserProperty(2, true); |
157
|
|
|
|
158
|
|
|
// Step into the first level of content of the XML |
159
|
2 |
|
$xml->read(); |
160
|
2 |
|
$tableVisibility = []; |
161
|
2 |
|
$lastTableStyle = ''; |
162
|
|
|
|
163
|
2 |
|
while ($xml->read()) { |
164
|
2 |
|
if ($xml->name === 'style:style') { |
165
|
2 |
|
$styleType = $xml->getAttribute('style:family'); |
166
|
2 |
|
if ($styleType === 'table') { |
167
|
2 |
|
$lastTableStyle = $xml->getAttribute('style:name'); |
168
|
|
|
} |
169
|
2 |
|
} elseif ($xml->name === 'style:table-properties') { |
170
|
2 |
|
$visibility = $xml->getAttribute('table:display'); |
171
|
2 |
|
$tableVisibility[$lastTableStyle] = ($visibility === 'false') ? Worksheet::SHEETSTATE_HIDDEN : Worksheet::SHEETSTATE_VISIBLE; |
172
|
2 |
|
} elseif ($xml->name == 'table:table' && $xml->nodeType == XMLReader::ELEMENT) { |
173
|
2 |
|
$worksheetNames[] = $xml->getAttribute('table:name'); |
174
|
|
|
|
175
|
2 |
|
$styleName = $xml->getAttribute('table:style-name') ?? ''; |
176
|
2 |
|
$visibility = $tableVisibility[$styleName] ?? ''; |
177
|
2 |
|
$tmpInfo = [ |
178
|
2 |
|
'worksheetName' => $xml->getAttribute('table:name'), |
179
|
2 |
|
'lastColumnLetter' => 'A', |
180
|
2 |
|
'lastColumnIndex' => 0, |
181
|
2 |
|
'totalRows' => 0, |
182
|
2 |
|
'totalColumns' => 0, |
183
|
2 |
|
'sheetState' => $visibility, |
184
|
2 |
|
]; |
185
|
|
|
|
186
|
|
|
// Loop through each child node of the table:table element reading |
187
|
2 |
|
$currCells = 0; |
188
|
|
|
do { |
189
|
2 |
|
$xml->read(); |
190
|
2 |
|
if ($xml->name == 'table:table-row' && $xml->nodeType == XMLReader::ELEMENT) { |
191
|
2 |
|
$rowspan = $xml->getAttribute('table:number-rows-repeated'); |
192
|
2 |
|
$rowspan = empty($rowspan) ? 1 : $rowspan; |
193
|
2 |
|
$tmpInfo['totalRows'] += $rowspan; |
194
|
2 |
|
$tmpInfo['totalColumns'] = max($tmpInfo['totalColumns'], $currCells); |
195
|
2 |
|
$currCells = 0; |
196
|
|
|
// Step into the row |
197
|
2 |
|
$xml->read(); |
198
|
|
|
do { |
199
|
2 |
|
$doread = true; |
200
|
2 |
|
if ($xml->name == 'table:table-cell' && $xml->nodeType == XMLReader::ELEMENT) { |
201
|
2 |
|
if (!$xml->isEmptyElement) { |
202
|
2 |
|
++$currCells; |
203
|
2 |
|
$xml->next(); |
204
|
2 |
|
$doread = false; |
205
|
|
|
} |
206
|
1 |
|
} elseif ($xml->name == 'table:covered-table-cell' && $xml->nodeType == XMLReader::ELEMENT) { |
207
|
1 |
|
$mergeSize = $xml->getAttribute('table:number-columns-repeated'); |
208
|
1 |
|
$currCells += (int) $mergeSize; |
209
|
|
|
} |
210
|
2 |
|
if ($doread) { |
211
|
1 |
|
$xml->read(); |
212
|
|
|
} |
213
|
2 |
|
} while ($xml->name != 'table:table-row'); |
214
|
|
|
} |
215
|
2 |
|
} while ($xml->name != 'table:table'); |
216
|
|
|
|
217
|
2 |
|
$tmpInfo['totalColumns'] = max($tmpInfo['totalColumns'], $currCells); |
218
|
2 |
|
$tmpInfo['lastColumnIndex'] = $tmpInfo['totalColumns'] - 1; |
219
|
2 |
|
$tmpInfo['lastColumnLetter'] = Coordinate::stringFromColumnIndex($tmpInfo['lastColumnIndex'] + 1); |
220
|
2 |
|
$worksheetInfo[] = $tmpInfo; |
221
|
|
|
} |
222
|
|
|
} |
223
|
|
|
|
224
|
2 |
|
return $worksheetInfo; |
225
|
|
|
} |
226
|
|
|
|
227
|
|
|
/** |
228
|
|
|
* Loads PhpSpreadsheet from file. |
229
|
|
|
*/ |
230
|
|
|
protected function loadSpreadsheetFromFile(string $filename): Spreadsheet |
231
|
|
|
{ |
232
|
4 |
|
// Create new Spreadsheet |
233
|
|
|
$spreadsheet = new Spreadsheet(); |
234
|
4 |
|
$spreadsheet->setValueBinder($this->valueBinder); |
235
|
|
|
$spreadsheet->removeSheetByIndex(0); |
236
|
|
|
|
237
|
|
|
// Load into this instance |
238
|
|
|
return $this->loadIntoExisting($filename, $spreadsheet); |
239
|
|
|
} |
240
|
67 |
|
|
241
|
|
|
/** |
242
|
|
|
* Loads PhpSpreadsheet from file into PhpSpreadsheet instance. |
243
|
67 |
|
*/ |
244
|
67 |
|
public function loadIntoExisting(string $filename, Spreadsheet $spreadsheet): Spreadsheet |
245
|
67 |
|
{ |
246
|
|
|
File::assertFile($filename, self::INITIAL_FILE); |
247
|
|
|
|
248
|
67 |
|
$zip = new ZipArchive(); |
249
|
|
|
$zip->open($filename); |
250
|
|
|
|
251
|
|
|
// Meta |
252
|
|
|
|
253
|
|
|
$xml = @simplexml_load_string( |
254
|
71 |
|
$this->getSecurityScannerOrThrow() |
255
|
|
|
->scan($zip->getFromName('meta.xml')) |
256
|
71 |
|
); |
257
|
|
|
if ($xml === false) { |
258
|
67 |
|
throw new Exception('Unable to read data from {$pFilename}'); |
259
|
67 |
|
} |
260
|
|
|
|
261
|
|
|
$namespacesMeta = $xml->getNamespaces(true); |
262
|
|
|
|
263
|
67 |
|
(new DocumentProperties($spreadsheet))->load($xml, $namespacesMeta); |
264
|
67 |
|
|
265
|
67 |
|
// Styles |
266
|
67 |
|
|
267
|
67 |
|
$dom = new DOMDocument('1.01', 'UTF-8'); |
268
|
1 |
|
$dom->loadXML( |
269
|
|
|
$this->getSecurityScannerOrThrow() |
270
|
|
|
->scan($zip->getFromName('styles.xml')) |
271
|
66 |
|
); |
272
|
|
|
|
273
|
66 |
|
$pageSettings = new PageSettings($dom); |
274
|
|
|
|
275
|
|
|
// Main Content |
276
|
|
|
|
277
|
66 |
|
$dom = new DOMDocument('1.01', 'UTF-8'); |
278
|
66 |
|
$dom->loadXML( |
279
|
66 |
|
$this->getSecurityScannerOrThrow() |
280
|
66 |
|
->scan($zip->getFromName(self::INITIAL_FILE)) |
281
|
66 |
|
); |
282
|
|
|
|
283
|
66 |
|
$officeNs = (string) $dom->lookupNamespaceUri('office'); |
284
|
|
|
$tableNs = (string) $dom->lookupNamespaceUri('table'); |
285
|
|
|
$textNs = (string) $dom->lookupNamespaceUri('text'); |
286
|
|
|
$xlinkNs = (string) $dom->lookupNamespaceUri('xlink'); |
287
|
66 |
|
$styleNs = (string) $dom->lookupNamespaceUri('style'); |
288
|
66 |
|
|
289
|
66 |
|
$pageSettings->readStyleCrossReferences($dom); |
290
|
66 |
|
|
291
|
66 |
|
$autoFilterReader = new AutoFilter($spreadsheet, $tableNs); |
292
|
|
|
$definedNameReader = new DefinedNames($spreadsheet, $tableNs); |
293
|
66 |
|
$columnWidths = []; |
294
|
66 |
|
$automaticStyle0 = $dom->getElementsByTagNameNS($officeNs, 'automatic-styles')->item(0); |
295
|
66 |
|
$automaticStyles = ($automaticStyle0 === null) ? [] : $automaticStyle0->getElementsByTagNameNS($styleNs, 'style'); |
296
|
66 |
|
foreach ($automaticStyles as $automaticStyle) { |
297
|
66 |
|
$styleName = $automaticStyle->getAttributeNS($styleNs, 'name'); |
298
|
|
|
$styleFamily = $automaticStyle->getAttributeNS($styleNs, 'family'); |
299
|
66 |
|
if ($styleFamily === 'table-column') { |
300
|
|
|
$tcprops = $automaticStyle->getElementsByTagNameNS($styleNs, 'table-column-properties'); |
301
|
66 |
|
$tcprop = $tcprops->item(0); |
302
|
66 |
|
if ($tcprop !== null) { |
303
|
66 |
|
$columnWidth = $tcprop->getAttributeNs($styleNs, 'column-width'); |
304
|
66 |
|
$columnWidths[$styleName] = $columnWidth; |
305
|
66 |
|
} |
306
|
66 |
|
} |
307
|
66 |
|
} |
308
|
66 |
|
|
309
|
66 |
|
// Content |
310
|
41 |
|
$item0 = $dom->getElementsByTagNameNS($officeNs, 'body')->item(0); |
311
|
41 |
|
$spreadsheets = ($item0 === null) ? [] : $item0->getElementsByTagNameNS($officeNs, 'spreadsheet'); |
312
|
41 |
|
|
313
|
41 |
|
foreach ($spreadsheets as $workbookData) { |
314
|
41 |
|
/** @var DOMElement $workbookData */ |
315
|
41 |
|
$tables = $workbookData->getElementsByTagNameNS($tableNs, 'table'); |
316
|
|
|
|
317
|
|
|
$worksheetID = 0; |
318
|
|
|
foreach ($tables as $worksheetDataSet) { |
319
|
|
|
/** @var DOMElement $worksheetDataSet */ |
320
|
|
|
$worksheetName = $worksheetDataSet->getAttributeNS($tableNs, 'name'); |
321
|
|
|
|
322
|
66 |
|
// Check loadSheetsOnly |
323
|
66 |
|
if ( |
324
|
|
|
$this->loadSheetsOnly !== null |
325
|
66 |
|
&& $worksheetName |
326
|
|
|
&& !in_array($worksheetName, $this->loadSheetsOnly) |
327
|
66 |
|
) { |
328
|
|
|
continue; |
329
|
66 |
|
} |
330
|
66 |
|
|
331
|
|
|
$worksheetStyleName = $worksheetDataSet->getAttributeNS($tableNs, 'style-name'); |
332
|
66 |
|
|
333
|
|
|
// Create sheet |
334
|
|
|
$spreadsheet->createSheet(); |
335
|
|
|
$spreadsheet->setActiveSheetIndex($worksheetID); |
336
|
66 |
|
|
337
|
|
|
if ($worksheetName || is_numeric($worksheetName)) { |
338
|
66 |
|
// Use false for $updateFormulaCellReferences to prevent adjustment of worksheet references in |
339
|
|
|
// formula cells... during the load, all formulae should be correct, and we're simply |
340
|
3 |
|
// bringing the worksheet name in line with the formula, not the reverse |
341
|
|
|
$spreadsheet->getActiveSheet()->setTitle((string) $worksheetName, false, false); |
342
|
|
|
} |
343
|
65 |
|
|
344
|
|
|
// Go through every child of table element |
345
|
|
|
$rowID = 1; |
346
|
65 |
|
$tableColumnIndex = 1; |
347
|
65 |
|
foreach ($worksheetDataSet->childNodes as $childNode) { |
348
|
|
|
/** @var DOMElement $childNode */ |
349
|
65 |
|
|
350
|
|
|
// Filter elements which are not under the "table" ns |
351
|
|
|
if ($childNode->namespaceURI != $tableNs) { |
352
|
|
|
continue; |
353
|
65 |
|
} |
354
|
|
|
|
355
|
|
|
$key = $childNode->nodeName; |
356
|
|
|
|
357
|
65 |
|
// Remove ns from node name |
358
|
65 |
|
if (str_contains($key, ':')) { |
359
|
65 |
|
$keyChunks = explode(':', $key); |
360
|
|
|
$key = array_pop($keyChunks); |
361
|
|
|
} |
362
|
|
|
|
363
|
65 |
|
switch ($key) { |
364
|
36 |
|
case 'table-header-rows': |
365
|
|
|
/// TODO :: Figure this out. This is only a partial implementation I guess. |
366
|
|
|
// ($rowData it's not used at all and I'm not sure that PHPExcel |
367
|
65 |
|
// has an API for this) |
368
|
|
|
|
369
|
|
|
// foreach ($rowData as $keyRowData => $cellData) { |
370
|
65 |
|
// $rowData = $cellData; |
371
|
65 |
|
// break; |
372
|
65 |
|
// } |
373
|
|
|
break; |
374
|
|
|
case 'table-column': |
375
|
|
|
if ($childNode->hasAttributeNS($tableNs, 'number-columns-repeated')) { |
376
|
65 |
|
$rowRepeats = (int) $childNode->getAttributeNS($tableNs, 'number-columns-repeated'); |
377
|
|
|
} else { |
378
|
|
|
$rowRepeats = 1; |
379
|
|
|
} |
380
|
|
|
$tableStyleName = $childNode->getAttributeNS($tableNs, 'style-name'); |
381
|
|
|
if (isset($columnWidths[$tableStyleName])) { |
382
|
|
|
$columnWidth = new HelperDimension($columnWidths[$tableStyleName]); |
383
|
|
|
$tableColumnString = Coordinate::stringFromColumnIndex($tableColumnIndex); |
384
|
|
|
for ($rowRepeats2 = $rowRepeats; $rowRepeats2 > 0; --$rowRepeats2) { |
385
|
|
|
$spreadsheet->getActiveSheet() |
386
|
65 |
|
->getColumnDimension($tableColumnString) |
387
|
40 |
|
->setWidth($columnWidth->toUnit('cm'), 'cm'); |
388
|
36 |
|
++$tableColumnString; |
389
|
|
|
} |
390
|
15 |
|
} |
391
|
|
|
$tableColumnIndex += $rowRepeats; |
392
|
40 |
|
|
393
|
40 |
|
break; |
394
|
40 |
|
case 'table-row': |
395
|
40 |
|
if ($childNode->hasAttributeNS($tableNs, 'number-rows-repeated')) { |
396
|
40 |
|
$rowRepeats = (int) $childNode->getAttributeNS($tableNs, 'number-rows-repeated'); |
397
|
40 |
|
} else { |
398
|
40 |
|
$rowRepeats = 1; |
399
|
40 |
|
} |
400
|
40 |
|
|
401
|
|
|
$columnID = 'A'; |
402
|
|
|
/** @var DOMElement|DOMText $cellData */ |
403
|
40 |
|
foreach ($childNode->childNodes as $cellData) { |
404
|
|
|
if ($cellData instanceof DOMText) { |
405
|
40 |
|
continue; // should just be whitespace |
406
|
64 |
|
} |
407
|
64 |
|
if (!$this->getReadFilter()->readCell($columnID, $rowID, $worksheetName)) { |
408
|
24 |
|
if ($cellData->hasAttributeNS($tableNs, 'number-columns-repeated')) { |
409
|
|
|
$colRepeats = (int) $cellData->getAttributeNS($tableNs, 'number-columns-repeated'); |
410
|
64 |
|
} else { |
411
|
|
|
$colRepeats = 1; |
412
|
|
|
} |
413
|
64 |
|
|
414
|
|
|
for ($i = 0; $i < $colRepeats; ++$i) { |
415
|
64 |
|
++$columnID; |
416
|
64 |
|
} |
417
|
1 |
|
|
418
|
|
|
continue; |
419
|
64 |
|
} |
420
|
64 |
|
|
421
|
2 |
|
// Initialize variables |
422
|
1 |
|
$formatting = $hyperlink = null; |
423
|
|
|
$hasCalculatedValue = false; |
424
|
2 |
|
$cellDataFormula = ''; |
425
|
|
|
$cellDataType = ''; |
426
|
|
|
$cellDataRef = ''; |
427
|
2 |
|
|
428
|
2 |
|
if ($cellData->hasAttributeNS($tableNs, 'formula')) { |
429
|
|
|
$cellDataFormula = $cellData->getAttributeNS($tableNs, 'formula'); |
430
|
|
|
$hasCalculatedValue = true; |
431
|
2 |
|
} |
432
|
|
|
if ($cellData->hasAttributeNS($tableNs, 'number-matrix-columns-spanned')) { |
433
|
|
|
if ($cellData->hasAttributeNS($tableNs, 'number-matrix-rows-spanned')) { |
434
|
|
|
$cellDataType = 'array'; |
435
|
|
|
$arrayRow = (int) $cellData->getAttributeNS($tableNs, 'number-matrix-rows-spanned'); |
436
|
64 |
|
$arrayCol = (int) $cellData->getAttributeNS($tableNs, 'number-matrix-columns-spanned'); |
437
|
64 |
|
$lastRow = $rowID + $arrayRow - 1; |
438
|
64 |
|
$lastCol = $columnID; |
439
|
64 |
|
while ($arrayCol > 1) { |
440
|
64 |
|
++$lastCol; |
441
|
|
|
--$arrayCol; |
442
|
64 |
|
} |
443
|
25 |
|
$cellDataRef = "$columnID$rowID:$lastCol$lastRow"; |
444
|
25 |
|
} |
445
|
|
|
} |
446
|
64 |
|
|
447
|
12 |
|
// Annotations |
448
|
12 |
|
$annotation = $cellData->getElementsByTagNameNS($officeNs, 'annotation'); |
449
|
12 |
|
|
450
|
12 |
|
if ($annotation->length > 0 && $annotation->item(0) !== null) { |
451
|
12 |
|
$textNode = $annotation->item(0)->getElementsByTagNameNS($textNs, 'p'); |
452
|
12 |
|
$textNodeLength = $textNode->length; |
453
|
12 |
|
$newLineOwed = false; |
454
|
7 |
|
for ($textNodeIndex = 0; $textNodeIndex < $textNodeLength; ++$textNodeIndex) { |
455
|
7 |
|
$textNodeItem = $textNode->item($textNodeIndex); |
456
|
|
|
if ($textNodeItem !== null) { |
457
|
12 |
|
$text = $this->scanElementForText($textNodeItem); |
458
|
|
|
if ($newLineOwed) { |
459
|
|
|
$spreadsheet->getActiveSheet() |
460
|
|
|
->getComment($columnID . $rowID) |
461
|
|
|
->getText() |
462
|
64 |
|
->createText("\n"); |
463
|
|
|
} |
464
|
64 |
|
$newLineOwed = true; |
465
|
11 |
|
|
466
|
11 |
|
$spreadsheet->getActiveSheet() |
467
|
11 |
|
->getComment($columnID . $rowID) |
468
|
11 |
|
->getText() |
469
|
11 |
|
->createText($this->parseRichText($text)); |
470
|
11 |
|
} |
471
|
11 |
|
} |
472
|
11 |
|
} |
473
|
1 |
|
|
474
|
1 |
|
// Content |
475
|
1 |
|
|
476
|
1 |
|
/** @var DOMElement[] $paragraphs */ |
477
|
|
|
$paragraphs = []; |
478
|
11 |
|
|
479
|
|
|
foreach ($cellData->childNodes as $item) { |
480
|
11 |
|
/** @var DOMElement $item */ |
481
|
11 |
|
|
482
|
11 |
|
// Filter text:p elements |
483
|
11 |
|
if ($item->nodeName == 'text:p') { |
484
|
|
|
$paragraphs[] = $item; |
485
|
|
|
} |
486
|
|
|
} |
487
|
|
|
|
488
|
|
|
if (count($paragraphs) > 0) { |
489
|
|
|
// Consolidate if there are multiple p records (maybe with spans as well) |
490
|
|
|
$dataArray = []; |
491
|
64 |
|
|
492
|
|
|
// Text can have multiple text:p and within those, multiple text:span. |
493
|
64 |
|
// text:p newlines, but text:span does not. |
494
|
|
|
// Also, here we assume there is no text data is span fields are specified, since |
495
|
|
|
// we have no way of knowing proper positioning anyway. |
496
|
|
|
|
497
|
64 |
|
foreach ($paragraphs as $pData) { |
498
|
64 |
|
$dataArray[] = $this->scanElementForText($pData); |
499
|
|
|
} |
500
|
|
|
$allCellDataText = implode("\n", $dataArray); |
501
|
|
|
|
502
|
64 |
|
$type = $cellData->getAttributeNS($officeNs, 'value-type'); |
503
|
|
|
|
504
|
64 |
|
switch ($type) { |
505
|
|
|
case 'string': |
506
|
|
|
$type = DataType::TYPE_STRING; |
507
|
|
|
$dataValue = $allCellDataText; |
508
|
|
|
|
509
|
|
|
foreach ($paragraphs as $paragraph) { |
510
|
|
|
$link = $paragraph->getElementsByTagNameNS($textNs, 'a'); |
511
|
64 |
|
if ($link->length > 0 && $link->item(0) !== null) { |
512
|
64 |
|
$hyperlink = $link->item(0)->getAttributeNS($xlinkNs, 'href'); |
513
|
|
|
} |
514
|
64 |
|
} |
515
|
|
|
|
516
|
64 |
|
break; |
517
|
|
|
case 'boolean': |
518
|
|
|
$type = DataType::TYPE_BOOL; |
519
|
64 |
|
$dataValue = ($cellData->getAttributeNS($officeNs, 'boolean-value') === 'true') ? true : false; |
520
|
42 |
|
|
521
|
42 |
|
break; |
522
|
|
|
case 'percentage': |
523
|
42 |
|
$type = DataType::TYPE_NUMERIC; |
524
|
42 |
|
$dataValue = (float) $cellData->getAttributeNS($officeNs, 'value'); |
525
|
42 |
|
|
526
|
7 |
|
// percentage should always be float |
527
|
|
|
//if (floor($dataValue) == $dataValue) { |
528
|
|
|
// $dataValue = (int) $dataValue; |
529
|
|
|
//} |
530
|
42 |
|
$formatting = NumberFormat::FORMAT_PERCENTAGE_00; |
531
|
43 |
|
|
532
|
8 |
|
break; |
533
|
8 |
|
case 'currency': |
534
|
|
|
$type = DataType::TYPE_NUMERIC; |
535
|
8 |
|
$dataValue = (float) $cellData->getAttributeNS($officeNs, 'value'); |
536
|
41 |
|
|
537
|
4 |
|
if (floor($dataValue) == $dataValue) { |
538
|
4 |
|
$dataValue = (int) $dataValue; |
539
|
|
|
} |
540
|
|
|
$formatting = NumberFormat::FORMAT_CURRENCY_USD_INTEGER; |
541
|
|
|
|
542
|
|
|
break; |
543
|
|
|
case 'float': |
544
|
4 |
|
$type = DataType::TYPE_NUMERIC; |
545
|
|
|
$dataValue = (float) $cellData->getAttributeNS($officeNs, 'value'); |
546
|
4 |
|
|
547
|
41 |
|
if (floor($dataValue) == $dataValue) { |
548
|
4 |
|
if ($dataValue == (int) $dataValue) { |
549
|
4 |
|
$dataValue = (int) $dataValue; |
550
|
|
|
} |
551
|
4 |
|
} |
552
|
4 |
|
|
553
|
|
|
break; |
554
|
4 |
|
case 'date': |
555
|
|
|
$type = DataType::TYPE_NUMERIC; |
556
|
4 |
|
$value = $cellData->getAttributeNS($officeNs, 'date-value'); |
557
|
37 |
|
$dataValue = Date::convertIsoDate($value); |
558
|
37 |
|
|
559
|
37 |
|
if ($dataValue != floor($dataValue)) { |
560
|
|
|
$formatting = NumberFormat::FORMAT_DATE_XLSX15 |
561
|
37 |
|
. ' ' |
562
|
35 |
|
. NumberFormat::FORMAT_DATE_TIME4; |
563
|
35 |
|
} else { |
564
|
|
|
$formatting = NumberFormat::FORMAT_DATE_XLSX15; |
565
|
|
|
} |
566
|
|
|
|
567
|
37 |
|
break; |
568
|
7 |
|
case 'time': |
569
|
7 |
|
$type = DataType::TYPE_NUMERIC; |
570
|
7 |
|
|
571
|
7 |
|
$timeValue = $cellData->getAttributeNS($officeNs, 'time-value'); |
572
|
|
|
|
573
|
7 |
|
$dataValue = Date::PHPToExcel( |
574
|
6 |
|
strtotime( |
575
|
6 |
|
'01-01-1970 ' . implode(':', sscanf($timeValue, 'PT%dH%dM%dS') ?? []) |
576
|
6 |
|
) |
577
|
|
|
); |
578
|
7 |
|
$formatting = NumberFormat::FORMAT_DATE_TIME4; |
579
|
|
|
|
580
|
|
|
break; |
581
|
7 |
|
default: |
582
|
6 |
|
$dataValue = null; |
583
|
6 |
|
} |
584
|
|
|
} else { |
585
|
6 |
|
$type = DataType::TYPE_NULL; |
586
|
|
|
$dataValue = null; |
587
|
6 |
|
} |
588
|
6 |
|
|
589
|
6 |
|
if ($hasCalculatedValue) { |
590
|
6 |
|
$type = DataType::TYPE_FORMULA; |
591
|
6 |
|
$cellDataFormula = substr($cellDataFormula, strpos($cellDataFormula, ':=') + 1); |
592
|
6 |
|
$cellDataFormula = FormulaTranslator::convertToExcelFormulaValue($cellDataFormula); |
593
|
|
|
} |
594
|
6 |
|
|
595
|
|
|
if ($cellData->hasAttributeNS($tableNs, 'number-columns-repeated')) { |
596
|
|
|
$colRepeats = (int) $cellData->getAttributeNS($tableNs, 'number-columns-repeated'); |
597
|
|
|
} else { |
598
|
|
|
$colRepeats = 1; |
599
|
38 |
|
} |
600
|
38 |
|
|
601
|
|
|
if ($type !== null) { // @phpstan-ignore-line |
602
|
|
|
for ($i = 0; $i < $colRepeats; ++$i) { |
603
|
64 |
|
if ($i > 0) { |
604
|
25 |
|
++$columnID; |
605
|
25 |
|
} |
606
|
25 |
|
|
607
|
|
|
if ($type !== DataType::TYPE_NULL) { |
608
|
|
|
for ($rowAdjust = 0; $rowAdjust < $rowRepeats; ++$rowAdjust) { |
609
|
64 |
|
$rID = $rowID + $rowAdjust; |
610
|
34 |
|
|
611
|
|
|
$cell = $spreadsheet->getActiveSheet() |
612
|
64 |
|
->getCell($columnID . $rID); |
613
|
|
|
|
614
|
|
|
// Set value |
615
|
64 |
|
if ($hasCalculatedValue) { |
616
|
64 |
|
$cell->setValueExplicit($cellDataFormula, $type); |
617
|
64 |
|
if ($cellDataType === 'array') { |
618
|
34 |
|
$cell->setFormulaAttributes(['t' => 'array', 'ref' => $cellDataRef]); |
619
|
|
|
} |
620
|
|
|
} else { |
621
|
64 |
|
$cell->setValueExplicit($dataValue, $type); |
622
|
64 |
|
} |
623
|
64 |
|
|
624
|
|
|
if ($hasCalculatedValue) { |
625
|
64 |
|
$cell->setCalculatedValue($dataValue, $type === DataType::TYPE_NUMERIC); |
626
|
64 |
|
} |
627
|
|
|
|
628
|
|
|
// Set other properties |
629
|
64 |
|
if ($formatting !== null) { |
630
|
25 |
|
$spreadsheet->getActiveSheet() |
631
|
25 |
|
->getStyle($columnID . $rID) |
632
|
12 |
|
->getNumberFormat() |
633
|
|
|
->setFormatCode($formatting); |
634
|
|
|
} else { |
635
|
63 |
|
$spreadsheet->getActiveSheet() |
636
|
|
|
->getStyle($columnID . $rID) |
637
|
|
|
->getNumberFormat() |
638
|
64 |
|
->setFormatCode(NumberFormat::FORMAT_GENERAL); |
639
|
25 |
|
} |
640
|
|
|
|
641
|
|
|
if ($hyperlink !== null) { |
642
|
|
|
if ($hyperlink[0] === '#') { |
643
|
64 |
|
$hyperlink = 'sheet://' . substr($hyperlink, 1); |
644
|
11 |
|
} |
645
|
11 |
|
$cell->getHyperlink() |
646
|
11 |
|
->setUrl($hyperlink); |
647
|
11 |
|
} |
648
|
|
|
} |
649
|
64 |
|
} |
650
|
64 |
|
} |
651
|
64 |
|
} |
652
|
64 |
|
|
653
|
|
|
// Merged cells |
654
|
|
|
$this->processMergedCells($cellData, $tableNs, $type, $columnID, $rowID, $spreadsheet); |
655
|
64 |
|
|
656
|
7 |
|
++$columnID; |
657
|
1 |
|
} |
658
|
|
|
$rowID += $rowRepeats; |
659
|
7 |
|
|
660
|
7 |
|
break; |
661
|
|
|
} |
662
|
|
|
} |
663
|
|
|
$pageSettings->setVisibilityForWorksheet($spreadsheet->getActiveSheet(), $worksheetStyleName); |
664
|
|
|
$pageSettings->setPrintSettingsForWorksheet($spreadsheet->getActiveSheet(), $worksheetStyleName); |
665
|
|
|
++$worksheetID; |
666
|
|
|
} |
667
|
|
|
|
668
|
64 |
|
$autoFilterReader->read($workbookData); |
669
|
|
|
$definedNameReader->read($workbookData); |
670
|
64 |
|
} |
671
|
|
|
$spreadsheet->setActiveSheetIndex(0); |
672
|
64 |
|
|
673
|
|
|
if ($zip->locateName('settings.xml') !== false) { |
674
|
64 |
|
$this->processSettings($zip, $spreadsheet); |
675
|
|
|
} |
676
|
|
|
|
677
|
65 |
|
// Return |
678
|
65 |
|
return $spreadsheet; |
679
|
65 |
|
} |
680
|
|
|
|
681
|
|
|
private function processSettings(ZipArchive $zip, Spreadsheet $spreadsheet): void |
682
|
66 |
|
{ |
683
|
66 |
|
$dom = new DOMDocument('1.01', 'UTF-8'); |
684
|
|
|
$dom->loadXML( |
685
|
66 |
|
$this->getSecurityScannerOrThrow() |
686
|
|
|
->scan($zip->getFromName('settings.xml')) |
687
|
65 |
|
); |
688
|
58 |
|
//$xlinkNs = $dom->lookupNamespaceUri('xlink'); |
689
|
|
|
$configNs = (string) $dom->lookupNamespaceUri('config'); |
690
|
|
|
//$oooNs = $dom->lookupNamespaceUri('ooo'); |
691
|
|
|
$officeNs = (string) $dom->lookupNamespaceUri('office'); |
692
|
65 |
|
$settings = $dom->getElementsByTagNameNS($officeNs, 'settings') |
693
|
|
|
->item(0); |
694
|
|
|
if ($settings !== null) { |
695
|
58 |
|
$this->lookForActiveSheet($settings, $spreadsheet, $configNs); |
696
|
|
|
$this->lookForSelectedCells($settings, $spreadsheet, $configNs); |
697
|
58 |
|
} |
698
|
58 |
|
} |
699
|
58 |
|
|
700
|
58 |
|
private function lookForActiveSheet(DOMElement $settings, Spreadsheet $spreadsheet, string $configNs): void |
701
|
58 |
|
{ |
702
|
|
|
/** @var DOMElement $t */ |
703
|
58 |
|
foreach ($settings->getElementsByTagNameNS($configNs, 'config-item') as $t) { |
704
|
|
|
if ($t->getAttributeNs($configNs, 'name') === 'ActiveTable') { |
705
|
58 |
|
try { |
706
|
58 |
|
$spreadsheet->setActiveSheetIndexByName($t->nodeValue ?? ''); |
707
|
58 |
|
} catch (Throwable) { |
708
|
58 |
|
// do nothing |
709
|
58 |
|
} |
710
|
58 |
|
|
711
|
|
|
break; |
712
|
|
|
} |
713
|
|
|
} |
714
|
58 |
|
} |
715
|
|
|
|
716
|
|
|
private function lookForSelectedCells(DOMElement $settings, Spreadsheet $spreadsheet, string $configNs): void |
717
|
58 |
|
{ |
718
|
57 |
|
/** @var DOMElement $t */ |
719
|
|
|
foreach ($settings->getElementsByTagNameNS($configNs, 'config-item-map-named') as $t) { |
720
|
57 |
|
if ($t->getAttributeNs($configNs, 'name') === 'Tables') { |
721
|
1 |
|
foreach ($t->getElementsByTagNameNS($configNs, 'config-item-map-entry') as $ws) { |
722
|
|
|
$setRow = $setCol = ''; |
723
|
|
|
$wsname = $ws->getAttributeNs($configNs, 'name'); |
724
|
|
|
foreach ($ws->getElementsByTagNameNS($configNs, 'config-item') as $configItem) { |
725
|
57 |
|
$attrName = $configItem->getAttributeNs($configNs, 'name'); |
726
|
|
|
if ($attrName === 'CursorPositionX') { |
727
|
|
|
$setCol = $configItem->nodeValue; |
728
|
|
|
} |
729
|
|
|
if ($attrName === 'CursorPositionY') { |
730
|
58 |
|
$setRow = $configItem->nodeValue; |
731
|
|
|
} |
732
|
|
|
} |
733
|
58 |
|
$this->setSelected($spreadsheet, $wsname, "$setCol", "$setRow"); |
734
|
57 |
|
} |
735
|
57 |
|
|
736
|
57 |
|
break; |
737
|
57 |
|
} |
738
|
57 |
|
} |
739
|
57 |
|
} |
740
|
57 |
|
|
741
|
57 |
|
private function setSelected(Spreadsheet $spreadsheet, string $wsname, string $setCol, string $setRow): void |
742
|
|
|
{ |
743
|
57 |
|
if (is_numeric($setCol) && is_numeric($setRow)) { |
744
|
57 |
|
$sheet = $spreadsheet->getSheetByName($wsname); |
745
|
|
|
if ($sheet !== null) { |
746
|
|
|
$sheet->setSelectedCells([(int) $setCol + 1, (int) $setRow + 1]); |
747
|
57 |
|
} |
748
|
|
|
} |
749
|
|
|
} |
750
|
57 |
|
|
751
|
|
|
/** |
752
|
|
|
* Recursively scan element. |
753
|
|
|
*/ |
754
|
|
|
protected function scanElementForText(DOMNode $element): string |
755
|
57 |
|
{ |
756
|
|
|
$str = ''; |
757
|
57 |
|
foreach ($element->childNodes as $child) { |
758
|
57 |
|
/** @var DOMNode $child */ |
759
|
57 |
|
if ($child->nodeType == XML_TEXT_NODE) { |
760
|
57 |
|
$str .= $child->nodeValue; |
761
|
|
|
} elseif ($child->nodeType == XML_ELEMENT_NODE && $child->nodeName == 'text:line-break') { |
762
|
|
|
$str .= "\n"; |
763
|
|
|
} elseif ($child->nodeType == XML_ELEMENT_NODE && $child->nodeName == 'text:s') { |
764
|
|
|
// It's a space |
765
|
|
|
|
766
|
|
|
// Multiple spaces? |
767
|
|
|
$attributes = $child->attributes; |
768
|
64 |
|
/** @var ?DOMAttr $cAttr */ |
769
|
|
|
$cAttr = ($attributes === null) ? null : $attributes->getNamedItem('c'); |
770
|
64 |
|
$multiplier = self::getMultiplier($cAttr); |
771
|
64 |
|
$str .= str_repeat(' ', $multiplier); |
772
|
|
|
} |
773
|
64 |
|
|
774
|
64 |
|
if ($child->hasChildNodes()) { |
775
|
16 |
|
$str .= $this->scanElementForText($child); |
776
|
1 |
|
} |
777
|
16 |
|
} |
778
|
|
|
|
779
|
|
|
return $str; |
780
|
|
|
} |
781
|
4 |
|
|
782
|
|
|
private static function getMultiplier(?DOMAttr $cAttr): int |
783
|
4 |
|
{ |
784
|
4 |
|
if ($cAttr) { |
785
|
4 |
|
$multiplier = (int) $cAttr->nodeValue; |
786
|
|
|
} else { |
787
|
|
|
$multiplier = 1; |
788
|
64 |
|
} |
789
|
16 |
|
|
790
|
|
|
return $multiplier; |
791
|
|
|
} |
792
|
|
|
|
793
|
64 |
|
private function parseRichText(string $is): RichText |
794
|
|
|
{ |
795
|
|
|
$value = new RichText(); |
796
|
4 |
|
$value->createText($is); |
797
|
|
|
|
798
|
4 |
|
return $value; |
799
|
4 |
|
} |
800
|
|
|
|
801
|
4 |
|
private function processMergedCells( |
802
|
|
|
DOMElement $cellData, |
803
|
|
|
string $tableNs, |
804
|
4 |
|
string $type, |
805
|
|
|
string $columnID, |
806
|
|
|
int $rowID, |
807
|
11 |
|
Spreadsheet $spreadsheet |
808
|
|
|
): void { |
809
|
11 |
|
if ( |
810
|
11 |
|
$cellData->hasAttributeNS($tableNs, 'number-columns-spanned') |
811
|
|
|
|| $cellData->hasAttributeNS($tableNs, 'number-rows-spanned') |
812
|
11 |
|
) { |
813
|
|
|
if (($type !== DataType::TYPE_NULL) || ($this->readDataOnly === false)) { |
814
|
|
|
$columnTo = $columnID; |
815
|
64 |
|
|
816
|
|
|
if ($cellData->hasAttributeNS($tableNs, 'number-columns-spanned')) { |
817
|
|
|
$columnIndex = Coordinate::columnIndexFromString($columnID); |
818
|
|
|
$columnIndex += (int) $cellData->getAttributeNS($tableNs, 'number-columns-spanned'); |
819
|
|
|
$columnIndex -= 2; |
820
|
|
|
|
821
|
|
|
$columnTo = Coordinate::stringFromColumnIndex($columnIndex + 1); |
822
|
|
|
} |
823
|
|
|
|
824
|
64 |
|
$rowTo = $rowID; |
825
|
64 |
|
|
826
|
|
|
if ($cellData->hasAttributeNS($tableNs, 'number-rows-spanned')) { |
827
|
17 |
|
$rowTo = $rowTo + (int) $cellData->getAttributeNS($tableNs, 'number-rows-spanned') - 1; |
828
|
17 |
|
} |
829
|
|
|
|
830
|
17 |
|
$cellRange = $columnID . $rowID . ':' . $columnTo . $rowTo; |
831
|
16 |
|
$spreadsheet->getActiveSheet()->mergeCells($cellRange, Worksheet::MERGE_CELL_CONTENT_HIDE); |
832
|
16 |
|
} |
833
|
16 |
|
} |
834
|
|
|
} |
835
|
|
|
} |
836
|
|
|
|