1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
namespace Linio\Component\SpreadsheetParser\Parser; |
4
|
|
|
|
5
|
|
|
use Linio\Component\SpreadsheetParser\Exception\SpreadsheetParsingException; |
6
|
|
|
|
7
|
|
|
class XlsxParser implements ParserInterface |
8
|
|
|
{ |
9
|
|
|
const OPTION_HAS_COLUMN_NAMES = 'has_column_names'; |
10
|
|
|
const OPTION_SHEET_INDEX = 'sheet_index'; |
11
|
|
|
const OPTION_SHEET_NAME = 'sheet_name'; |
12
|
|
|
|
13
|
|
|
/** |
14
|
|
|
* @var string |
15
|
|
|
*/ |
16
|
|
|
protected $filePath; |
17
|
|
|
|
18
|
|
|
/** |
19
|
|
|
* @var string |
20
|
|
|
*/ |
21
|
|
|
protected $tmpDir; |
22
|
|
|
|
23
|
|
|
/** |
24
|
|
|
* @var bool |
25
|
|
|
*/ |
26
|
|
|
protected $hasColumnNames; |
27
|
|
|
|
28
|
|
|
/** |
29
|
|
|
* @var int |
30
|
|
|
*/ |
31
|
|
|
protected $sheetIndex; |
32
|
|
|
|
33
|
|
|
/** |
34
|
|
|
* @var string |
35
|
|
|
*/ |
36
|
|
|
protected $sheetName; |
37
|
|
|
|
38
|
|
|
/** |
39
|
|
|
* @var array |
40
|
|
|
*/ |
41
|
|
|
protected $columnNames; |
42
|
|
|
|
43
|
|
|
/** |
44
|
|
|
* @var \SimpleXMLElement |
45
|
|
|
*/ |
46
|
|
|
protected $appXml; |
47
|
|
|
|
48
|
|
|
/** |
49
|
|
|
* @var \SimpleXMLElement |
50
|
|
|
*/ |
51
|
|
|
protected $sharedStringsXml; |
52
|
|
|
|
53
|
|
|
/** |
54
|
|
|
* @var \SimpleXMLElement |
55
|
|
|
*/ |
56
|
|
|
protected $sheetXml; |
57
|
|
|
|
58
|
|
|
/** |
59
|
|
|
* @var \SimpleXMLElement |
60
|
|
|
*/ |
61
|
|
|
protected $workbookXml; |
62
|
|
|
|
63
|
|
|
/** |
64
|
|
|
* @var \SimpleXMLElement |
65
|
|
|
*/ |
66
|
|
|
protected $relationshipsXml; |
67
|
|
|
|
68
|
|
|
/** |
69
|
|
|
* @param string $filePath |
70
|
|
|
*/ |
71
|
|
|
public function __construct($filePath, array $options = []) |
72
|
|
|
{ |
73
|
|
|
$this->filePath = $filePath; |
74
|
|
|
|
75
|
|
|
// default options |
76
|
|
|
$this->hasColumnNames = true; |
77
|
|
|
$this->sheetIndex = 1; |
78
|
|
|
$this->sheetName = null; |
79
|
|
|
|
80
|
|
|
$this->loadParserOptions($options); |
81
|
|
|
} |
82
|
|
|
|
83
|
|
|
/** |
84
|
|
|
* @SuppressWarnings(PHPMD.ExcessiveMethodLength) |
85
|
|
|
* |
86
|
|
|
* @throws SpreadsheetParsingException |
87
|
|
|
* |
88
|
|
|
* @return bool |
89
|
|
|
*/ |
90
|
|
|
public function open() |
91
|
|
|
{ |
92
|
|
|
if ($this->sheetXml) { |
93
|
|
|
return true; |
94
|
|
|
} |
95
|
|
|
|
96
|
|
|
$this->tmpDir = tempnam(sys_get_temp_dir(), 'xls'); |
97
|
|
|
unlink($this->tmpDir); |
98
|
|
|
|
99
|
|
|
$zip = new \ZipArchive(); |
100
|
|
|
$zip->open($this->filePath); |
101
|
|
|
$zip->extractTo($this->tmpDir); |
102
|
|
|
$zip->close(); |
103
|
|
|
|
104
|
|
|
if ($this->sheetName) { |
105
|
|
|
$this->setSheetIndexFromSheetName(); |
106
|
|
|
if (!$this->sheetIndex) { |
107
|
|
|
throw new SpreadsheetParsingException('Sheet not found: ' . $this->sheetName); |
108
|
|
|
} |
109
|
|
|
} |
110
|
|
|
|
111
|
|
|
$this->loadXlsxXmlFiles(); |
112
|
|
|
|
113
|
|
|
return true; |
114
|
|
|
} |
115
|
|
|
|
116
|
|
|
/** |
117
|
|
|
* @SuppressWarnings(PHPMD.ExcessiveMethodLength) |
118
|
|
|
* |
119
|
|
|
* @return array|false |
120
|
|
|
*/ |
121
|
|
|
public function getColumnNames() |
122
|
|
|
{ |
123
|
|
|
if (!$this->hasColumnNames) { |
124
|
|
|
return false; |
125
|
|
|
} |
126
|
|
|
|
127
|
|
|
if ($this->columnNames) { |
|
|
|
|
128
|
|
|
return $this->columnNames; |
129
|
|
|
} |
130
|
|
|
|
131
|
|
|
$this->open(); |
132
|
|
|
|
133
|
|
|
$rows = $this->sheetXml->sheetData->row; |
134
|
|
|
|
135
|
|
|
$row = $rows[0]; |
136
|
|
|
$this->columnNames = $this->getRowContent($row, $this->sharedStringsXml); |
137
|
|
|
|
138
|
|
|
return $this->columnNames; |
139
|
|
|
} |
140
|
|
|
|
141
|
|
|
/** |
142
|
|
|
* @param int $numRows |
143
|
|
|
* |
144
|
|
|
* @return array |
145
|
|
|
*/ |
146
|
|
View Code Duplication |
public function getData($numRows = 0) |
|
|
|
|
147
|
|
|
{ |
148
|
|
|
$this->open(); |
149
|
|
|
|
150
|
|
|
$skipLine = false; |
151
|
|
|
if ($this->hasColumnNames) { |
152
|
|
|
$skipLine = true; |
153
|
|
|
} |
154
|
|
|
|
155
|
|
|
$data = $this->readDataFromFile($numRows, $skipLine); |
156
|
|
|
|
157
|
|
|
return $data; |
158
|
|
|
} |
159
|
|
|
|
160
|
|
|
/** |
161
|
|
|
* @return bool |
162
|
|
|
*/ |
163
|
|
|
public function close() |
164
|
|
|
{ |
165
|
|
|
if ($this->tmpDir && file_exists($this->tmpDir)) { |
166
|
|
|
$this->delTree($this->tmpDir); |
167
|
|
|
$this->tmpDir = null; |
168
|
|
|
} |
169
|
|
|
|
170
|
|
|
return true; |
171
|
|
|
} |
172
|
|
|
|
173
|
|
|
public function __destruct() |
174
|
|
|
{ |
175
|
|
|
$this->close(); |
176
|
|
|
} |
177
|
|
|
|
178
|
|
|
/** |
179
|
|
|
* @param \SimpleXMLElement $row |
180
|
|
|
* @param \SimpleXMLElement $sharedStrings |
181
|
|
|
* |
182
|
|
|
* @SuppressWarnings(PHPMD.ExcessiveMethodLength) |
183
|
|
|
* |
184
|
|
|
* @return array |
185
|
|
|
*/ |
186
|
|
|
protected function getRowContent(\SimpleXMLElement $row, \SimpleXMLElement $sharedStrings) |
187
|
|
|
{ |
188
|
|
|
$rowContent = []; |
189
|
|
|
foreach ($row->c as $cell) { |
190
|
|
|
$v = (string) $cell->v; |
191
|
|
|
|
192
|
|
|
if (isset($cell['t']) && $cell['t'] == 's') { |
193
|
|
|
$s = []; |
194
|
|
|
$si = $sharedStrings->si[(int) $v]; |
195
|
|
|
$si->registerXPathNamespace('n', 'http://schemas.openxmlformats.org/spreadsheetml/2006/main'); |
196
|
|
|
foreach ($si->xpath('.//n:t') as $t) { |
197
|
|
|
$s[] = (string) $t; |
198
|
|
|
} |
199
|
|
|
$v = implode($s); |
200
|
|
|
} |
201
|
|
|
$rowContent[] = $v; |
202
|
|
|
} |
203
|
|
|
|
204
|
|
|
return $rowContent; |
205
|
|
|
} |
206
|
|
|
|
207
|
|
|
/** |
208
|
|
|
* @param $dir |
209
|
|
|
* |
210
|
|
|
* @return bool |
211
|
|
|
*/ |
212
|
|
|
protected function delTree($dir) |
|
|
|
|
213
|
|
|
{ |
214
|
|
|
$files = array_diff(scandir($dir), ['.', '..']); |
215
|
|
|
foreach ($files as $file) { |
216
|
|
|
(is_dir("$dir/$file")) ? $this->delTree("$dir/$file") : unlink("$dir/$file"); |
217
|
|
|
} |
218
|
|
|
|
219
|
|
|
return rmdir($dir); |
220
|
|
|
} |
221
|
|
|
|
222
|
|
|
/** |
223
|
|
|
* @param array $options |
224
|
|
|
*/ |
225
|
|
|
protected function loadParserOptions(array $options) |
226
|
|
|
{ |
227
|
|
|
if (isset($options[static::OPTION_HAS_COLUMN_NAMES])) { |
228
|
|
|
$this->hasColumnNames = $options[static::OPTION_HAS_COLUMN_NAMES]; |
229
|
|
|
} |
230
|
|
|
|
231
|
|
|
if (isset($options[static::OPTION_SHEET_INDEX])) { |
232
|
|
|
$this->sheetIndex = $options[static::OPTION_SHEET_INDEX]; |
233
|
|
|
} |
234
|
|
|
|
235
|
|
|
if (isset($options[static::OPTION_SHEET_NAME])) { |
236
|
|
|
$this->sheetName = $options[static::OPTION_SHEET_NAME]; |
237
|
|
|
} |
238
|
|
|
} |
239
|
|
|
|
240
|
|
|
/** |
241
|
|
|
* @SuppressWarnings(PHPMD.ExcessiveMethodLength) |
242
|
|
|
*/ |
243
|
|
|
protected function setSheetIndexFromSheetName() |
244
|
|
|
{ |
245
|
|
|
$this->sheetIndex = null; |
246
|
|
|
try { |
247
|
|
|
$this->appXml = simplexml_load_file($this->tmpDir . '/docProps/app.xml'); |
248
|
|
|
} catch (\Exception $e) { |
249
|
|
|
throw new SpreadsheetParsingException('Error parsing XLSX internal files'); |
250
|
|
|
} |
251
|
|
|
|
252
|
|
|
$sheetIndex = 1; |
253
|
|
|
$this->appXml->registerXPathNamespace('ex', 'http://schemas.openxmlformats.org/officeDocument/2006/extended-properties'); |
254
|
|
|
foreach ($this->appXml->xpath('//ex:TitlesOfParts/vt:vector/vt:lpstr') as $sheetName) { |
255
|
|
|
if ($this->sheetName == (string) $sheetName) { |
256
|
|
|
$this->sheetIndex = $sheetIndex; |
257
|
|
|
break; |
258
|
|
|
} |
259
|
|
|
$sheetIndex++; |
260
|
|
|
} |
261
|
|
|
} |
262
|
|
|
|
263
|
|
|
protected function loadXlsxXmlFiles() |
264
|
|
|
{ |
265
|
|
|
try { |
266
|
|
|
$this->sharedStringsXml = simplexml_load_file($this->tmpDir . '/xl/sharedStrings.xml'); |
267
|
|
|
$this->workbookXml = simplexml_load_file($this->tmpDir . '/xl/workbook.xml'); |
268
|
|
|
$this->relationshipsXml = simplexml_load_file($this->tmpDir . '/xl/_rels/workbook.xml.rels'); |
269
|
|
|
$this->sheetXml = simplexml_load_file($this->tmpDir . '/xl/worksheets/sheet' . $this->sheetIndex . '.xml'); |
270
|
|
|
} catch (\Exception $e) { |
271
|
|
|
throw new SpreadsheetParsingException('Error parsing XLSX internal files'); |
272
|
|
|
} |
273
|
|
|
} |
274
|
|
|
|
275
|
|
|
/** |
276
|
|
|
* @param $numRows |
277
|
|
|
* @param $skipLine |
278
|
|
|
* |
279
|
|
|
* @SuppressWarnings(PHPMD.ExcessiveMethodLength) |
280
|
|
|
* |
281
|
|
|
* @return array |
282
|
|
|
*/ |
283
|
|
|
protected function readDataFromFile($numRows, $skipLine) |
284
|
|
|
{ |
285
|
|
|
$data = []; |
286
|
|
|
$rows = $this->sheetXml->sheetData->row; |
287
|
|
|
foreach ($rows as $row) { |
288
|
|
|
if ($skipLine) { |
289
|
|
|
$skipLine = false; |
290
|
|
|
continue; |
291
|
|
|
} |
292
|
|
|
|
293
|
|
|
$data[] = $this->columnNames = $this->getRowContent($row, $this->sharedStringsXml); |
294
|
|
|
|
295
|
|
|
$numRows--; |
296
|
|
|
if ($numRows == 0) { |
297
|
|
|
break; |
298
|
|
|
} |
299
|
|
|
} |
300
|
|
|
|
301
|
|
|
return $data; |
302
|
|
|
} |
303
|
|
|
} |
304
|
|
|
|
This check marks implicit conversions of arrays to boolean values in a comparison. While in PHP an empty array is considered to be equal (but not identical) to false, this is not always apparent.
Consider making the comparison explicit by using
empty(..)
or! empty(...)
instead.