XlsxParser::getRowContent()   A
last analyzed

Complexity

Conditions 5
Paths 3

Size

Total Lines 20

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
dl 0
loc 20
rs 9.2888
c 0
b 0
f 0
cc 5
nc 3
nop 2
1
<?php
2
3
namespace Linio\Component\SpreadsheetParser\Parser;
4
5
use Linio\Component\SpreadsheetParser\Exception\SpreadsheetParsingException;
6
7
class XlsxParser implements ParserInterface
8
{
9
    const OPTION_HAS_COLUMN_NAMES = 'has_column_names';
10
    const OPTION_SHEET_INDEX = 'sheet_index';
11
    const OPTION_SHEET_NAME = 'sheet_name';
12
13
    /**
14
     * @var string
15
     */
16
    protected $filePath;
17
18
    /**
19
     * @var string
20
     */
21
    protected $tmpDir;
22
23
    /**
24
     * @var bool
25
     */
26
    protected $hasColumnNames;
27
28
    /**
29
     * @var int
30
     */
31
    protected $sheetIndex;
32
33
    /**
34
     * @var string
35
     */
36
    protected $sheetName;
37
38
    /**
39
     * @var array
40
     */
41
    protected $columnNames;
42
43
    /**
44
     * @var \SimpleXMLElement
45
     */
46
    protected $appXml;
47
48
    /**
49
     * @var \SimpleXMLElement
50
     */
51
    protected $sharedStringsXml;
52
53
    /**
54
     * @var \SimpleXMLElement
55
     */
56
    protected $sheetXml;
57
58
    /**
59
     * @var \SimpleXMLElement
60
     */
61
    protected $workbookXml;
62
63
    /**
64
     * @var \SimpleXMLElement
65
     */
66
    protected $relationshipsXml;
67
68
    /**
69
     * @param string $filePath
70
     */
71
    public function __construct($filePath, array $options = [])
72
    {
73
        $this->filePath = $filePath;
74
75
        // default options
76
        $this->hasColumnNames = true;
77
        $this->sheetIndex = 1;
78
        $this->sheetName = null;
79
80
        $this->loadParserOptions($options);
81
    }
82
83
    /**
84
     * @SuppressWarnings(PHPMD.ExcessiveMethodLength)
85
     *
86
     * @throws SpreadsheetParsingException
87
     *
88
     * @return bool
89
     */
90
    public function open()
91
    {
92
        if ($this->sheetXml) {
93
            return true;
94
        }
95
96
        $this->tmpDir = tempnam(sys_get_temp_dir(), 'xls');
97
        unlink($this->tmpDir);
98
99
        $zip = new \ZipArchive();
100
        $zip->open($this->filePath);
101
        $zip->extractTo($this->tmpDir);
102
        $zip->close();
103
104
        if ($this->sheetName) {
105
            $this->setSheetIndexFromSheetName();
106
            if (!$this->sheetIndex) {
107
                throw new SpreadsheetParsingException('Sheet not found: ' . $this->sheetName);
108
            }
109
        }
110
111
        $this->loadXlsxXmlFiles();
112
113
        return true;
114
    }
115
116
    /**
117
     * @SuppressWarnings(PHPMD.ExcessiveMethodLength)
118
     *
119
     * @return array|false
120
     */
121
    public function getColumnNames()
122
    {
123
        if (!$this->hasColumnNames) {
124
            return false;
125
        }
126
127
        if ($this->columnNames) {
0 ignored issues
show
Bug Best Practice introduced by
The expression $this->columnNames of type array is implicitly converted to a boolean; are you sure this is intended? If so, consider using ! empty($expr) instead to make it clear that you intend to check for an array without elements.

This check marks implicit conversions of arrays to boolean values in a comparison. While in PHP an empty array is considered to be equal (but not identical) to false, this is not always apparent.

Consider making the comparison explicit by using empty(..) or ! empty(...) instead.

Loading history...
128
            return $this->columnNames;
129
        }
130
131
        $this->open();
132
133
        $rows = $this->sheetXml->sheetData->row;
134
135
        $row = $rows[0];
136
        $this->columnNames = $this->getRowContent($row, $this->sharedStringsXml);
137
138
        return $this->columnNames;
139
    }
140
141
    /**
142
     * @param int $numRows
143
     *
144
     * @return array
145
     */
146 View Code Duplication
    public function getData($numRows = 0)
0 ignored issues
show
Duplication introduced by
This method seems to be duplicated in your project.

Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.

You can also find more detailed suggestions in the “Code” section of your repository.

Loading history...
147
    {
148
        $this->open();
149
150
        $skipLine = false;
151
        if ($this->hasColumnNames) {
152
            $skipLine = true;
153
        }
154
155
        $data = $this->readDataFromFile($numRows, $skipLine);
156
157
        return $data;
158
    }
159
160
    /**
161
     * @return bool
162
     */
163
    public function close()
164
    {
165
        if ($this->tmpDir && file_exists($this->tmpDir)) {
166
            $this->delTree($this->tmpDir);
167
            $this->tmpDir = null;
168
        }
169
170
        return true;
171
    }
172
173
    public function __destruct()
174
    {
175
        $this->close();
176
    }
177
178
    /**
179
     * @param \SimpleXMLElement $row
180
     * @param \SimpleXMLElement $sharedStrings
181
     *
182
     * @SuppressWarnings(PHPMD.ExcessiveMethodLength)
183
     *
184
     * @return array
185
     */
186
    protected function getRowContent(\SimpleXMLElement $row, \SimpleXMLElement $sharedStrings)
187
    {
188
        $rowContent = [];
189
        foreach ($row->c as $cell) {
190
            $v = (string) $cell->v;
191
192
            if (isset($cell['t']) && $cell['t'] == 's') {
193
                $s = [];
194
                $si = $sharedStrings->si[(int) $v];
195
                $si->registerXPathNamespace('n', 'http://schemas.openxmlformats.org/spreadsheetml/2006/main');
196
                foreach ($si->xpath('.//n:t') as $t) {
197
                    $s[] = (string) $t;
198
                }
199
                $v = implode($s);
200
            }
201
            $rowContent[] = $v;
202
        }
203
204
        return $rowContent;
205
    }
206
207
    /**
208
     * @param $dir
209
     *
210
     * @return bool
211
     */
212
    protected function delTree($dir)
0 ignored issues
show
Coding Style introduced by
function delTree() does not seem to conform to the naming convention (^(?:is|has|should|may|supports)).

This check examines a number of code elements and verifies that they conform to the given naming conventions.

You can set conventions for local variables, abstract classes, utility classes, constant, properties, methods, parameters, interfaces, classes, exceptions and special methods.

Loading history...
213
    {
214
        $files = array_diff(scandir($dir), ['.', '..']);
215
        foreach ($files as $file) {
216
            (is_dir("$dir/$file")) ? $this->delTree("$dir/$file") : unlink("$dir/$file");
217
        }
218
219
        return rmdir($dir);
220
    }
221
222
    /**
223
     * @param array $options
224
     */
225
    protected function loadParserOptions(array $options)
226
    {
227
        if (isset($options[static::OPTION_HAS_COLUMN_NAMES])) {
228
            $this->hasColumnNames = $options[static::OPTION_HAS_COLUMN_NAMES];
229
        }
230
231
        if (isset($options[static::OPTION_SHEET_INDEX])) {
232
            $this->sheetIndex = $options[static::OPTION_SHEET_INDEX];
233
        }
234
235
        if (isset($options[static::OPTION_SHEET_NAME])) {
236
            $this->sheetName = $options[static::OPTION_SHEET_NAME];
237
        }
238
    }
239
240
    /**
241
     * @SuppressWarnings(PHPMD.ExcessiveMethodLength)
242
     */
243
    protected function setSheetIndexFromSheetName()
244
    {
245
        $this->sheetIndex = null;
246
        try {
247
            $this->appXml = simplexml_load_file($this->tmpDir . '/docProps/app.xml');
248
        } catch (\Exception $e) {
249
            throw new SpreadsheetParsingException('Error parsing XLSX internal files');
250
        }
251
252
        $sheetIndex = 1;
253
        $this->appXml->registerXPathNamespace('ex', 'http://schemas.openxmlformats.org/officeDocument/2006/extended-properties');
254
        foreach ($this->appXml->xpath('//ex:TitlesOfParts/vt:vector/vt:lpstr') as $sheetName) {
255
            if ($this->sheetName == (string) $sheetName) {
256
                $this->sheetIndex = $sheetIndex;
257
                break;
258
            }
259
            $sheetIndex++;
260
        }
261
    }
262
263
    protected function loadXlsxXmlFiles()
264
    {
265
        try {
266
            $this->sharedStringsXml = simplexml_load_file($this->tmpDir . '/xl/sharedStrings.xml');
267
            $this->workbookXml = simplexml_load_file($this->tmpDir . '/xl/workbook.xml');
268
            $this->relationshipsXml = simplexml_load_file($this->tmpDir . '/xl/_rels/workbook.xml.rels');
269
            $this->sheetXml = simplexml_load_file($this->tmpDir . '/xl/worksheets/sheet' . $this->sheetIndex . '.xml');
270
        } catch (\Exception $e) {
271
            throw new SpreadsheetParsingException('Error parsing XLSX internal files');
272
        }
273
    }
274
275
    /**
276
     * @param $numRows
277
     * @param $skipLine
278
     *
279
     * @SuppressWarnings(PHPMD.ExcessiveMethodLength)
280
     *
281
     * @return array
282
     */
283
    protected function readDataFromFile($numRows, $skipLine)
284
    {
285
        $data = [];
286
        $rows = $this->sheetXml->sheetData->row;
287
        foreach ($rows as $row) {
288
            if ($skipLine) {
289
                $skipLine = false;
290
                continue;
291
            }
292
293
            $data[] = $this->columnNames = $this->getRowContent($row, $this->sharedStringsXml);
294
295
            $numRows--;
296
            if ($numRows == 0) {
297
                break;
298
            }
299
        }
300
301
        return $data;
302
    }
303
}
304