Completed
Push — fix_windows_test ( db6202...c04b37 )
by Adrien
04:02
created

StyleHelper::extractNumberFormats()   A

Complexity

Conditions 4
Paths 4

Size

Total Lines 13
Code Lines 8

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 10
CRAP Score 4

Importance

Changes 2
Bugs 0 Features 0
Metric Value
c 2
b 0
f 0
dl 0
loc 13
ccs 10
cts 10
cp 1
rs 9.2
cc 4
eloc 8
nc 4
nop 1
crap 4
1
<?php
2
3
namespace Box\Spout\Reader\XLSX\Helper;
4
5
use Box\Spout\Reader\Wrapper\XMLReader;
6
7
/**
8
 * Class StyleHelper
9
 * This class provides helper functions related to XLSX styles
10
 *
11
 * @package Box\Spout\Reader\XLSX\Helper
12
 */
13
class StyleHelper
14
{
15
    /** Paths of XML files relative to the XLSX file root */
16
    const STYLES_XML_FILE_PATH = 'xl/styles.xml';
17
18
    /** Nodes used to find relevant information in the styles XML file */
19
    const XML_NODE_NUM_FMTS = 'numFmts';
20
    const XML_NODE_NUM_FMT = 'numFmt';
21
    const XML_NODE_CELL_XFS = 'cellXfs';
22
    const XML_NODE_XF = 'xf';
23
24
    /** Attributes used to find relevant information in the styles XML file */
25
    const XML_ATTRIBUTE_NUM_FMT_ID = 'numFmtId';
26
    const XML_ATTRIBUTE_FORMAT_CODE = 'formatCode';
27
    const XML_ATTRIBUTE_APPLY_NUMBER_FORMAT = 'applyNumberFormat';
28
29
    /** By convention, default style ID is 0 */
30
    const DEFAULT_STYLE_ID = 0;
31
32
    /**
33
     * @see https://msdn.microsoft.com/en-us/library/ff529597(v=office.12).aspx
34
     * @var array Mapping between built-in numFmtId and the associated format - for dates only
35
     */
36
    protected static $builtinNumFmtIdToNumFormatMapping = [
37
        14 => 'm/d/yyyy', // @NOTE: ECMA spec is 'mm-dd-yy'
38
        15 => 'd-mmm-yy',
39
        16 => 'd-mmm',
40
        17 => 'mmm-yy',
41
        18 => 'h:mm AM/PM',
42
        19 => 'h:mm:ss AM/PM',
43
        20 => 'h:mm',
44
        21 => 'h:mm:ss',
45
        22 => 'm/d/yyyy h:mm', // @NOTE: ECMA spec is 'm/d/yy h:mm',
46
        45 => 'mm:ss',
47
        46 => '[h]:mm:ss',
48
        47 => 'mm:ss.0',  // @NOTE: ECMA spec is 'mmss.0',
49
    ];
50
51
    /** @var string Path of the XLSX file being read */
52
    protected $filePath;
53
54
    /** @var array Array containing a mapping NUM_FMT_ID => FORMAT_CODE */
55
    protected $customNumberFormats;
56
57
    /** @var array Array containing a mapping STYLE_ID => [STYLE_ATTRIBUTES] */
58
    protected $stylesAttributes;
59
60
    /**
61
     * @param string $filePath Path of the XLSX file being read
62
     */
63 90
    public function __construct($filePath)
64
    {
65 90
        $this->filePath = $filePath;
66 90
    }
67
68
    /**
69
     * Reads the styles.xml file and extract the relevant information from the file.
70
     *
71
     * @return void
72
     */
73 27
    protected function extractRelevantInfo()
74
    {
75 27
        $this->customNumberFormats = [];
76 27
        $this->stylesAttributes = [];
77
78 27
        $xmlReader = new XMLReader();
79
80 27
        if ($xmlReader->openFileInZip($this->filePath, self::STYLES_XML_FILE_PATH)) {
81 27
            while ($xmlReader->read()) {
82 27
                if ($xmlReader->isPositionedOnStartingNode(self::XML_NODE_NUM_FMTS)) {
83 18
                    $this->extractNumberFormats($xmlReader);
84
85 27
                } else if ($xmlReader->isPositionedOnStartingNode(self::XML_NODE_CELL_XFS)) {
86 27
                    $this->extractStyleAttributes($xmlReader);
87 27
                }
88 27
            }
89
90 27
            $xmlReader->close();
91 27
        }
92 27
    }
93
94
    /**
95
     * Extracts number formats from the "numFmt" nodes.
96
     * For simplicity, the styles attributes are kept in memory. This is possible thanks
97
     * to the reuse of formats. So 1 million cells should not use 1 million formats.
98
     *
99
     * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XML Reader positioned on the "numFmts" node
100
     * @return void
101
     */
102 18
    protected function extractNumberFormats($xmlReader)
103
    {
104 18
        while ($xmlReader->read()) {
105 18
            if ($xmlReader->isPositionedOnStartingNode(self::XML_NODE_NUM_FMT)) {
106 18
                $numFmtId = intval($xmlReader->getAttribute(self::XML_ATTRIBUTE_NUM_FMT_ID));
107 18
                $formatCode = $xmlReader->getAttribute(self::XML_ATTRIBUTE_FORMAT_CODE);
108 18
                $this->customNumberFormats[$numFmtId] = $formatCode;
109 18
            } else if ($xmlReader->isPositionedOnEndingNode(self::XML_NODE_NUM_FMTS)) {
110
                // Once done reading "numFmts" node's children
111 18
                break;
112
            }
113 18
        }
114 18
    }
115
116
    /**
117
     * Extracts style attributes from the "xf" nodes, inside the "cellXfs" section.
118
     * For simplicity, the styles attributes are kept in memory. This is possible thanks
119
     * to the reuse of styles. So 1 million cells should not use 1 million styles.
120
     *
121
     * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XML Reader positioned on the "cellXfs" node
122
     * @return void
123
     */
124 27
    protected function extractStyleAttributes($xmlReader)
125
    {
126 27
        while ($xmlReader->read()) {
127 27
            if ($xmlReader->isPositionedOnStartingNode(self::XML_NODE_XF)) {
128 27
                $this->stylesAttributes[] = [
129 27
                    self::XML_ATTRIBUTE_NUM_FMT_ID => intval($xmlReader->getAttribute(self::XML_ATTRIBUTE_NUM_FMT_ID)),
130 27
                    self::XML_ATTRIBUTE_APPLY_NUMBER_FORMAT => !!($xmlReader->getAttribute(self::XML_ATTRIBUTE_APPLY_NUMBER_FORMAT)),
131
                ];
132 27
            } else if ($xmlReader->isPositionedOnEndingNode(self::XML_NODE_CELL_XFS)) {
133
                // Once done reading "cellXfs" node's children
134 27
                break;
135
            }
136 27
        }
137 27
    }
138
139
    /**
140
     * @return array The custom number formats
141
     */
142 12
    protected function getCustomNumberFormats()
143
    {
144 12
        if (!isset($this->customNumberFormats)) {
145
            $this->extractRelevantInfo();
146
        }
147
148 12
        return $this->customNumberFormats;
149
    }
150
151
    /**
152
     * @return array The styles attributes
153
     */
154 27
    protected function getStylesAttributes()
155
    {
156 27
        if (!isset($this->stylesAttributes)) {
157 27
            $this->extractRelevantInfo();
158 27
        }
159
160 27
        return $this->stylesAttributes;
161
    }
162
163
    /**
164
     * Returns whether the style with the given ID should consider
165
     * numeric values as timestamps and format the cell as a date.
166
     *
167
     * @param int $styleId Zero-based style ID
168
     * @return bool Whether the cell with the given cell should display a date instead of a numeric value
169
     */
170 108
    public function shouldFormatNumericValueAsDate($styleId)
171
    {
172 108
        $stylesAttributes = $this->getStylesAttributes();
173
174
        // Default style (0) does not format numeric values as timestamps. Only custom styles do.
175
        // Also if the style ID does not exist in the styles.xml file, format as numeric value.
176
        // Using isset here because it is way faster than array_key_exists...
177 108
        if ($styleId === self::DEFAULT_STYLE_ID || !isset($stylesAttributes[$styleId])) {
178 21
            return false;
179
        }
180
181 87
        $styleAttributes = $stylesAttributes[$styleId];
182
183 87
        $applyNumberFormat = $styleAttributes[self::XML_ATTRIBUTE_APPLY_NUMBER_FORMAT];
184 87
        if (!$applyNumberFormat) {
185 3
            return false;
186
        }
187
188 84
        $numFmtId = $styleAttributes[self::XML_ATTRIBUTE_NUM_FMT_ID];
189 84
        return $this->doesNumFmtIdIndicateDate($numFmtId);
190
    }
191
192
    /**
193
     * @param int $numFmtId
194
     * @return bool Whether the number format ID indicates that the number is a timestamp
195
     */
196 84
    protected function doesNumFmtIdIndicateDate($numFmtId)
197
    {
198
        return (
199 84
            !$this->doesNumFmtIdIndicateGeneralFormat($numFmtId) &&
200
            (
201 81
                $this->isNumFmtIdBuiltInDateFormat($numFmtId) ||
202 78
                $this->isNumFmtIdCustomDateFormat($numFmtId)
203 78
            )
204 84
        );
205
    }
206
207
    /**
208
     * @param int $numFmtId
209
     * @return bool Whether the number format ID indicates the "General" format (0 by convention)
210
     */
211 84
    protected function doesNumFmtIdIndicateGeneralFormat($numFmtId)
212
    {
213 84
        return ($numFmtId === 0);
214
    }
215
216
    /**
217
     * @param int $numFmtId
218
     * @return bool Whether the number format ID indicates that the number is a timestamp
219
     */
220 81
    protected function isNumFmtIdBuiltInDateFormat($numFmtId)
221
    {
222 81
        $builtInDateFormatIds = array_keys(self::$builtinNumFmtIdToNumFormatMapping);
223 81
        return in_array($numFmtId, $builtInDateFormatIds);
224
    }
225
226
    /**
227
     * @param int $numFmtId
228
     * @return bool Whether the number format ID indicates that the number is a timestamp
229
     */
230 78
    protected function isNumFmtIdCustomDateFormat($numFmtId)
231
    {
232 78
        $customNumberFormats = $this->getCustomNumberFormats();
233
234
        // Using isset here because it is way faster than array_key_exists...
235 78
        if (!isset($customNumberFormats[$numFmtId])) {
236 3
            return false;
237
        }
238
239 75
        $customNumberFormat = $customNumberFormats[$numFmtId];
240
241
        // Remove extra formatting (what's between [ ], the brackets should not be preceded by a "\")
242 75
        $pattern = '((?<!\\\)\[.+?(?<!\\\)\])';
243 75
        $customNumberFormat = preg_replace($pattern, '', $customNumberFormat);
244
245
        // custom date formats contain specific characters to represent the date:
246
        // e - yy - m - d - h - s
247
        // and all of their variants (yyyy - mm - dd...)
248 75
        $dateFormatCharacters = ['e', 'yy', 'm', 'd', 'h', 's'];
249
250 75
        $hasFoundDateFormatCharacter = false;
251 75
        foreach ($dateFormatCharacters as $dateFormatCharacter) {
252
            // character not preceded by "\"
253 75
            $pattern = '/(?<!\\\)' . $dateFormatCharacter . '/';
254
255 75
            if (preg_match($pattern, $customNumberFormat)) {
256 66
                $hasFoundDateFormatCharacter = true;
257 66
                break;
258
            }
259 75
        }
260
261 75
        return $hasFoundDateFormatCharacter;
262
    }
263
264
    /**
265
     * Returns the format as defined in "styles.xml" of the given style.
266
     * NOTE: It is assumed that the style DOES have a number format associated to it.
267
     *
268
     * @param int $styleId Zero-based style ID
269
     * @return string The number format associated with the given style
270
     */
271 3
    public function getNumberFormat($styleId)
272
    {
273 3
        $stylesAttributes = $this->getStylesAttributes();
274 3
        $styleAttributes = $stylesAttributes[$styleId];
275 3
        $numFmtId = $styleAttributes[self::XML_ATTRIBUTE_NUM_FMT_ID];
276
277 3
        if ($this->isNumFmtIdBuiltInDateFormat($numFmtId)) {
278 3
            $numberFormat = self::$builtinNumFmtIdToNumFormatMapping[$numFmtId];
279 3
        } else {
280 3
            $customNumberFormats = $this->getCustomNumberFormats();
281 3
            $numberFormat = $customNumberFormats[$numFmtId];
282
        }
283
284 3
        return $numberFormat;
285
    }
286
}
287