1 | <?php |
||
18 | class SharedStringsHelper |
||
19 | { |
||
20 | /** Path of sharedStrings XML file inside the XLSX file */ |
||
21 | const SHARED_STRINGS_XML_FILE_PATH = 'xl/sharedStrings.xml'; |
||
22 | |||
23 | /** Main namespace for the sharedStrings.xml file */ |
||
24 | const MAIN_NAMESPACE_FOR_SHARED_STRINGS_XML = 'http://schemas.openxmlformats.org/spreadsheetml/2006/main'; |
||
25 | |||
26 | /** @var string Path of the XLSX file being read */ |
||
27 | protected $filePath; |
||
28 | |||
29 | /** @var string Temporary folder where the temporary files to store shared strings will be stored */ |
||
30 | protected $tempFolder; |
||
31 | |||
32 | /** @var CachingStrategyInterface The best caching strategy for storing shared strings */ |
||
33 | protected $cachingStrategy; |
||
34 | |||
35 | /** |
||
36 | * @param string $filePath Path of the XLSX file being read |
||
37 | * @param string|null $tempFolder Temporary folder where the temporary files to store shared strings will be stored |
||
38 | */ |
||
39 | public function __construct($filePath, $tempFolder = null) |
||
44 | |||
45 | /** |
||
46 | * Returns whether the XLSX file contains a shared strings XML file |
||
47 | * |
||
48 | * @return bool |
||
49 | */ |
||
50 | public function hasSharedStrings() |
||
62 | |||
63 | /** |
||
64 | * Builds an in-memory array containing all the shared strings of the sheet. |
||
65 | * All the strings are stored in an XML file, located at 'xl/sharedStrings.xml'. |
||
66 | * It is then accessed by the sheet data, via the string index in the built table. |
||
67 | * |
||
68 | * More documentation available here: http://msdn.microsoft.com/en-us/library/office/gg278314.aspx |
||
69 | * |
||
70 | * The XML file can be really big with sheets containing a lot of data. That is why |
||
71 | * we need to use an XML reader that provides streaming like the XMLReader library. |
||
72 | * Please note that SimpleXML does not provide such functionality but since it is faster |
||
73 | * and more handy to parse a few XML nodes, it is used in combination with XMLReader for that purpose. |
||
74 | * |
||
75 | * @return void |
||
76 | * @throws \Box\Spout\Common\Exception\IOException If sharedStrings.xml can't be read |
||
77 | */ |
||
78 | public function extractSharedStrings() |
||
137 | |||
138 | /** |
||
139 | * @return string The path to the shared strings XML file |
||
140 | */ |
||
141 | protected function getSharedStringsFilePath() |
||
145 | |||
146 | /** |
||
147 | * Returns the shared strings unique count, as specified in <sst> tag. |
||
148 | * |
||
149 | * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader instance |
||
150 | * @return int|null Number of unique shared strings in the sharedStrings.xml file |
||
151 | * @throws \Box\Spout\Common\Exception\IOException If sharedStrings.xml is invalid and can't be read |
||
152 | */ |
||
protected function getSharedStringsUniqueCount($xmlReader)
{
    // Move the cursor to the first node named "sst".
    $xmlReader->next('sst');

    // A node named "sst" is not necessarily the <sst> element itself (a DOCTYPE
    // declaration carries the same name), so keep reading for as long as the
    // cursor sits on a non-element "sst" node.
    while (true) {
        if ($xmlReader->name !== 'sst') {
            break;
        }
        if ($xmlReader->nodeType === XMLReader::ELEMENT) {
            break;
        }
        $xmlReader->read();
    }

    // "uniqueCount" is preferred, but some software only writes the "count" attribute.
    // @see https://github.com/box/spout/issues/254
    $declaredCount = $xmlReader->getAttribute('uniqueCount');
    if ($declaredCount === null) {
        $declaredCount = $xmlReader->getAttribute('count');
    }

    // Neither attribute present: the number of shared strings is unknown.
    if ($declaredCount === null) {
        return null;
    }

    return (int) $declaredCount;
}
||
172 | |||
173 | /** |
||
174 | * Returns the best shared strings caching strategy. |
||
175 | * |
||
176 | * @param int|null $sharedStringsUniqueCount Number of unique shared strings (NULL if unknown) |
||
177 | * @return CachingStrategyInterface |
||
178 | */ |
||
179 | protected function getBestSharedStringsCachingStrategy($sharedStringsUniqueCount) |
||
184 | |||
185 | /** |
||
186 | * Returns a SimpleXMLElement node from the current node in the given XMLReader instance. |
||
187 | * This is to simplify the parsing of the subtree. |
||
188 | * |
||
189 | * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader |
||
190 | * @return \Box\Spout\Reader\Wrapper\SimpleXMLElement |
||
191 | * @throws \Box\Spout\Common\Exception\IOException If the current node cannot be read |
||
192 | */ |
||
193 | protected function getSimpleXmlElementNodeFromXMLReader($xmlReader) |
||
204 | |||
205 | /** |
||
206 | * Removes nodes that should not be read, like the pronunciation of the Kanji characters. |
||
207 | * By keeping them, their text content would be added to the read string. |
||
208 | * |
||
209 | * @param \Box\Spout\Reader\Wrapper\SimpleXMLElement $parentNode Parent node that may contain nodes to remove |
||
210 | * @return \Box\Spout\Reader\Wrapper\SimpleXMLElement Cleaned parent node |
||
211 | */ |
||
212 | protected function removeSuperfluousTextNodes($parentNode) |
||
227 | |||
228 | /** |
||
229 | * If the text node has the attribute 'xml:space="preserve"', then preserve whitespace. |
||
230 | * |
||
231 | * @param \Box\Spout\Reader\Wrapper\SimpleXMLElement $textNode The text node element (<t>) whose whitespace may be preserved |
||
232 | * @return bool Whether whitespace should be preserved |
||
233 | */ |
||
234 | protected function shouldPreserveWhitespace($textNode) |
||
239 | |||
240 | /** |
||
241 | * Returns the shared string at the given index, using the previously chosen caching strategy. |
||
242 | * |
||
243 | * @param int $sharedStringIndex Index of the shared string in the sharedStrings.xml file |
||
244 | * @return string The shared string at the given index |
||
245 | * @throws \Box\Spout\Reader\Exception\SharedStringNotFoundException If no shared string found for the given index |
||
246 | */ |
||
247 | public function getStringAtIndex($sharedStringIndex) |
||
251 | |||
252 | /** |
||
253 | * Destroys the cache, freeing memory and removing any created artifacts |
||
254 | * |
||
255 | * @return void |
||
256 | */ |
||
257 | public function cleanup() |
||
263 | } |
||
264 |