1 | <?php |
||
19 | class SharedStringsManager |
||
20 | { |
||
21 | /** Path of the sharedStrings XML file inside the XLSX file */ |
||
22 | const SHARED_STRINGS_XML_FILE_PATH = 'xl/sharedStrings.xml'; |
||
23 | |||
24 | /** Main namespace for the sharedStrings.xml file */ |
||
25 | const MAIN_NAMESPACE_FOR_SHARED_STRINGS_XML = 'http://schemas.openxmlformats.org/spreadsheetml/2006/main'; |
||
26 | |||
27 | /** Definition of XML node names used to parse data */ |
||
28 | const XML_NODE_SST = 'sst'; |
||
29 | const XML_NODE_SI = 'si'; |
||
30 | const XML_NODE_R = 'r'; |
||
31 | const XML_NODE_T = 't'; |
||
32 | |||
33 | /** Definition of XML attributes used to parse data */ |
||
34 | const XML_ATTRIBUTE_COUNT = 'count'; |
||
35 | const XML_ATTRIBUTE_UNIQUE_COUNT = 'uniqueCount'; |
||
36 | const XML_ATTRIBUTE_XML_SPACE = 'xml:space'; |
||
37 | const XML_ATTRIBUTE_VALUE_PRESERVE = 'preserve'; |
||
38 | |||
39 | /** @var string Path of the XLSX file being read */ |
||
40 | protected $filePath; |
||
41 | |||
42 | /** @var string Temporary folder in which the temporary files holding shared strings are stored */ |
||
43 | protected $tempFolder; |
||
44 | |||
45 | /** @var EntityFactory Factory to create entities */ |
||
46 | protected $entityFactory; |
||
47 | |||
48 | /** @var HelperFactory Factory to create helpers */ |
||
49 | protected $helperFactory; |
||
50 | |||
51 | /** @var CachingStrategyFactory Factory to create shared strings caching strategies */ |
||
52 | protected $cachingStrategyFactory; |
||
53 | |||
54 | /** @var CachingStrategyInterface The best caching strategy for storing shared strings */ |
||
55 | protected $cachingStrategy; |
||
56 | |||
57 | /** |
||
58 | * @param string $filePath Path of the XLSX file being read |
||
59 | * @param string $tempFolder Temporary folder in which the temporary files holding shared strings are stored |
||
60 | * @param EntityFactory $entityFactory Factory to create entities |
||
61 | * @param HelperFactory $helperFactory Factory to create helpers |
||
62 | * @param CachingStrategyFactory $cachingStrategyFactory Factory to create shared strings caching strategies |
||
63 | */ |
||
64 | 41 | public function __construct($filePath, $tempFolder, $entityFactory, $helperFactory, $cachingStrategyFactory) |
|
72 | |||
73 | /** |
||
74 | * Returns whether the XLSX file contains a shared strings XML file |
||
75 | * |
||
76 | * @return bool |
||
77 | */ |
||
78 | 35 | public function hasSharedStrings() |
|
90 | |||
91 | /** |
||
92 | * Builds a cache containing all the shared strings of the sheet (kept in memory or in temporary files, depending on the caching strategy). |
||
93 | * All the strings are stored in an XML file, located at 'xl/sharedStrings.xml'. |
||
94 | * Each string is then accessed by the sheet data, via its index in the built cache. |
||
95 | * |
||
96 | * More documentation available here: http://msdn.microsoft.com/en-us/library/office/gg278314.aspx |
||
97 | * |
||
98 | * The XML file can be really big with sheets containing a lot of data. That is why |
||
99 | * we need to use an XML reader that provides streaming, like the XMLReader library. |
||
100 | * |
||
101 | * @return void |
||
102 | * @throws \Box\Spout\Common\Exception\IOException If sharedStrings.xml can't be read |
||
103 | */ |
||
104 | 35 | public function extractSharedStrings() |
|
135 | |||
136 | /** |
||
137 | * Returns the shared strings unique count, as specified in <sst> tag. |
||
138 | * |
||
139 | * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader instance |
||
140 | * @return int|null Number of unique shared strings in the sharedStrings.xml file (NULL if unknown) |
||
141 | * @throws \Box\Spout\Common\Exception\IOException If sharedStrings.xml is invalid and can't be read |
||
142 | */ |
||
143 | 35 | protected function getSharedStringsUniqueCount($xmlReader) |
|
162 | |||
163 | /** |
||
164 | * Returns the best shared strings caching strategy. |
||
165 | * |
||
166 | * @param int|null $sharedStringsUniqueCount Number of unique shared strings (NULL if unknown) |
||
167 | * @return CachingStrategyInterface |
||
168 | */ |
||
169 | 34 | protected function getBestSharedStringsCachingStrategy($sharedStringsUniqueCount) |
|
174 | |||
175 | /** |
||
176 | * Processes the shared strings item XML node which the given XML reader is positioned on. |
||
177 | * |
||
178 | * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XML Reader positioned on a "<si>" node |
||
179 | * @param int $sharedStringIndex Index of the processed shared strings item |
||
180 | * @return void |
||
181 | */ |
||
182 | 24 | protected function processSharedStringsItem($xmlReader, $sharedStringIndex) |
|
201 | |||
202 | /** |
||
203 | * Not all text nodes' values must be extracted. |
||
204 | * Some text nodes are part of a node describing the pronunciation for instance. |
||
205 | * We'll only consider the nodes whose parents are "<si>" or "<r>". |
||
206 | * |
||
207 | * @param \DOMElement $textNode Text node to check |
||
208 | * @return bool Whether the given text node's value must be extracted |
||
209 | */ |
||
210 | 24 | protected function shouldExtractTextNodeValue($textNode) |
|
215 | |||
216 | /** |
||
217 | * If the text node has the attribute 'xml:space="preserve"', then preserve whitespace. |
||
218 | * |
||
219 | * @param \DOMElement $textNode The text node element (<t>) whose whitespace may be preserved |
||
220 | * @return bool Whether whitespace should be preserved |
||
221 | */ |
||
222 | 24 | protected function shouldPreserveWhitespace($textNode) |
|
227 | |||
228 | /** |
||
229 | * Returns the shared string at the given index, using the previously chosen caching strategy. |
||
230 | * |
||
231 | * @param int $sharedStringIndex Index of the shared string in the sharedStrings.xml file |
||
232 | * @return string The shared string at the given index |
||
233 | * @throws \Box\Spout\Reader\Exception\SharedStringNotFoundException If no shared string found for the given index |
||
234 | */ |
||
235 | 24 | public function getStringAtIndex($sharedStringIndex) |
|
239 | |||
240 | /** |
||
241 | * Destroys the cache, freeing memory and removing any created artifacts |
||
242 | * |
||
243 | * @return void |
||
244 | */ |
||
245 | 38 | public function cleanup() |
|
251 | } |
||
252 |