1 | <?php |
||
10 | class CachingStrategyFactory |
||
11 | { |
||
12 | /** |
||
13 | * The memory amount needed to store a string was obtained empirically from this data: |
||
14 | * |
||
15 | * ------------------------------------ |
||
16 | * | Number of chars⁺ | Memory needed | |
||
17 | * ------------------------------------ |
||
18 | * | 3,000 | 1 MB | |
||
19 | * | 15,000 | 2 MB | |
||
20 | * | 30,000 | 5 MB | |
||
21 | * | 75,000 | 11 MB | |
||
22 | * | 150,000 | 21 MB | |
||
23 | * | 300,000 | 43 MB | |
||
24 | * | 750,000 | 105 MB | |
||
25 | * | 1,500,000 | 210 MB | |
||
26 | * | 2,250,000 | 315 MB | |
||
27 | * | 3,000,000 | 420 MB | |
||
28 | * | 4,500,000 | 630 MB | |
||
29 | * ------------------------------------ |
||
30 | * |
||
31 | * ⁺ All characters were 1 byte long |
||
32 | * |
||
33 | * This gives a linear graph where each 1-byte character requires about 150 bytes to be stored. |
||
34 | * Given that some characters can take up to 4 bytes, we need 600 bytes per character to be safe. |
||
35 | * Also, there are on average about 20 characters per cell (this is entirely empirical data...). |
||
36 | * |
||
37 | * This means that in order to store one shared string in memory, the memory amount needed is: |
||
38 | * => 20 * 600 ≈ 12KB |
||
39 | */ |
||
40 | const AMOUNT_MEMORY_NEEDED_PER_STRING_IN_KB = 12; |
||
41 | |||
42 | /** |
||
43 | * To avoid running out of memory when extracting a huge number of shared strings, they can be saved to temporary files |
||
44 | * instead of in memory. Then, when accessing a string, the corresponding file contents will be loaded in memory |
||
45 | * and the string will be quickly retrieved. |
||
46 | * The performance bottleneck is not when creating these temporary files, but rather when loading their content. |
||
47 | * Because the contents of the last loaded file stay in memory until another file needs to be loaded, it works |
||
48 | * best when the indexes of the shared strings are sorted in the sheet data. |
||
49 | * 10,000 was chosen because it creates small files that are fast to be loaded in memory. |
||
50 | */ |
||
51 | const MAX_NUM_STRINGS_PER_TEMP_FILE = 10000; |
||
52 | |||
/**
 * Picks the caching strategy to use for the shared strings.
 *
 * The in-memory strategy is returned whenever isInMemoryStrategyUsageSafe() accepts
 * the given number of unique shared strings; otherwise the file-based strategy is
 * returned, configured with MAX_NUM_STRINGS_PER_TEMP_FILE strings per temporary file.
 *
 * @param int|null $sharedStringsUniqueCount Number of unique shared strings (NULL if unknown)
 * @param string $tempFolder Temporary folder where the temporary files to store shared strings will be stored
 * @param HelperFactory $helperFactory Factory to create helpers
 * @return CachingStrategyInterface The best caching strategy
 */
public function createBestCachingStrategy($sharedStringsUniqueCount, $tempFolder, $helperFactory)
{
    $canKeepStringsInMemory = $this->isInMemoryStrategyUsageSafe($sharedStringsUniqueCount);

    if (!$canKeepStringsInMemory) {
        // Not safe to hold everything in memory: spread the strings over temporary files
        return new FileBasedStrategy(
            $tempFolder,
            self::MAX_NUM_STRINGS_PER_TEMP_FILE,
            $helperFactory
        );
    }

    return new InMemoryStrategy($sharedStringsUniqueCount);
}
||
70 | |||
71 | /** |
||
72 | * Returns whether it is safe to use in-memory caching, given the number of unique shared strings |
||
73 | * and the amount of memory available. |
||
74 | * |
||
75 | * @param int|null $sharedStringsUniqueCount Number of unique shared strings (NULL if unknown) |
||
76 | * @return bool |
||
77 | */ |
||
78 | 48 | protected function isInMemoryStrategyUsageSafe($sharedStringsUniqueCount) |
|
97 | |||
/**
 * Returns the PHP "memory_limit" in Kilobytes.
 *
 * The value returned by getMemoryLimitFromIni() is trimmed and lower-cased, then
 * interpreted as follows:
 *  - "-1" means no limit and yields -1
 *  - digits only (e.g. "134217728") are a number of bytes, as PHP itself reads
 *    a "memory_limit" that has no unit suffix
 *  - digits followed by a unit (b, k, m, g or t, optionally followed by "b")
 *    are converted from that unit to Kilobytes
 *  - anything else yields -1
 *
 * @return float|int The limit in Kilobytes, or -1 if there is no limit or if the value cannot be parsed
 */
protected function getMemoryLimitInKB()
{
    $memoryLimitFormatted = $this->getMemoryLimitFromIni();
    $memoryLimitFormatted = \strtolower(\trim($memoryLimitFormatted));

    // No memory limit
    if ($memoryLimitFormatted === '-1') {
        return -1;
    }

    // Plain integer without any unit: the amount is expressed in bytes.
    // Without this branch such a value would not match the unit pattern below
    // and would wrongly be reported as "-1" (unknown/unlimited).
    if (\preg_match('/^\d+$/', $memoryLimitFormatted)) {
        return (((int) $memoryLimitFormatted) / 1024);
    }

    if (\preg_match('/(\d+)([bkmgt])b?/', $memoryLimitFormatted, $matches)) {
        $amount = (int) ($matches[1]);
        $unit = $matches[2];

        switch ($unit) {
            case 'b': return ($amount / 1024);
            case 'k': return $amount;
            case 'm': return ($amount * 1024);
            case 'g': return ($amount * 1024 * 1024);
            case 't': return ($amount * 1024 * 1024 * 1024);
        }
    }

    // Value could not be parsed
    return -1;
}
||
128 | |||
129 | /** |
||
130 | * Returns the formatted "memory_limit" value |
||
131 | * |
||
132 | * @return string |
||
133 | */ |
||
134 | 38 | protected function getMemoryLimitFromIni() |
|
138 | } |
||
139 |