Completed
Push — master ( ffea88...a8eb7a )
by Adrien
05:03
created

CachingStrategyFactory::getMemoryLimitFromIni()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 4
Code Lines 2

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 2
CRAP Score 1

Importance

Changes 1
Bugs 0 Features 0
Metric Value
c 1
b 0
f 0
dl 0
loc 4
rs 10
ccs 2
cts 2
cp 1
cc 1
eloc 2
nc 1
nop 0
crap 1
1
<?php
2
3
namespace Box\Spout\Reader\XLSX\Helper\SharedStringsCaching;
4
5
/**
6
 * Class CachingStrategyFactory
7
 *
8
 * @package Box\Spout\Reader\XLSX\Helper\SharedStringsCaching
9
 */
10
class CachingStrategyFactory
{
    /**
     * The memory amount needed to store a string was obtained empirically from this data:
     *
     *        ------------------------------------
     *        | Number of chars⁺ | Memory needed |
     *        ------------------------------------
     *        |           3,000  |         1 MB  |
     *        |          15,000  |         2 MB  |
     *        |          30,000  |         5 MB  |
     *        |          75,000  |        11 MB  |
     *        |         150,000  |        21 MB  |
     *        |         300,000  |        43 MB  |
     *        |         750,000  |       105 MB  |
     *        |       1,500,000  |       210 MB  |
     *        |       2,250,000  |       315 MB  |
     *        |       3,000,000  |       420 MB  |
     *        |       4,500,000  |       630 MB  |
     *        ------------------------------------
     *
     *        ⁺ All characters were 1 byte long
     *
     * This gives a linear graph where each 1-byte character requires about 150 bytes to be stored.
     * Given that some characters can take up to 4 bytes, we need 600 bytes per character to be safe.
     * Also, there is on average about 20 characters per cell (this is entirely empirical data...).
     *
     * This means that in order to store one shared string in memory, the memory amount needed is:
     *   => 20 * 600 ≈ 12KB
     */
    const AMOUNT_MEMORY_NEEDED_PER_STRING_IN_KB = 12;

    /**
     * To avoid running out of memory when extracting a huge number of shared strings, they can be saved to temporary files
     * instead of in memory. Then, when accessing a string, the corresponding file contents will be loaded in memory
     * and the string will be quickly retrieved.
     * The performance bottleneck is not when creating these temporary files, but rather when loading their content.
     * Because the contents of the last loaded file stays in memory until another file needs to be loaded, it works
     * best when the indexes of the shared strings are sorted in the sheet data.
     * 10,000 was chosen because it creates small files that are fast to be loaded in memory.
     */
    const MAX_NUM_STRINGS_PER_TEMP_FILE = 10000;

    /** @var CachingStrategyFactory|null Singleton instance */
    protected static $instance = null;

    /**
     * Private constructor for singleton: instances are only created through getInstance().
     */
    private function __construct()
    {
    }

    /**
     * Returns the singleton instance of the factory, creating it on first use.
     *
     * @return CachingStrategyFactory
     */
    public static function getInstance()
    {
        if (self::$instance === null) {
            self::$instance = new self();
        }

        return self::$instance;
    }

    /**
     * Returns the best caching strategy, given the number of unique shared strings
     * and the amount of memory available.
     *
     * @param int|null $sharedStringsUniqueCount Number of unique shared strings (NULL if unknown)
     * @param string|null $tempFolder Temporary folder where the temporary files to store shared strings will be stored
     * @return CachingStrategyInterface The best caching strategy
     */
    public function getBestCachingStrategy($sharedStringsUniqueCount, $tempFolder = null)
    {
        if ($this->isInMemoryStrategyUsageSafe($sharedStringsUniqueCount)) {
            return new InMemoryStrategy($sharedStringsUniqueCount);
        }

        return new FileBasedStrategy($tempFolder, self::MAX_NUM_STRINGS_PER_TEMP_FILE);
    }

    /**
     * Returns whether it is safe to use in-memory caching, given the number of unique shared strings
     * and the amount of memory available.
     *
     * @param int|null $sharedStringsUniqueCount Number of unique shared strings (NULL if unknown)
     * @return bool
     */
    protected function isInMemoryStrategyUsageSafe($sharedStringsUniqueCount)
    {
        // if the number of shared strings is unknown, do not use "in memory" strategy
        if ($sharedStringsUniqueCount === null) {
            return false;
        }

        $memoryAvailable = $this->getMemoryLimitInKB();

        if ($memoryAvailable === -1) {
            // if cannot get memory limit or if memory limit set as unlimited, don't trust and play safe
            return ($sharedStringsUniqueCount < self::MAX_NUM_STRINGS_PER_TEMP_FILE);
        }

        $memoryNeeded = $sharedStringsUniqueCount * self::AMOUNT_MEMORY_NEEDED_PER_STRING_IN_KB;

        return ($memoryAvailable > $memoryNeeded);
    }

    /**
     * Returns the PHP "memory_limit" in Kilobytes.
     *
     * Accepted formats (case-insensitive, surrounding whitespace ignored):
     *   - "-1": no limit => -1
     *   - "<digits>": a plain integer, interpreted as a number of bytes
     *   - "<digits><unit>" or "<digits><unit>b", where unit is one of b, k, m, g, t
     * Anything else is considered unreadable and yields -1, so that callers play safe.
     *
     * @return int|float Memory limit in KB (float only when a byte amount is not a multiple of 1024),
     *                   or -1 if the limit is unlimited or could not be determined
     */
    protected function getMemoryLimitInKB()
    {
        $memoryLimitFormatted = strtolower(trim($this->getMemoryLimitFromIni()));

        // No memory limit
        if ($memoryLimitFormatted === '-1') {
            return -1;
        }

        // The pattern is anchored so that malformed values (e.g. "12.5m", "foo5mbar") are rejected
        // instead of being partially parsed. The unit is optional: without it, the value is in bytes.
        if (!preg_match('/^(\d+)([bkmgt]?)b?$/', $memoryLimitFormatted, $matches)) {
            return -1;
        }

        $amount = intval($matches[1]);
        $unit = $matches[2];

        switch ($unit) {
            case 'k':
                return $amount;
            case 'm':
                return ($amount * 1024);
            case 'g':
                return ($amount * 1024 * 1024);
            case 't':
                return ($amount * 1024 * 1024 * 1024);
            default:
                // 'b' or no unit at all: the amount is expressed in bytes
                return ($amount / 1024);
        }
    }

    /**
     * Returns the formatted "memory_limit" value, as reported by ini_get().
     * Kept as a separate method so that the raw value can be substituted by subclasses.
     *
     * @return string
     */
    protected function getMemoryLimitFromIni()
    {
        return ini_get('memory_limit');
    }
}
160