Failed Conditions
Push — develop_3.0 ( 80553c...f9d8ad )
by Adrien
31:09
created

CachingStrategyFactory   A

Complexity

Total Complexity 17

Size/Duplication

Total Lines 150
Duplicated Lines 0 %

Coupling/Cohesion

Components 2
Dependencies 2

Test Coverage

Coverage 100%

Importance

Changes 0
Metric Value
wmc 17
lcom 2
cbo 2
dl 0
loc 150
ccs 34
cts 34
cp 1
rs 10
c 0
b 0
f 0

6 Methods

Rating   Name   Duplication   Size   Complexity  
A __construct() 0 3 1
A getBestCachingStrategy() 0 8 2
A isInMemoryStrategyUsageSafe() 0 17 3
A getMemoryLimitFromIni() 0 4 1
A getInstance() 0 8 2
C getMemoryLimitInKB() 0 25 8
1
<?php
2
3
namespace Box\Spout\Reader\XLSX\Helper\SharedStringsCaching;
4
5
/**
 * Class CachingStrategyFactory
 *
 * Singleton factory that picks a shared strings caching strategy (in-memory or
 * file-based) from the number of unique shared strings and PHP's "memory_limit".
 *
 * @package Box\Spout\Reader\XLSX\Helper\SharedStringsCaching
 */
class CachingStrategyFactory
{
    /**
     * The memory amount needed to store a string was obtained empirically from this data:
     *
     *        ------------------------------------
     *        | Number of chars⁺ | Memory needed |
     *        ------------------------------------
     *        |           3,000  |         1 MB  |
     *        |          15,000  |         2 MB  |
     *        |          30,000  |         5 MB  |
     *        |          75,000  |        11 MB  |
     *        |         150,000  |        21 MB  |
     *        |         300,000  |        43 MB  |
     *        |         750,000  |       105 MB  |
     *        |       1,500,000  |       210 MB  |
     *        |       2,250,000  |       315 MB  |
     *        |       3,000,000  |       420 MB  |
     *        |       4,500,000  |       630 MB  |
     *        ------------------------------------
     *
     *        ⁺ All characters were 1 byte long
     *
     * This gives a linear graph where each 1-byte character requires about 150 bytes to be stored.
     * Given that some characters can take up to 4 bytes, we need 600 bytes per character to be safe.
     * Also, there is on average about 20 characters per cell (this is entirely empirical data...).
     *
     * This means that in order to store one shared string in memory, the memory amount needed is:
     *   => 20 * 600 ≈ 12KB
     */
    const AMOUNT_MEMORY_NEEDED_PER_STRING_IN_KB = 12;

    /**
     * To avoid running out of memory when extracting a huge number of shared strings, they can be saved to temporary files
     * instead of in memory. Then, when accessing a string, the corresponding file contents will be loaded in memory
     * and the string will be quickly retrieved.
     * The performance bottleneck is not when creating these temporary files, but rather when loading their content.
     * Because the contents of the last loaded file stays in memory until another file needs to be loaded, it works
     * best when the indexes of the shared strings are sorted in the sheet data.
     * 10,000 was chosen because it creates small files that are fast to be loaded in memory.
     */
    const MAX_NUM_STRINGS_PER_TEMP_FILE = 10000;

    /** @var CachingStrategyFactory|null Singleton instance */
    protected static $instance = null;

    /**
     * Private constructor for singleton
     */
    private function __construct()
    {
    }

    /**
     * Returns the singleton instance of the factory, creating it on first use.
     *
     * @return CachingStrategyFactory
     */
    public static function getInstance()
    {
        if (self::$instance === null) {
            self::$instance = new self();
        }

        return self::$instance;
    }

    /**
     * Returns the best caching strategy, given the number of unique shared strings
     * and the amount of memory available.
     *
     * @param int|null $sharedStringsUniqueCount Number of unique shared strings (NULL if unknown)
     * @param string|null $tempFolder Temporary folder where the temporary files to store shared strings will be stored
     * @return CachingStrategyInterface The best caching strategy
     */
    public function getBestCachingStrategy($sharedStringsUniqueCount, $tempFolder = null)
    {
        if ($this->isInMemoryStrategyUsageSafe($sharedStringsUniqueCount)) {
            return new InMemoryStrategy($sharedStringsUniqueCount);
        }

        return new FileBasedStrategy($tempFolder, self::MAX_NUM_STRINGS_PER_TEMP_FILE);
    }

    /**
     * Returns whether it is safe to use in-memory caching, given the number of unique shared strings
     * and the amount of memory available.
     *
     * @param int|null $sharedStringsUniqueCount Number of unique shared strings (NULL if unknown)
     * @return bool
     */
    protected function isInMemoryStrategyUsageSafe($sharedStringsUniqueCount)
    {
        // if the number of shared strings is unknown, do not use "in memory" strategy
        if ($sharedStringsUniqueCount === null) {
            return false;
        }

        $memoryAvailable = $this->getMemoryLimitInKB();

        if ($memoryAvailable === -1) {
            // if cannot get memory limit or if memory limit set as unlimited, don't trust and play safe
            return ($sharedStringsUniqueCount < self::MAX_NUM_STRINGS_PER_TEMP_FILE);
        }

        $memoryNeeded = $sharedStringsUniqueCount * self::AMOUNT_MEMORY_NEEDED_PER_STRING_IN_KB;

        return ($memoryAvailable > $memoryNeeded);
    }

    /**
     * Returns the PHP "memory_limit" in Kilobytes.
     *
     * Recognized formats (case-insensitive, surrounding whitespace ignored):
     *   - "-1": no limit => -1
     *   - a number followed by a unit (b, k, m, g, t), optionally followed by "b" (e.g. "128M", "256KB")
     *   - a bare number (e.g. "134217728"), interpreted as a number of bytes
     *
     * @return int|float Memory limit in KB, or -1 if there is no limit or if the value cannot be parsed
     */
    protected function getMemoryLimitInKB()
    {
        // ini_get() may return false; the cast turns it into '' which falls through to -1
        $memoryLimitFormatted = strtolower(trim((string) $this->getMemoryLimitFromIni()));

        // No memory limit
        if ($memoryLimitFormatted === '-1') {
            return -1;
        }

        if (preg_match('/(\d+)([bkmgt])b?/', $memoryLimitFormatted, $matches)) {
            $amount = intval($matches[1]);
            $unit = $matches[2];

            if ($unit === 'b') {
                return ($amount / 1024);
            }

            // Number of kilobytes represented by one of each remaining unit
            $kilobytesPerUnit = array(
                'k' => 1,
                'm' => 1024,
                'g' => 1024 * 1024,
                't' => 1024 * 1024 * 1024,
            );

            return ($amount * $kilobytesPerUnit[$unit]);
        }

        // A value made only of digits has no unit suffix: PHP reads it as a number of bytes
        if (preg_match('/^\d+$/', $memoryLimitFormatted)) {
            return (intval($memoryLimitFormatted) / 1024);
        }

        return -1;
    }

    /**
     * Returns the formatted "memory_limit" value, as reported by ini_get()
     *
     * @return string|false
     */
    protected function getMemoryLimitFromIni()
    {
        return ini_get('memory_limit');
    }
}
160