Failed Conditions
Push — perf-tests ( 50942d...2fc93e )
by Adrien
14:53
created

CachingStrategyFactory   A

Complexity

Total Complexity 16

Size/Duplication

Total Lines 145
Duplicated Lines 0 %

Coupling/Cohesion

Components 2
Dependencies 2

Importance

Changes 1
Bugs 0 Features 0
Metric Value
wmc 16
c 1
b 0
f 0
lcom 2
cbo 2
dl 0
loc 145
rs 10

6 Methods

Rating   Name   Duplication   Size   Complexity  
A __construct() 0 3 1
A getInstance() 0 8 2
A getBestCachingStrategy() 0 8 2
A isInMemoryStrategyUsageSafe() 0 12 2
C getMemoryLimitInKB() 0 25 8
A getMemoryLimitFromIni() 0 4 1
1
<?php
2
3
namespace Box\Spout\Reader\XLSX\Helper\SharedStringsCaching;
4
5
/**
6
 * Class CachingStrategyFactory
7
 *
8
 * @package Box\Spout\Reader\XLSX\Helper\SharedStringsCaching
9
 */
10
class CachingStrategyFactory
{
    /**
     * The memory amount needed to store a string was obtained empirically from this data:
     *
     *        ------------------------------------
     *        | Number of chars⁺ | Memory needed |
     *        ------------------------------------
     *        |           3,000  |         1 MB  |
     *        |          15,000  |         2 MB  |
     *        |          30,000  |         5 MB  |
     *        |          75,000  |        11 MB  |
     *        |         150,000  |        21 MB  |
     *        |         300,000  |        43 MB  |
     *        |         750,000  |       105 MB  |
     *        |       1,500,000  |       210 MB  |
     *        |       2,250,000  |       315 MB  |
     *        |       3,000,000  |       420 MB  |
     *        |       4,500,000  |       630 MB  |
     *        ------------------------------------
     *
     *        ⁺ All characters were 1 byte long
     *
     * This gives a linear graph where each 1-byte character requires about 150 bytes to be stored.
     * Given that some characters can take up to 4 bytes, we need 600 bytes per character to be safe.
     * Also, there is on average about 20 characters per cell (this is entirely empirical data...).
     *
     * This means that in order to store one shared string in memory, the memory amount needed is:
     *   => 20 * 600 ≈ 12KB
     */
    const AMOUNT_MEMORY_NEEDED_PER_STRING_IN_KB = 12;

    /**
     * To avoid running out of memory when extracting a huge number of shared strings, they can be saved to temporary files
     * instead of in memory. Then, when accessing a string, the corresponding file contents will be loaded in memory
     * and the string will be quickly retrieved.
     * The performance bottleneck is not when creating these temporary files, but rather when loading their content.
     * Because the contents of the last loaded file stays in memory until another file needs to be loaded, it works
     * best when the indexes of the shared strings are sorted in the sheet data.
     * 10,000 was chosen because it creates small files that are fast to be loaded in memory.
     */
    const MAX_NUM_STRINGS_PER_TEMP_FILE = 10000;

    /** @var CachingStrategyFactory|null Singleton instance */
    protected static $instance = null;

    /**
     * Private constructor: instances are only handed out through getInstance().
     */
    private function __construct()
    {
    }

    /**
     * Returns the singleton instance of the factory, creating it on first use.
     *
     * @return CachingStrategyFactory
     */
    public static function getInstance()
    {
        if (self::$instance === null) {
            self::$instance = new self();
        }

        return self::$instance;
    }

    /**
     * Returns the best caching strategy, given the number of unique shared strings
     * and the configured PHP memory limit.
     *
     * @param int $sharedStringsUniqueCount Number of unique shared strings
     * @param string|null $tempFolder Temporary folder where the temporary files to store shared strings will be stored
     * @return CachingStrategyInterface The best caching strategy
     */
    public function getBestCachingStrategy($sharedStringsUniqueCount, $tempFolder = null)
    {
        if ($this->isInMemoryStrategyUsageSafe($sharedStringsUniqueCount)) {
            return new InMemoryStrategy($sharedStringsUniqueCount);
        }

        return new FileBasedStrategy($tempFolder, self::MAX_NUM_STRINGS_PER_TEMP_FILE);
    }

    /**
     * Returns whether it is safe to use in-memory caching, given the number of unique shared strings
     * and the configured PHP memory limit.
     *
     * Note that the comparison is made against the "memory_limit" setting itself,
     * not against the memory that is still free at the time of the call.
     *
     * @param int $sharedStringsUniqueCount Number of unique shared strings
     * @return bool
     */
    protected function isInMemoryStrategyUsageSafe($sharedStringsUniqueCount)
    {
        $memoryAvailable = $this->getMemoryLimitInKB();

        if ($memoryAvailable === -1) {
            // if cannot get memory limit or if memory limit set as unlimited, don't trust and play safe
            return ($sharedStringsUniqueCount < self::MAX_NUM_STRINGS_PER_TEMP_FILE);
        }

        $memoryNeeded = $sharedStringsUniqueCount * self::AMOUNT_MEMORY_NEEDED_PER_STRING_IN_KB;

        return ($memoryAvailable > $memoryNeeded);
    }

    /**
     * Returns the PHP "memory_limit" in Kilobytes.
     *
     * Accepted notations (case-insensitive, surrounding whitespace ignored):
     *   - "-1"                      => no limit, reported as -1
     *   - "<digits>"                => amount in bytes (PHP's plain integer form)
     *   - "<digits>" + b|k|m|g|t    => amount in that unit, optionally followed by "b" (e.g. "128MB")
     *
     * Anything else (empty value, "1.5g", "foo128m", ...) is reported as -1 so that
     * callers fall back to their conservative behavior instead of trusting a
     * partially parsed number.
     *
     * @return int|float Memory limit in KB, or -1 when unlimited or not parseable
     */
    protected function getMemoryLimitInKB()
    {
        // The value may be false when the directive cannot be read; the cast turns that into ''
        $memoryLimitFormatted = strtolower(trim((string) $this->getMemoryLimitFromIni()));

        // No memory limit
        if ($memoryLimitFormatted === '-1') {
            return -1;
        }

        // Anchored on both ends so that only the whole value is accepted; the unit is optional
        if (!preg_match('/^(\d+)([bkmgt]?)b?$/', $memoryLimitFormatted, $matches)) {
            return -1;
        }

        $amount = (int) $matches[1];
        $unit = $matches[2];

        switch ($unit) {
            case 'k':
                return $amount;
            case 'm':
                return ($amount * 1024);
            case 'g':
                return ($amount * 1024 * 1024);
            case 't':
                return ($amount * 1024 * 1024 * 1024);
            default:
                // '' (no unit) or 'b': the amount is expressed in bytes
                return ($amount / 1024);
        }
    }

    /**
     * Returns the raw "memory_limit" value as reported by ini_get().
     * Kept as a separate method so that subclasses can substitute another value.
     *
     * @return string|false
     */
    protected function getMemoryLimitFromIni()
    {
        return ini_get('memory_limit');
    }
}
155