Completed
Pull Request — develop_3.0 (#457)
by Adrien
02:34
created

CachingStrategyFactory::getMemoryLimitFromIni()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 4
Code Lines 2

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 2
CRAP Score 1

Importance

Changes 0
Metric Value
dl 0
loc 4
ccs 2
cts 2
cp 1
rs 10
c 0
b 0
f 0
cc 1
eloc 2
nc 1
nop 0
crap 1
1
<?php
2
3
namespace Box\Spout\Reader\XLSX\Helper\SharedStringsCaching;
4
use Box\Spout\Reader\XLSX\Creator\HelperFactory;
5
6
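/**
 * Class CachingStrategyFactory
 *
 * Picks the caching strategy used to store shared strings: in memory when the
 * estimated footprint fits within PHP's "memory_limit", in temporary files otherwise.
 *
 * @package Box\Spout\Reader\XLSX\Helper\SharedStringsCaching
 */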
11
class CachingStrategyFactory
{
    /**
     * The memory amount needed to store a string was obtained empirically from this data:
     *
     *        ------------------------------------
     *        | Number of chars⁺ | Memory needed |
     *        ------------------------------------
     *        |           3,000  |         1 MB  |
     *        |          15,000  |         2 MB  |
     *        |          30,000  |         5 MB  |
     *        |          75,000  |        11 MB  |
     *        |         150,000  |        21 MB  |
     *        |         300,000  |        43 MB  |
     *        |         750,000  |       105 MB  |
     *        |       1,500,000  |       210 MB  |
     *        |       2,250,000  |       315 MB  |
     *        |       3,000,000  |       420 MB  |
     *        |       4,500,000  |       630 MB  |
     *        ------------------------------------
     *
     *        ⁺ All characters were 1 byte long
     *
     * This gives a linear graph where each 1-byte character requires about 150 bytes to be stored.
     * Given that some characters can take up to 4 bytes, we need 600 bytes per character to be safe.
     * Also, there are on average about 20 characters per cell (this is entirely empirical data...).
     *
     * This means that in order to store one shared string in memory, the memory amount needed is:
     *   => 20 * 600 ≈ 12KB
     */
    const AMOUNT_MEMORY_NEEDED_PER_STRING_IN_KB = 12;

    /**
     * To avoid running out of memory when extracting a huge number of shared strings, they can be saved to temporary files
     * instead of in memory. Then, when accessing a string, the corresponding file contents will be loaded in memory
     * and the string will be quickly retrieved.
     * The performance bottleneck is not when creating these temporary files, but rather when loading their content.
     * Because the contents of the last loaded file stays in memory until another file needs to be loaded, it works
     * best when the indexes of the shared strings are sorted in the sheet data.
     * 10,000 was chosen because it creates small files that are fast to be loaded in memory.
     */
    const MAX_NUM_STRINGS_PER_TEMP_FILE = 10000;

    /**
     * Returns the best caching strategy, given the number of unique shared strings
     * and the amount of memory available.
     *
     * An in-memory strategy is returned when isInMemoryStrategyUsageSafe() allows it;
     * otherwise a file-based strategy storing at most MAX_NUM_STRINGS_PER_TEMP_FILE
     * strings per temporary file is returned.
     *
     * @param int|null $sharedStringsUniqueCount Number of unique shared strings (NULL if unknown)
     * @param string $tempFolder Temporary folder where the temporary files to store shared strings will be stored
     * @param HelperFactory $helperFactory Factory to create helpers
     * @return CachingStrategyInterface The best caching strategy
     */
    public function createBestCachingStrategy($sharedStringsUniqueCount, $tempFolder, $helperFactory)
    {
        if ($this->isInMemoryStrategyUsageSafe($sharedStringsUniqueCount)) {
            return new InMemoryStrategy($sharedStringsUniqueCount);
        }

        return new FileBasedStrategy($tempFolder, self::MAX_NUM_STRINGS_PER_TEMP_FILE, $helperFactory);
    }

    /**
     * Returns whether it is safe to use in-memory caching, given the number of unique shared strings
     * and the amount of memory available.
     *
     * @param int|null $sharedStringsUniqueCount Number of unique shared strings (NULL if unknown)
     * @return bool TRUE if the in-memory strategy can be used, FALSE otherwise
     */
    protected function isInMemoryStrategyUsageSafe($sharedStringsUniqueCount)
    {
        // if the number of shared strings is unknown, do not use "in memory" strategy
        if ($sharedStringsUniqueCount === null) {
            return false;
        }

        $memoryAvailable = $this->getMemoryLimitInKB();

        // getMemoryLimitInKB() signals "unlimited or unknown" with the integer -1
        if ($memoryAvailable === -1) {
            // if cannot get memory limit or if memory limit set as unlimited, don't trust and play safe
            return ($sharedStringsUniqueCount < self::MAX_NUM_STRINGS_PER_TEMP_FILE);
        }

        $memoryNeeded = $sharedStringsUniqueCount * self::AMOUNT_MEMORY_NEEDED_PER_STRING_IN_KB;

        return ($memoryAvailable > $memoryNeeded);
    }

    /**
     * Returns the PHP "memory_limit" in Kilobytes.
     *
     * The whole value must be made of digits, optionally followed by a unit
     * (B, K, M, G or T, case-insensitive, with an optional trailing "B" as in "KB").
     * A value without any unit is a number of bytes, which is how PHP itself
     * interprets a bare integer "memory_limit".
     * Anything else (for instance "1.5G" or "-128M") is reported as unknown instead of
     * being partially parsed, so that an over-estimated limit can never be returned.
     *
     * @return int|float Memory limit in KB, or the integer -1 if there is no limit or if the value cannot be parsed
     */
    protected function getMemoryLimitInKB()
    {
        // The cast guards against a non-string value (ini_get() returns FALSE for unknown options)
        $memoryLimitFormatted = strtolower(trim((string) $this->getMemoryLimitFromIni()));

        // No memory limit
        if ($memoryLimitFormatted === '-1') {
            return -1;
        }

        // Anchored on purpose: the value must match entirely, not just contain "<digits><unit>"
        if (!preg_match('/^(\d+)([bkmgt]?)b?$/', $memoryLimitFormatted, $matches)) {
            return -1;
        }

        $amount = intval($matches[1]);
        $unit = $matches[2];

        switch ($unit) {
            case 'k': return $amount;
            case 'm': return ($amount * 1024);
            case 'g': return ($amount * 1024 * 1024);
            case 't': return ($amount * 1024 * 1024 * 1024);
            // 'b' or no unit at all: the amount is expressed in bytes
            default: return ($amount / 1024);
        }
    }

    /**
     * Returns the formatted "memory_limit" value, as configured in the PHP settings.
     * Kept in its own method so that it can be overridden.
     *
     * @return string
     */
    protected function getMemoryLimitFromIni()
    {
        return ini_get('memory_limit');
    }
}
139