CachingStrategyFactory::getMemoryLimitInKB()   B
last analyzed

Complexity

Conditions 8
Paths 8

Size

Total Lines 25

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 15
CRAP Score 8

Importance

Changes 0
Metric Value
dl 0
loc 25
ccs 15
cts 15
cp 1
rs 8.4444
c 0
b 0
f 0
cc 8
nc 8
nop 0
crap 8
1
<?php
2
3
namespace Box\Spout\Reader\XLSX\Manager\SharedStringsCaching;
4
5
use Box\Spout\Reader\XLSX\Creator\HelperFactory;
6
7
/**
 * Class CachingStrategyFactory
 *
 * Picks how shared strings get cached while reading: fully in memory when the
 * estimated footprint fits under PHP's "memory_limit", otherwise in temporary files.
 */
class CachingStrategyFactory
{
    /**
     * The memory amount needed to store a string was obtained empirically from this data:
     *
     *        ------------------------------------
     *        | Number of chars⁺ | Memory needed |
     *        ------------------------------------
     *        |           3,000  |         1 MB  |
     *        |          15,000  |         2 MB  |
     *        |          30,000  |         5 MB  |
     *        |          75,000  |        11 MB  |
     *        |         150,000  |        21 MB  |
     *        |         300,000  |        43 MB  |
     *        |         750,000  |       105 MB  |
     *        |       1,500,000  |       210 MB  |
     *        |       2,250,000  |       315 MB  |
     *        |       3,000,000  |       420 MB  |
     *        |       4,500,000  |       630 MB  |
     *        ------------------------------------
     *
     *        ⁺ All characters were 1 byte long
     *
     * This gives a linear graph where each 1-byte character requires about 150 bytes to be stored.
     * Given that some characters can take up to 4 bytes, we need 600 bytes per character to be safe.
     * Also, there is on average about 20 characters per cell (this is entirely empirical data...).
     *
     * This means that in order to store one shared string in memory, the memory amount needed is:
     *   => 20 * 600 ≈ 12KB
     */
    const AMOUNT_MEMORY_NEEDED_PER_STRING_IN_KB = 12;

    /**
     * To avoid running out of memory when extracting a huge number of shared strings, they can be saved to temporary files
     * instead of in memory. Then, when accessing a string, the corresponding file contents will be loaded in memory
     * and the string will be quickly retrieved.
     * The performance bottleneck is not when creating these temporary files, but rather when loading their content.
     * Because the contents of the last loaded file stays in memory until another file needs to be loaded, it works
     * best when the indexes of the shared strings are sorted in the sheet data.
     * 10,000 was chosen because it creates small files that are fast to be loaded in memory.
     */
    const MAX_NUM_STRINGS_PER_TEMP_FILE = 10000;

    /**
     * Returns the best caching strategy, given the number of unique shared strings
     * and the amount of memory available.
     *
     * An InMemoryStrategy is returned when in-memory caching is deemed safe;
     * otherwise a FileBasedStrategy writing at most MAX_NUM_STRINGS_PER_TEMP_FILE
     * strings per temporary file is returned.
     *
     * @param int|null $sharedStringsUniqueCount Number of unique shared strings (NULL if unknown)
     * @param string $tempFolder Temporary folder where the temporary files to store shared strings will be stored
     * @param HelperFactory $helperFactory Factory to create helpers
     * @return CachingStrategyInterface The best caching strategy
     */
    public function createBestCachingStrategy($sharedStringsUniqueCount, $tempFolder, $helperFactory)
    {
        if ($this->isInMemoryStrategyUsageSafe($sharedStringsUniqueCount)) {
            return new InMemoryStrategy($sharedStringsUniqueCount);
        }

        return new FileBasedStrategy($tempFolder, self::MAX_NUM_STRINGS_PER_TEMP_FILE, $helperFactory);
    }

    /**
     * Returns whether it is safe to use in-memory caching, given the number of unique shared strings
     * and the amount of memory available.
     *
     * @param int|null $sharedStringsUniqueCount Number of unique shared strings (NULL if unknown)
     * @return bool
     */
    protected function isInMemoryStrategyUsageSafe($sharedStringsUniqueCount)
    {
        // if the number of shared strings is unknown, do not use "in memory" strategy
        if ($sharedStringsUniqueCount === null) {
            return false;
        }

        $memoryAvailable = $this->getMemoryLimitInKB();

        if ($memoryAvailable === -1) {
            // if cannot get memory limit or if memory limit set as unlimited, don't trust and play safe:
            // only stay in memory when everything would have fit in a single temporary file anyway
            return ($sharedStringsUniqueCount < self::MAX_NUM_STRINGS_PER_TEMP_FILE);
        }

        $memoryNeeded = $sharedStringsUniqueCount * self::AMOUNT_MEMORY_NEEDED_PER_STRING_IN_KB;

        return ($memoryAvailable > $memoryNeeded);
    }

    /**
     * Returns the PHP "memory_limit" in Kilobytes
     *
     * Accepted formats are "-1" (no limit), a plain integer (interpreted as a number
     * of bytes, which is how PHP reads a value without a unit) and an integer followed
     * by one of the units b, k, m, g, t, optionally followed by "b" (e.g. "128M", "2GB").
     * Parsing is case-insensitive and ignores surrounding whitespace.
     *
     * @return int|float The limit in Kilobytes, or -1 if there is no limit or if the value cannot be parsed
     */
    protected function getMemoryLimitInKB()
    {
        // ini_get() can return false when the option is unavailable; the cast turns that into ''
        $memoryLimitFormatted = (string) $this->getMemoryLimitFromIni();
        $memoryLimitFormatted = \strtolower(\trim($memoryLimitFormatted));

        // No memory limit
        if ($memoryLimitFormatted === '-1') {
            return -1;
        }

        // A value made only of digits carries no unit: it is a number of bytes.
        // The unit-based pattern below requires a unit letter and would otherwise
        // report such a limit as unknown.
        if (\preg_match('/^\d+$/', $memoryLimitFormatted)) {
            return (((int) $memoryLimitFormatted) / 1024);
        }

        if (\preg_match('/(\d+)([bkmgt])b?/', $memoryLimitFormatted, $matches)) {
            $amount = (int) ($matches[1]);
            $unit = $matches[2];

            switch ($unit) {
                case 'b':
                    return ($amount / 1024);
                case 'k':
                    return $amount;
                case 'm':
                    return ($amount * 1024);
                case 'g':
                    return ($amount * 1024 * 1024);
                case 't':
                    return ($amount * 1024 * 1024 * 1024);
            }
        }

        // Unrecognized format
        return -1;
    }

    /**
     * Returns the formatted "memory_limit" value
     *
     * Kept as a separate method so that subclasses can substitute the value.
     *
     * @return string
     */
    protected function getMemoryLimitFromIni()
    {
        return \ini_get('memory_limit');
    }
}
139