Completed
Pull Request — develop_3.0 (#460)
by Adrien
02:25
created

CachingStrategyFactory   A

Complexity

Total Complexity 14

Size/Duplication

Total Lines 128
Duplicated Lines 0 %

Coupling/Cohesion

Components 1
Dependencies 2

Test Coverage

Coverage 100%

Importance

Changes 0
Metric Value
wmc 14
lcom 1
cbo 2
dl 0
loc 128
ccs 28
cts 28
cp 1
rs 10
c 0
b 0
f 0

4 Methods

Rating   Name   Duplication   Size   Complexity  
A createBestCachingStrategy() 0 8 2
A isInMemoryStrategyUsageSafe() 0 17 3
C getMemoryLimitInKB() 0 25 8
A getMemoryLimitFromIni() 0 4 1
1
<?php
2
3
namespace Box\Spout\Reader\XLSX\Manager\SharedStringsCaching;
4
5
use Box\Spout\Reader\XLSX\Creator\HelperFactory;
6
7
/**
8
 * Class CachingStrategyFactory
9
 *
10
 * @package Box\Spout\Reader\XLSX\Manager\SharedStringsCaching
11
 */
12
class CachingStrategyFactory
{
    /**
     * The memory amount needed to store a string was obtained empirically from this data:
     *
     *        ------------------------------------
     *        | Number of chars⁺ | Memory needed |
     *        ------------------------------------
     *        |           3,000  |         1 MB  |
     *        |          15,000  |         2 MB  |
     *        |          30,000  |         5 MB  |
     *        |          75,000  |        11 MB  |
     *        |         150,000  |        21 MB  |
     *        |         300,000  |        43 MB  |
     *        |         750,000  |       105 MB  |
     *        |       1,500,000  |       210 MB  |
     *        |       2,250,000  |       315 MB  |
     *        |       3,000,000  |       420 MB  |
     *        |       4,500,000  |       630 MB  |
     *        ------------------------------------
     *
     *        ⁺ All characters were 1 byte long
     *
     * This gives a linear graph where each 1-byte character requires about 150 bytes to be stored.
     * Given that some characters can take up to 4 bytes, we need 600 bytes per character to be safe.
     * Also, there are on average about 20 characters per cell (this is entirely empirical data...).
     *
     * This means that in order to store one shared string in memory, the memory amount needed is:
     *   => 20 * 600 ≈ 12KB
     */
    const AMOUNT_MEMORY_NEEDED_PER_STRING_IN_KB = 12;

    /**
     * To avoid running out of memory when extracting a huge number of shared strings, they can be saved to temporary files
     * instead of in memory. Then, when accessing a string, the corresponding file contents will be loaded in memory
     * and the string will be quickly retrieved.
     * The performance bottleneck is not when creating these temporary files, but rather when loading their content.
     * Because the contents of the last loaded file stay in memory until another file needs to be loaded, it works
     * best when the indexes of the shared strings are sorted in the sheet data.
     * 10,000 was chosen because it creates small files that are fast to be loaded in memory.
     */
    const MAX_NUM_STRINGS_PER_TEMP_FILE = 10000;

    /**
     * Returns the best caching strategy, given the number of unique shared strings
     * and the amount of memory available.
     *
     * An in-memory strategy is returned when isInMemoryStrategyUsageSafe() allows it;
     * otherwise a file-based strategy, splitting the strings into temporary files of
     * at most MAX_NUM_STRINGS_PER_TEMP_FILE strings each, is returned.
     *
     * @param int|null $sharedStringsUniqueCount Number of unique shared strings (NULL if unknown)
     * @param string $tempFolder Temporary folder where the temporary files to store shared strings will be stored
     * @param HelperFactory $helperFactory Factory to create helpers
     * @return CachingStrategyInterface The best caching strategy
     */
    public function createBestCachingStrategy($sharedStringsUniqueCount, $tempFolder, $helperFactory)
    {
        if ($this->isInMemoryStrategyUsageSafe($sharedStringsUniqueCount)) {
            return new InMemoryStrategy($sharedStringsUniqueCount);
        }

        return new FileBasedStrategy($tempFolder, self::MAX_NUM_STRINGS_PER_TEMP_FILE, $helperFactory);
    }

    /**
     * Returns whether it is safe to use in-memory caching, given the number of unique shared strings
     * and the amount of memory available.
     *
     * @param int|null $sharedStringsUniqueCount Number of unique shared strings (NULL if unknown)
     * @return bool TRUE if the in-memory strategy can be used, FALSE otherwise
     */
    protected function isInMemoryStrategyUsageSafe($sharedStringsUniqueCount)
    {
        // if the number of shared strings is unknown, do not use "in memory" strategy
        if ($sharedStringsUniqueCount === null) {
            return false;
        }

        $memoryAvailable = $this->getMemoryLimitInKB();

        if ($memoryAvailable === -1) {
            // if cannot get memory limit or if memory limit set as unlimited, don't trust and play safe:
            // only allow in-memory caching for what would fit in a single temporary file
            return ($sharedStringsUniqueCount < self::MAX_NUM_STRINGS_PER_TEMP_FILE);
        }

        $memoryNeeded = $sharedStringsUniqueCount * self::AMOUNT_MEMORY_NEEDED_PER_STRING_IN_KB;

        return ($memoryAvailable > $memoryNeeded);
    }

    /**
     * Returns the PHP "memory_limit" in Kilobytes.
     *
     * Accepted formats (case-insensitive, surrounding whitespace ignored):
     *   - "-1": no limit
     *   - "<digits>": amount in bytes (no unit)
     *   - "<digits>" followed by B, K, M, G or T, with an optional trailing "B" (e.g. "128M", "256KB")
     *
     * The whole value must match one of these formats; anything else (e.g. "1.5G", "invalid")
     * is reported as unknown rather than being partially interpreted.
     *
     * @return int|float Memory limit in KB, or -1 if there is no limit or if the value cannot be parsed
     */
    protected function getMemoryLimitInKB()
    {
        // ini_get() may return FALSE; the cast turns it into '' which is then reported as unknown
        $memoryLimitFormatted = strtolower(trim((string) $this->getMemoryLimitFromIni()));

        // No memory limit
        if ($memoryLimitFormatted === '-1') {
            return -1;
        }

        // Anchored on both ends so that only fully well-formed values are accepted
        if (!preg_match('/^(\d+)([bkmgt]?)b?$/', $memoryLimitFormatted, $matches)) {
            return -1;
        }

        $amount = intval($matches[1]);
        $unit = $matches[2];

        switch ($unit) {
            case 'k':
                return $amount;
            case 'm':
                return ($amount * 1024);
            case 'g':
                return ($amount * 1024 * 1024);
            case 't':
                return ($amount * 1024 * 1024 * 1024);
            default:
                // 'b' or no unit at all: the amount is expressed in bytes
                return ($amount / 1024);
        }
    }

    /**
     * Returns the formatted "memory_limit" value, as read from the PHP configuration.
     * Kept as a separate method so that the raw value is obtained in a single place.
     *
     * @return string
     */
    protected function getMemoryLimitFromIni()
    {
        return ini_get('memory_limit');
    }
}
140