Failed Conditions
Push — master ( 67fec4...924347 )
by Adrien
07:48
created

Delimiter::getDefaultDelimiter()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 3
Code Lines 1

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 2
CRAP Score 1

Importance

Changes 1
Bugs 0 Features 0
Metric Value
cc 1
eloc 1
nc 1
nop 0
dl 0
loc 3
ccs 2
cts 2
cp 1
crap 1
rs 10
c 1
b 0
f 0
1
<?php
2
3
namespace PhpOffice\PhpSpreadsheet\Reader\Csv;
4
5
/**
 * Infers the field delimiter of a CSV stream.
 *
 * On construction, the stream is sampled line by line (up to a fixed cap) and,
 * for every candidate delimiter, the number of occurrences per line is recorded
 * (text inside enclosures is ignored). infer() then picks the candidate whose
 * per-line count deviates least from its own median.
 */
class Delimiter
{
    /**
     * Candidate delimiters, in order of preference (used to break ties).
     *
     * The misspelled name is kept for backward compatibility with subclasses.
     */
    protected const POTENTIAL_DELIMETERS = [',', ';', "\t", '|', ':', ' ', '~'];

    /** Maximum number of logical lines sampled from the stream. */
    private const MAX_SAMPLED_LINES = 1000;

    /** @var resource Stream the lines are read from */
    protected $fileHandle;

    /** @var string Character that escapes an enclosure ('' for none) */
    protected $escapeCharacter;

    /** @var string Character that encloses field values ('' for none) */
    protected $enclosure;

    /** @var array<string, int[]> Per delimiter: number of occurrences in each sampled line */
    protected $counts = [];

    /** @var int Number of logical lines sampled */
    protected $numberLines = 0;

    /** @var null|string Delimiter found by the last successful infer() call */
    protected $delimiter;

    /**
     * Stores the stream settings and immediately samples the stream.
     *
     * @param resource $fileHandle      Readable stream, positioned where sampling should start
     * @param string   $escapeCharacter Escape character for enclosures ('' for none)
     * @param string   $enclosure       Enclosure character ('' for none)
     */
    public function __construct($fileHandle, $escapeCharacter, $enclosure)
    {
        $this->fileHandle = $fileHandle;
        $this->escapeCharacter = $escapeCharacter;
        $this->enclosure = $enclosure;

        $this->countPotentialDelimiters();
    }

    /**
     * Returns the delimiter to fall back on when none can be inferred:
     * the first entry of the candidate list.
     */
    public function getDefaultDelimiter(): string
    {
        return self::POTENTIAL_DELIMETERS[0];
    }

    /**
     * Returns the number of logical lines that were sampled.
     */
    public function linesCounted(): int
    {
        return $this->numberLines;
    }

    /**
     * Samples up to MAX_SAMPLED_LINES logical lines and records, for each of
     * them, how often every candidate delimiter occurs.
     */
    protected function countPotentialDelimiters(): void
    {
        $this->counts = array_fill_keys(self::POTENTIAL_DELIMETERS, []);
        $delimiterKeys = array_flip(self::POTENTIAL_DELIMETERS);

        // The cap is tested BEFORE reading, and the counter only advances for
        // lines that were actually counted. This keeps numberLines equal to the
        // length of every series and avoids consuming a line that is then dropped.
        $this->numberLines = 0;
        while (
            $this->numberLines < self::MAX_SAMPLED_LINES
            && ($line = $this->getNextLine()) !== false
        ) {
            $this->countDelimiterValues($line, $delimiterKeys);
            ++$this->numberLines;
        }
    }

    /**
     * Appends the occurrence count of every candidate delimiter in $line to
     * that delimiter's series.
     *
     * @param string $line          Line content with enclosed sections already removed
     * @param array  $delimiterKeys Candidate delimiters as array keys; no longer needed,
     *                              retained so the signature stays compatible
     */
    protected function countDelimiterValues(string $line, array $delimiterKeys): void
    {
        // All candidates are single bytes, so a plain substring count per
        // candidate yields the same numbers as a per-byte histogram.
        foreach (self::POTENTIAL_DELIMETERS as $delimiter) {
            $this->counts[$delimiter][] = substr_count($line, $delimiter);
        }
    }

    /**
     * Picks the candidate delimiter whose per-line occurrence count is the
     * most consistent across the sampled lines.
     *
     * Candidates whose median count is 0 are ignored. Of the rest, the one
     * with the smallest mean square deviation from its median wins; ties go
     * to the candidate listed first in POTENTIAL_DELIMETERS.
     *
     * @return null|string The inferred delimiter; when no candidate qualifies, the
     *                     result of an earlier successful call, or null if there is none
     */
    public function infer(): ?string
    {
        $min = INF;

        foreach (self::POTENTIAL_DELIMETERS as $delimiter) {
            $series = $this->counts[$delimiter] ?? [];
            $sampleSize = count($series);

            // Nothing sampled (e.g. empty file): there is no median to compute
            if ($sampleSize === 0) {
                continue;
            }

            sort($series);

            $middleIdx = intdiv($sampleSize - 1, 2);
            $median = ($sampleSize % 2 === 1)
                ? $series[$middleIdx]
                : ($series[$middleIdx] + $series[$middleIdx + 1]) / 2;

            // Ignore delimiters that haven't been found consistently.
            // (An even-sized series of zeros averages to the integer 0, so the
            // strict comparison covers both branches above.)
            if ($median === 0) {
                continue;
            }

            $meanSquareDeviation = array_reduce(
                $series,
                static function ($sum, $value) use ($median) {
                    return $sum + ($value - $median) ** 2;
                },
                0
            ) / $sampleSize;

            // Strict "<" keeps the earliest candidate on ties
            if ($meanSquareDeviation < $min) {
                $min = $meanSquareDeviation;
                $this->delimiter = $delimiter;
            }
        }

        return $this->delimiter;
    }

    /**
     * Get the next full line from the file.
     *
     * A "full" line may span several physical lines when an enclosure is
     * opened on one line and closed on a later one. Enclosed sections are
     * removed from the returned text so that delimiters inside them are not
     * counted.
     *
     * @return false|string The line without its enclosed sections, or false when the
     *                      end of the stream is reached (also when it is reached
     *                      while an enclosure is still open)
     */
    public function getNextLine()
    {
        // Without an enclosure there is nothing to strip and no line continuation.
        // The patterns below would match the empty string and swallow the whole stream.
        if ((string) $this->enclosure === '') {
            return fgets($this->fileHandle);
        }

        // An enclosure only counts when it is not preceded by the escape character.
        // A null escape character is treated like '' (an empty look-behind never matches).
        $escapeCharacter = (string) $this->escapeCharacter;
        $enclosure = ($escapeCharacter === '' ? ''
                : ('(?<!' . preg_quote($escapeCharacter, '/') . ')'))
            . preg_quote($this->enclosure, '/');
        $enclosedSection = '/(' . $enclosure . '.*' . $enclosure . ')/Us';
        $anyEnclosure = '/(' . $enclosure . ')/';

        $line = '';
        do {
            // Get the next line in the file
            $newLine = fgets($this->fileHandle);

            // Return false if there is no next line
            if ($newLine === false) {
                return false;
            }

            // Add the new line to what has been read so far
            $line .= $newLine;

            // Drop everything that is enclosed to avoid counting false positives in enclosures
            $stripped = preg_replace($enclosedSection, '', $line);
            if ($stripped === null) {
                // PCRE failure (e.g. backtrack limit): hand back the text as read
                // rather than leaking null to the caller
                return $line;
            }
            $line = $stripped;

            // See if we have any enclosures left in the line
            // if we still have an enclosure then we need to read the next line as well
        } while (preg_match($anyEnclosure, $line) > 0);

        return $line;
    }
}
145