Completed
Push — master ( 8c14af...cc53e1 )
by Florian
01:37
created

Reader::readFile()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 10

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 6
CRAP Score 1

Importance

Changes 0
Metric Value
dl 0
loc 10
ccs 6
cts 6
cp 1
rs 9.9332
c 0
b 0
f 0
cc 1
nc 1
nop 1
crap 1
1
<?php
2
3
namespace Cocur\Arff;
4
5
use Cocur\Arff\Column\DateColumn;
6
use Cocur\Arff\Column\NominalColumn;
7
use Cocur\Arff\Column\NumericColumn;
8
use Cocur\Arff\Column\StringColumn;
9
10
/**
 * Reads ARFF (Attribute-Relation File Format) files into Document objects.
 *
 * The parser understands the `@RELATION`, `@ATTRIBUTE` and `@DATA` markers.
 * Attribute types `numeric`, `real`, `integer`, `string`, nominal
 * (`{a,b,c}`) and `date "<format>"` are recognised; attributes with any other
 * type are ignored.
 */
class Reader
{
    /**
     * Reads the given ARFF file and converts it into a Document.
     *
     * The relation name is taken from the first `@RELATION` line found in the
     * header (the lines before `@DATA`); if there is none the document is
     * created with a `null` name.
     *
     * @param string $filename
     *
     * @return Document
     *
     * @throws \RuntimeException if the file cannot be read
     */
    public function readFile($filename)
    {
        $content = file_get_contents($filename);
        if ($content === false) {
            throw new \RuntimeException(sprintf('Unable to read ARFF file "%s".', $filename));
        }

        // Split on every common line ending so that files with CRLF line
        // endings do not leave a stray "\r" at the end of each line.
        $lines = preg_split('/\r\n|\r|\n/', $content);

        $document = new Document($this->findName($lines));

        $this->parseColumns($document, $lines);
        $this->parseData($document, $lines);

        return $document;
    }

    /**
     * Extracts the relation name from a single `@RELATION <name>` line.
     *
     * Surrounding whitespace (including a trailing carriage return) is ignored.
     *
     * @param string $line
     *
     * @return string|null the relation name, or null if the line is not a
     *                     relation declaration
     */
    protected function parseName($line)
    {
        if (preg_match('/^@RELATION\s+([a-zA-Z-_\.\/\d]+)$/i', trim($line), $matches)) {
            return $matches[1];
        }

        return null;
    }

    /**
     * Adds a column to the document for every supported `@ATTRIBUTE`
     * declaration found in the header, i.e. before the `@DATA` marker.
     *
     * @param Document $document
     * @param string[] $lines
     */
    protected function parseColumns(Document $document, array $lines)
    {
        foreach ($lines as $line) {
            if ($this->isDataMarker($line)) {
                // Everything after @DATA is row data, not declarations.
                break;
            }

            if (!preg_match('/^\s*@ATTRIBUTE\s+([a-zA-Z0-9_-]+)\s+(.*)$/i', $line, $matches)) {
                continue;
            }

            $column = $this->createColumn($matches[1], trim($matches[2]));
            if ($column !== null) {
                $document->addColumn($column);
            }
        }
    }

    /**
     * Adds one data row to the document for every well-formed line that
     * follows the `@DATA` marker.
     *
     * Blank lines, `%` comment lines and lines with fewer values than the
     * document has columns are skipped. Values beyond the last column are
     * discarded. If there is no `@DATA` marker no rows are added.
     *
     * @param Document $document
     * @param string[] $lines
     */
    protected function parseData(Document $document, array $lines)
    {
        $lineCount = count($lines);

        // Locate the @DATA marker; the bound prevents running past the end of
        // the file when the marker is missing.
        $index = 0;
        while ($index < $lineCount && !$this->isDataMarker($lines[$index])) {
            $index++;
        }

        // Positional list of column names: the n-th value of a row belongs to
        // the n-th column of the document.
        $columnNames = [];
        foreach ($document->getColumns() as $column) {
            $columnNames[] = $column->getName();
        }
        $columnCount = count($columnNames);

        for ($i = $index + 1; $i < $lineCount; $i++) {
            $line = trim($lines[$i]);
            if ($line === '' || $line[0] === '%') {
                continue; // blank line or comment
            }

            $row = [];
            foreach ($this->splitValues($line) as $j => $value) {
                if (isset($columnNames[$j])) {
                    $row[$columnNames[$j]] = $this->cleanValue($value);
                }
            }

            if (count($row) !== $columnCount) {
                continue; // malformed, probably an empty line
            }

            $document->addData($row);
        }
    }

    /**
     * Returns the first relation name declared before the `@DATA` marker.
     *
     * @param string[] $lines
     *
     * @return string|null
     */
    private function findName(array $lines)
    {
        foreach ($lines as $line) {
            if ($this->isDataMarker($line)) {
                break;
            }

            $name = $this->parseName($line);
            if ($name !== null) {
                return $name;
            }
        }

        return null;
    }

    /**
     * Creates the column object matching an attribute's declared type.
     *
     * @param string $name attribute name
     * @param string $type declared type, already trimmed
     *
     * @return DateColumn|NominalColumn|NumericColumn|StringColumn|null null if
     *         the type is not supported
     */
    private function createColumn($name, $type)
    {
        // The ARFF format treats "real" and "integer" as numeric.
        if (in_array(strtolower($type), ['numeric', 'real', 'integer'], true)) {
            return new NumericColumn($name);
        }

        if (strcasecmp($type, 'string') === 0) {
            return new StringColumn($name);
        }

        if (preg_match('/^\{(.*)\}$/', $type, $classMatches)) {
            return new NominalColumn(
                $name,
                array_map([$this, 'cleanValue'], $this->splitValues($classMatches[1]))
            );
        }

        if (preg_match('/^date\s+"([^"]+)"$/i', $type, $dateMatches)) {
            return new DateColumn($name, $dateMatches[1]);
        }

        return null;
    }

    /**
     * Tells whether the line is the `@DATA` marker that ends the header.
     *
     * @param string $line
     *
     * @return bool
     */
    private function isDataMarker($line)
    {
        return preg_match('/^\s*@DATA\b/i', $line) === 1;
    }

    /**
     * Splits a comma-separated list, ignoring commas inside single quotes.
     *
     * @param string $line
     *
     * @return string[]
     */
    private function splitValues($line)
    {
        // A comma only separates values when it is followed by an even number
        // of single quotes, i.e. when it is not inside a quoted value.
        return preg_split("/,(?=(?:[^\']*\'[^\']*\')*(?![^\']*\'))/", $line);
    }

    /**
     * Strips the whitespace around a raw value and then its enclosing single
     * quotes. Whitespace inside the quotes is preserved.
     *
     * @param string $value
     *
     * @return string
     */
    private function cleanValue($value)
    {
        return trim(trim($value), "'");
    }
}
109