Completed
Push — master ( 8c14af...cc53e1 )
by Florian
01:37
created

Reader   A

Complexity

Total Complexity 17

Size/Duplication

Total Lines 99
Duplicated Lines 0 %

Coupling/Cohesion

Components 1
Dependencies 6

Test Coverage

Coverage 12%

Importance

Changes 0
Metric Value
wmc 17
lcom 1
cbo 6
dl 0
loc 99
ccs 6
cts 50
cp 0.12
rs 10
c 0
b 0
f 0

4 Methods

Rating   Name   Duplication   Size   Complexity  
A readFile() 0 10 1
A parseName() 0 8 2
B parseData() 0 29 6
B parseColumns() 0 28 8
1
<?php
2
3
namespace Cocur\Arff;
4
5
use Cocur\Arff\Column\DateColumn;
6
use Cocur\Arff\Column\NominalColumn;
7
use Cocur\Arff\Column\NumericColumn;
8
use Cocur\Arff\Column\StringColumn;
9
10
class Reader
{
    /**
     * Reads an ARFF file from disk and builds a Document from its contents.
     *
     * The file is split on "\r\n", "\r" or "\n", so lines handed to the
     * parsers never carry a stray carriage return. The relation name is taken
     * from the first line accepted by parseName(); if no line is accepted the
     * Document is created with a null name.
     *
     * @param string $filename Path of the file to read.
     *
     * @return Document
     *
     * @throws \RuntimeException When the file contents cannot be read.
     */
    public function readFile($filename)
    {
        $contents = file_get_contents($filename);
        if ($contents === false) {
            throw new \RuntimeException(sprintf('Unable to read ARFF file "%s".', $filename));
        }

        $lines = preg_split('/\r\n|\r|\n/', $contents);

        // Comment or blank lines may precede the relation declaration, so do
        // not assume it sits on the very first line.
        $name = null;
        foreach ($lines as $line) {
            $name = $this->parseName($line);
            if ($name !== null) {
                break;
            }
        }

        $document = new Document($name);

        $this->parseColumns($document, $lines);
        $this->parseData($document, $lines);

        return $document;
    }

    /**
     * Extracts the relation name from a "@RELATION <name>" line.
     *
     * Surrounding whitespace (including a trailing "\r") is ignored before
     * the line is matched.
     *
     * @param string $line
     *
     * @return string|null The relation name, or null when the line is not a
     *                     relation declaration.
     */
    protected function parseName($line)
    {
        if (preg_match('/^@RELATION ([a-zA-Z-_\.\/\d]+)$/i', trim($line), $matches)) {
            return $matches[1];
        }

        return null;
    }

    /**
     * Adds one column to the document for every attribute declaration found
     * in the given lines.
     *
     * Recognised types: "string", "numeric" (plus the ARFF aliases "integer"
     * and "real"), nominal lists written as "{a,b,c}" and dates written as
     * 'date "<format>"'. Declarations with any other type are skipped.
     *
     * @param Document $document
     * @param string[] $lines
     */
    protected function parseColumns(Document $document, array $lines)
    {
        foreach ($lines as $line) {
            // \s+ tolerates declarations aligned with several spaces or tabs.
            if (!preg_match('/ATTRIBUTE\s+([a-zA-Z0-9_-]+)\s+(.*)/i', $line, $matches)) {
                continue;
            }

            $name   = $matches[1];
            // The greedy (.*) also captures trailing whitespace; strip it so
            // the type comparisons below are reliable.
            $type   = trim($matches[2]);
            $column = null;

            if (strcasecmp($type, 'string') === 0) {
                $column = new StringColumn($name);
            } elseif (in_array(strtolower($type), ['numeric', 'integer', 'real'], true)) {
                $column = new NumericColumn($name);
            } elseif (preg_match('/^\{(.*)\}$/', $type, $classMatches)) {
                $column = new NominalColumn($name, $this->splitValues($classMatches[1]));
            } elseif (preg_match('/date\s\"/', $type)) {
                // The format pattern is stricter than the detection pattern
                // above, so guard against it not matching instead of reading
                // an undefined offset.
                $format = preg_match('/date\s"([A-Za-z0-9-: ]+)"/', $line, $dateMatches)
                    ? $dateMatches[1]
                    : null;
                $column = new DateColumn($name, $format);
            }

            if ($column !== null) {
                $document->addColumn($column);
            }
        }
    }

    /**
     * Adds one data row to the document for every well-formed line that
     * follows the "@DATA" marker.
     *
     * Values are assigned to the document's columns by position and keyed by
     * the column's getName(). Blank lines, lines starting with "%" and lines
     * that do not yield a value for every column are skipped. If no "@DATA"
     * marker exists, no rows are added.
     *
     * @param Document $document
     * @param string[] $lines
     */
    protected function parseData(Document $document, array $lines)
    {
        $lines = array_values($lines);
        $total = count($lines);

        // Bounded scan: without the upper limit a file lacking @DATA would
        // make this loop run forever.
        $index = 0;
        while ($index < $total && !preg_match('/@DATA/i', $lines[$index])) {
            $index++;
        }

        $columns     = array_values($document->getColumns());
        $columnCount = count($columns);

        for ($i = $index + 1; $i < $total; $i++) {
            $stripped = trim($lines[$i]);
            if ($stripped === '' || $stripped[0] === '%') {
                continue; // blank line or comment
            }

            $row = [];
            foreach ($this->splitValues($lines[$i]) as $position => $value) {
                if (isset($columns[$position])) {
                    $row[$columns[$position]->getName()] = $value;
                }
            }

            if (count($row) !== $columnCount) {
                continue; // malformed, too few values for the declared columns
            }

            $document->addData($row);
        }
    }

    /**
     * Splits a comma separated list into its values.
     *
     * A comma only acts as a separator when it is followed by an even number
     * of single quotes, i.e. when it is not inside a '...' quoted value.
     * Single quotes at the start and end of each value are removed.
     *
     * @param string $text
     *
     * @return string[] The values, or an empty array if the split fails.
     */
    private function splitValues($text)
    {
        $parts = preg_split("/,(?=(?:[^\']*\'[^\']*\')*(?![^\']*\'))/", $text);
        if ($parts === false) {
            return [];
        }

        return array_map(static function ($part) {
            return trim($part, "'");
        }, $parts);
    }
}
109