Completed
Push — master ( 301313...fef224 )
by Florian
03:10
created

Reader::parseColumns()   C

Complexity

Conditions 8
Paths 12

Size

Total Lines 22
Code Lines 16

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 0
CRAP Score 72

Importance

Changes 0
Metric Value
dl 0
loc 22
ccs 0
cts 17
cp 0
rs 6.6037
c 0
b 0
f 0
cc 8
eloc 16
nc 12
nop 2
crap 72
1
<?php
2
3
namespace Cocur\Arff;
4
5
use Cocur\Arff\Column\DateColumn;
6
use Cocur\Arff\Column\NominalColumn;
7
use Cocur\Arff\Column\NumericColumn;
8
use Cocur\Arff\Column\StringColumn;
9
10
/**
 * Reads files in the ARFF (Attribute-Relation File Format) text format and
 * turns them into Document objects.
 *
 * The header (everything before the "@DATA" marker) supplies the relation name
 * and the column definitions; every non-empty, non-comment line after the
 * marker becomes one data row.
 */
class Reader
{
    /**
     * Reads the given ARFF file and converts it into a document.
     *
     * @param string $filename Path (or stream URL) of the file to read
     *
     * @return Document
     *
     * @throws \RuntimeException if the file cannot be read
     */
    public function readFile($filename)
    {
        $contents = file_get_contents($filename);
        if (false === $contents) {
            // An unreadable file used to be treated as an empty one, which in
            // turn made the search for "@DATA" spin forever.
            throw new \RuntimeException(sprintf('Unable to read ARFF file "%s".', $filename));
        }

        $lines = $this->splitLines($contents);

        // The relation declaration is not necessarily the very first line
        // (comments or blank lines may precede it), so scan the whole header.
        $name = null;
        foreach ($this->headerLines($lines) as $line) {
            $name = $this->parseName($line);
            if (null !== $name) {
                break;
            }
        }

        $document = new Document($name);

        $this->parseColumns($document, $lines);
        $this->parseData($document, $lines);

        return $document;
    }

    /**
     * Extracts the relation name from a "@RELATION <name>" line.
     *
     * The keyword is matched case-insensitively. The name is either a single
     * token without whitespace or a string wrapped in single or double quotes
     * (the quotes are not part of the returned name).
     *
     * @param string $line
     *
     * @return string|null The relation name, or null if the line is not a relation declaration
     */
    protected function parseName($line)
    {
        // (?|...) is a branch-reset group: whichever alternative matches, the
        // name always ends up in capture group 1.
        if (preg_match('/^\s*@RELATION\s+(?|\'([^\']+)\'|"([^"]+)"|(\S+))\s*$/i', $line, $matches)) {
            return $matches[1];
        }

        return null;
    }

    /**
     * Adds one column to the document for every "@ATTRIBUTE <name> <type>"
     * line found in the header.
     *
     * Recognised types (case-insensitive): string, numeric/real/integer,
     * date (optionally followed by a double-quoted format) and nominal sets
     * written as "{a,b,c}". Attributes with any other type are skipped.
     *
     * @param Document $document
     * @param string[] $lines
     */
    protected function parseColumns(Document $document, array $lines)
    {
        // Group 1: attribute name, group 2: type keyword or "{...}" set,
        // group 3: remainder of the line (holds the optional date format).
        $pattern = '/^\s*@ATTRIBUTE\s+([a-zA-Z0-9_-]+)\s+(\{[^}]+\}|[a-zA-Z]+\b)(.*)$/i';

        // Only the header is scanned so that data rows (or commented-out
        // declarations) can never be mistaken for attribute definitions.
        foreach ($this->headerLines($lines) as $line) {
            if (!preg_match($pattern, $line, $matches)) {
                continue;
            }

            $column = $this->createColumn($matches[1], $matches[2], $matches[3]);
            if (null !== $column) {
                $document->addColumn($column);
            }
        }
    }

    /**
     * Adds one data row to the document for every line after the "@DATA"
     * marker. Does nothing if the marker is missing.
     *
     * Values are separated by commas that are not enclosed in single quotes.
     * Surrounding whitespace and single quotes are stripped from each value.
     * Values beyond the number of known columns are ignored.
     *
     * @param Document $document
     * @param string[] $lines
     */
    protected function parseData(Document $document, array $lines)
    {
        $lines     = array_values($lines);
        $dataIndex = $this->findDataIndex($lines);
        if (null === $dataIndex) {
            return;
        }

        // Column names in declaration order; the position of a value within a
        // row selects the column it belongs to.
        $names = [];
        foreach ($document->getColumns() as $column) {
            $names[] = $column->getName();
        }

        foreach (array_slice($lines, $dataIndex + 1) as $line) {
            $line = trim($line);
            // Blank lines (e.g. the one produced by a trailing newline) and
            // "%" comment lines do not describe a row.
            if ('' === $line || '%' === $line[0]) {
                continue;
            }

            // Split on commas that are followed by an even number of single
            // quotes, i.e. commas that are not inside a quoted value.
            $values = preg_split("/,(?=(?:[^\']*\'[^\']*\')*(?![^\']*\'))/", $line);
            if (false === $values) {
                // The pattern engine gave up (e.g. backtrack limit reached);
                // fall back to a plain split rather than losing the row.
                $values = explode(',', $line);
            }

            $row = [];
            foreach ($values as $position => $value) {
                if (isset($names[$position])) {
                    $row[$names[$position]] = trim(trim($value), "'");
                }
            }
            $document->addData($row);
        }
    }

    /**
     * Splits raw file contents into lines, accepting "\n", "\r\n" and "\r"
     * line endings and dropping a leading UTF-8 byte order mark.
     *
     * @param string $contents
     *
     * @return string[]
     */
    private function splitLines($contents)
    {
        if ("\xEF\xBB\xBF" === substr($contents, 0, 3)) {
            $contents = (string) substr($contents, 3);
        }

        // The alternatives are listed explicitly instead of using \R, which
        // would also split on byte 0x85 and thereby corrupt UTF-8 sequences.
        return preg_split('/\r\n|\r|\n/', $contents);
    }

    /**
     * Returns the lines that precede the "@DATA" marker, or all lines if
     * there is no marker.
     *
     * @param string[] $lines
     *
     * @return string[]
     */
    private function headerLines(array $lines)
    {
        $lines     = array_values($lines);
        $dataIndex = $this->findDataIndex($lines);

        return null === $dataIndex ? $lines : array_slice($lines, 0, $dataIndex);
    }

    /**
     * Finds the position of the line that starts the data section.
     *
     * @param string[] $lines Sequentially indexed lines
     *
     * @return int|null Index of the "@DATA" line, or null if there is none
     */
    private function findDataIndex(array $lines)
    {
        foreach ($lines as $index => $line) {
            if (preg_match('/^\s*@DATA\b/i', $line)) {
                return $index;
            }
        }

        return null;
    }

    /**
     * Builds the column object for a single attribute declaration.
     *
     * @param string $name Attribute name
     * @param string $type Type keyword or nominal set ("{a,b,c}")
     * @param string $rest Remainder of the declaration after the type
     *
     * @return StringColumn|NumericColumn|NominalColumn|DateColumn|null Null if the type is not supported
     */
    private function createColumn($name, $type, $rest)
    {
        if ('{' === $type[0]) {
            $classes = array_map(
                function ($class) {
                    // Same normalisation as applied to data values, so that
                    // class labels and row values stay comparable.
                    return trim(trim($class), "'");
                },
                explode(',', substr($type, 1, -1))
            );

            return new NominalColumn($name, $classes);
        }

        switch (strtolower($type)) {
            case 'string':
                return new StringColumn($name);

            case 'numeric':
            case 'real':
            case 'integer':
                return new NumericColumn($name);

            case 'date':
                // The format is optional; when it is missing null is passed
                // on. NOTE(review): confirm DateColumn accepts a null format.
                $format = null;
                if (preg_match('/^\s*"([^"]+)"/', $rest, $formatMatches)) {
                    $format = $formatMatches[1];
                }

                return new DateColumn($name, $format);
        }

        return null;
    }
}
98