SvmDataset::parseFeatureIndex()   A
last analyzed

Complexity

Conditions 4
Paths 3

Size

Total Lines 11
Code Lines 5

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
eloc 5
c 1
b 0
f 0
dl 0
loc 11
rs 10
cc 4
nc 3
nop 1
1
<?php
2
3
declare(strict_types=1);
4
5
namespace Phpml\Dataset;
6
7
use Phpml\Exception\DatasetException;
8
use Phpml\Exception\FileException;
9
10
/**
 * Dataset loaded from a sparse, line-oriented text file.
 *
 * Every line is expected to look like
 *
 *     <target> <index>:<value> <index>:<value> ...
 *
 * with the following rules, as implemented by the parser below:
 *  - columns are separated by exactly one space or one tab;
 *  - everything from the first "#" to the end of the line is ignored;
 *  - trailing whitespace (including the line break) is ignored;
 *  - feature indices are 1-based in the file and stored 0-based in memory;
 *  - features that a line does not mention are stored as 0.
 *
 * Blank lines and repeated separators are NOT skipped: they produce empty
 * columns, which are rejected with a DatasetException.
 */
class SvmDataset extends ArrayDataset
{
    /**
     * Reads the given file and hands the parsed samples and targets to the parent dataset.
     *
     * @param string $filePath path of the file to load
     *
     * @throws FileException    when the file does not exist or cannot be opened
     * @throws DatasetException when a line holds an invalid target, feature index or feature value
     */
    public function __construct(string $filePath)
    {
        [$samples, $targets] = self::readProblem($filePath);

        parent::__construct($samples, $targets);
    }

    /**
     * Parses the whole file into dense, equally sized sample rows and their targets.
     *
     * @return array{0: array<int, array<int, float|int>>, 1: array<int, float>} [samples, targets]
     *
     * @throws FileException    when the file does not exist or cannot be opened
     * @throws DatasetException when a line cannot be parsed
     */
    private static function readProblem(string $filePath): array
    {
        $handle = self::openFile($filePath);

        $samples = [];
        $targets = [];
        $maxIndex = 0;

        // Close the handle explicitly even when a malformed line aborts the loop.
        try {
            while (false !== $line = fgets($handle)) {
                [$sample, $target, $maxIndex] = self::processLine($line, $maxIndex);
                $samples[] = $sample;
                $targets[] = $target;
            }
        } finally {
            fclose($handle);
        }

        // Rows parsed before a larger index was seen are narrower than later ones;
        // widen all of them to the final width. Assigning by key avoids leaving a
        // dangling by-reference loop variable behind.
        foreach ($samples as $position => $sample) {
            $samples[$position] = array_pad($sample, $maxIndex + 1, 0);
        }

        return [$samples, $targets];
    }

    /**
     * Opens the file for binary reading.
     *
     * @return resource
     *
     * @throws FileException when the path does not exist or fopen() fails
     */
    private static function openFile(string $filePath)
    {
        if (!file_exists($filePath)) {
            throw new FileException(sprintf('File "%s" missing.', basename($filePath)));
        }

        $handle = fopen($filePath, 'rb');
        if ($handle === false) {
            throw new FileException(sprintf('File "%s" can\'t be open.', basename($filePath)));
        }

        return $handle;
    }

    /**
     * Converts one raw line into a dense sample row and its target.
     *
     * @param string $line     raw line as returned by fgets()
     * @param int    $maxIndex highest 0-based feature index seen on previous lines
     *
     * @return array{0: array<int, float|int>, 1: float, 2: int} [sample, target, updated max index]
     *
     * @throws DatasetException when the target or any feature column is invalid
     */
    private static function processLine(string $line, int $maxIndex): array
    {
        $columns = self::parseLine($line);

        // explode() always yields at least one element, so column 0 exists
        // (it is an empty string for a blank line, which is then rejected).
        $target = self::parseTargetColumn($columns[0]);

        // Start from an all-zero row as wide as the widest row seen so far.
        $sample = array_fill(0, $maxIndex + 1, 0);

        $n = count($columns);
        for ($i = 1; $i < $n; ++$i) {
            [$index, $value] = self::parseFeatureColumn($columns[$i]);
            if ($index > $maxIndex) {
                // Grow the row so that it stays dense up to the new index.
                $maxIndex = $index;
                $sample = array_pad($sample, $maxIndex + 1, 0);
            }

            $sample[$index] = $value;
        }

        return [$sample, $target, $maxIndex];
    }

    /**
     * Strips the comment and trailing whitespace from a line and splits it into columns.
     *
     * Tabs are treated like spaces. Splitting is done on single spaces, so
     * consecutive separators or leading whitespace yield empty columns.
     *
     * @return string[] the target column followed by the raw "index:value" columns
     */
    private static function parseLine(string $line): array
    {
        $line = explode('#', $line, 2)[0];
        $line = rtrim($line);
        $line = str_replace("\t", ' ', $line);

        return explode(' ', $line);
    }

    /**
     * Converts the target column to a float.
     *
     * @throws DatasetException when the column is not numeric
     */
    private static function parseTargetColumn(string $column): float
    {
        if (!is_numeric($column)) {
            throw new DatasetException(sprintf('Invalid target "%s".', $column));
        }

        return (float) $column;
    }

    /**
     * Splits an "index:value" column and validates both halves.
     *
     * @return array{0: int, 1: float} [0-based feature index, feature value]
     *
     * @throws DatasetException when the column has no ":" or either half is invalid
     */
    private static function parseFeatureColumn(string $column): array
    {
        $feature = explode(':', $column, 2);
        if (count($feature) !== 2) {
            throw new DatasetException(sprintf('Invalid value "%s".', $column));
        }

        $index = self::parseFeatureIndex($feature[0]);
        $value = self::parseFeatureValue($feature[1]);

        return [$index, $value];
    }

    /**
     * Validates a 1-based feature index and converts it to a 0-based array offset.
     *
     * @throws DatasetException when the index is not made of decimal digits only or is lower than 1
     */
    private static function parseFeatureIndex(string $index): int
    {
        // ctype_digit() already rejects the empty string, signs, decimal points and
        // exponents, so a separate is_numeric() check would add nothing.
        if (!ctype_digit($index) || (int) $index < 1) {
            throw new DatasetException(sprintf('Invalid index "%s".', $index));
        }

        return (int) $index - 1;
    }

    /**
     * Converts a feature value to a float.
     *
     * @throws DatasetException when the value is not numeric
     */
    private static function parseFeatureValue(string $value): float
    {
        if (!is_numeric($value)) {
            throw new DatasetException(sprintf('Invalid value "%s".', $value));
        }

        return (float) $value;
    }
}
132