MnistDataset::__construct()   A
last analyzed

Complexity

Conditions 2
Paths 2

Size

Total Lines 7
Code Lines 4

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
eloc 4
c 1
b 0
f 0
dl 0
loc 7
rs 10
cc 2
nc 2
nop 2
1
<?php
2
3
declare(strict_types=1);
4
5
namespace Phpml\Dataset;
6
7
use Phpml\Exception\InvalidArgumentException;
8
9
/**
10
 * MNIST dataset: http://yann.lecun.com/exdb/mnist/
11
 * original mnist dataset reader: https://github.com/AndrewCarterUK/mnist-neural-network-plain-php
12
 */
13
final class MnistDataset extends ArrayDataset
{
    /** Value the first header field of an image file must hold. */
    private const MAGIC_IMAGE = 0x00000803;

    /** Value the first header field of a label file must hold. */
    private const MAGIC_LABEL = 0x00000801;

    /** Only image files declaring exactly this many rows are accepted. */
    private const IMAGE_ROWS = 28;

    /** Only image files declaring exactly this many columns are accepted. */
    private const IMAGE_COLS = 28;

    /** Image header: four 32-bit big-endian fields (magic, size, rows, cols). */
    private const IMAGE_HEADER_BYTES = 16;

    /** Label header: two 32-bit big-endian fields (magic, size). */
    private const LABEL_HEADER_BYTES = 8;

    /** Upper bound for a single fread() call, see readExactly(). */
    private const READ_CHUNK_BYTES = 8192;

    /**
     * Reads an image file and a label file and stores them as samples and targets.
     *
     * @param string $imagePath path of the image file
     * @param string $labelPath path of the label file
     *
     * @throws InvalidArgumentException if a file cannot be opened, is malformed or truncated,
     *                                  or if the number of images and labels differs
     */
    public function __construct(string $imagePath, string $labelPath)
    {
        $this->samples = $this->readImages($imagePath);
        $this->targets = $this->readLabels($labelPath);

        if (count($this->samples) !== count($this->targets)) {
            throw new InvalidArgumentException('Must have the same number of images and labels');
        }
    }

    /**
     * Reads every image of the file as a flat list of pixel intensities scaled by 1/255.
     *
     * @return array[] one list of IMAGE_ROWS * IMAGE_COLS values per image
     *
     * @throws InvalidArgumentException if the file cannot be opened, has an unexpected header
     *                                  or ends before all declared images were read
     */
    private function readImages(string $imagePath): array
    {
        $stream = $this->openStream($imagePath);

        $images = [];

        try {
            $fields = $this->readHeader($stream, 'Nmagic/Nsize/Nrows/Ncols', self::IMAGE_HEADER_BYTES, $imagePath);

            if ($fields['magic'] !== self::MAGIC_IMAGE) {
                throw new InvalidArgumentException('Invalid magic number: '.$imagePath);
            }

            if ($fields['rows'] !== self::IMAGE_ROWS) {
                throw new InvalidArgumentException('Invalid number of image rows: '.$imagePath);
            }

            if ($fields['cols'] !== self::IMAGE_COLS) {
                throw new InvalidArgumentException('Invalid number of image cols: '.$imagePath);
            }

            // Rows and cols were validated above, so the constants describe the file.
            $bytesPerImage = self::IMAGE_ROWS * self::IMAGE_COLS;

            for ($i = 0; $i < $fields['size']; ++$i) {
                $pixels = $this->unpackBytes($this->readExactly($stream, $bytesPerImage, $imagePath), $imagePath);

                // Scale each byte into the range 0..1. No return type on purpose: PHP's "/"
                // yields an int for exact divisions (0 and 255), which keeps the produced
                // values identical to what callers received before.
                $images[] = array_map(static function (int $byte) {
                    return $byte / 255;
                }, $pixels);
            }
        } finally {
            fclose($stream);
        }

        return $images;
    }

    /**
     * Reads every label of the file as an integer in the range 0..255.
     *
     * @return int[]
     *
     * @throws InvalidArgumentException if the file cannot be opened, has an unexpected header
     *                                  or ends before all declared labels were read
     */
    private function readLabels(string $labelPath): array
    {
        $stream = $this->openStream($labelPath);

        try {
            $fields = $this->readHeader($stream, 'Nmagic/Nsize', self::LABEL_HEADER_BYTES, $labelPath);

            if ($fields['magic'] !== self::MAGIC_LABEL) {
                throw new InvalidArgumentException('Invalid magic number: '.$labelPath);
            }

            $labels = $this->readExactly($stream, $fields['size'], $labelPath);
        } finally {
            fclose($stream);
        }

        return $this->unpackBytes($labels, $labelPath);
    }

    /**
     * Opens a file for binary reading.
     *
     * @return resource
     *
     * @throws InvalidArgumentException if the file cannot be opened
     */
    private function openStream(string $path)
    {
        $stream = fopen($path, 'rb');

        if ($stream === false) {
            throw new InvalidArgumentException('Could not open file: '.$path);
        }

        return $stream;
    }

    /**
     * Reads a fixed-size header and decodes it with the given unpack() format.
     *
     * @param resource $stream
     *
     * @return int[] decoded fields keyed by the names used in $format
     *
     * @throws InvalidArgumentException if the header is incomplete or cannot be decoded
     */
    private function readHeader($stream, string $format, int $length, string $path): array
    {
        $fields = unpack($format, $this->readExactly($stream, $length, $path));

        if ($fields === false) {
            throw new InvalidArgumentException('Invalid file header: '.$path);
        }

        return $fields;
    }

    /**
     * Reads exactly $length bytes from the stream; a non-positive length yields ''.
     *
     * @param resource $stream
     *
     * @throws InvalidArgumentException if the stream ends or fails before $length bytes were read
     */
    private function readExactly($stream, int $length, string $path): string
    {
        $data = '';
        $remaining = $length;

        while ($remaining > 0) {
            // Bounded requests: fread() may return less than asked for on non-plain streams,
            // and a corrupt header must not be able to request one huge buffer.
            $chunk = fread($stream, min($remaining, self::READ_CHUNK_BYTES));

            if ($chunk === false || $chunk === '') {
                throw new InvalidArgumentException('Unexpected end of file: '.$path);
            }

            $data .= $chunk;
            $remaining -= strlen($chunk);
        }

        return $data;
    }

    /**
     * Converts a binary string into a zero-indexed list of unsigned byte values.
     *
     * @return int[]
     *
     * @throws InvalidArgumentException if the bytes cannot be decoded
     */
    private function unpackBytes(string $bytes, string $path): array
    {
        if ($bytes === '') {
            return [];
        }

        $values = unpack('C*', $bytes);

        if ($values === false) {
            throw new InvalidArgumentException('Could not decode data: '.$path);
        }

        // unpack() keys its result starting at 1; callers expect a plain list.
        return array_values($values);
    }
}
102