1 | <?php |
||||
2 | |||||
3 | declare(strict_types=1); |
||||
4 | |||||
5 | namespace Phpml\Dataset; |
||||
6 | |||||
7 | use Phpml\Exception\InvalidArgumentException; |
||||
8 | |||||
9 | /** |
||||
10 | * MNIST dataset: http://yann.lecun.com/exdb/mnist/ |
||||
11 | * original mnist dataset reader: https://github.com/AndrewCarterUK/mnist-neural-network-plain-php |
||||
12 | */ |
||||
13 | final class MnistDataset extends ArrayDataset |
||||
14 | { |
||||
15 | private const MAGIC_IMAGE = 0x00000803; |
||||
16 | |||||
17 | private const MAGIC_LABEL = 0x00000801; |
||||
18 | |||||
19 | private const IMAGE_ROWS = 28; |
||||
20 | |||||
21 | private const IMAGE_COLS = 28; |
||||
22 | |||||
23 | public function __construct(string $imagePath, string $labelPath) |
||||
24 | { |
||||
25 | $this->samples = $this->readImages($imagePath); |
||||
26 | $this->targets = $this->readLabels($labelPath); |
||||
27 | |||||
28 | if (count($this->samples) !== count($this->targets)) { |
||||
29 | throw new InvalidArgumentException('Must have the same number of images and labels'); |
||||
30 | } |
||||
31 | } |
||||
32 | |||||
33 | private function readImages(string $imagePath): array |
||||
34 | { |
||||
35 | $stream = fopen($imagePath, 'rb'); |
||||
36 | |||||
37 | if ($stream === false) { |
||||
38 | throw new InvalidArgumentException('Could not open file: '.$imagePath); |
||||
39 | } |
||||
40 | |||||
41 | $images = []; |
||||
42 | |||||
43 | try { |
||||
44 | $header = fread($stream, 16); |
||||
45 | |||||
46 | $fields = unpack('Nmagic/Nsize/Nrows/Ncols', (string) $header); |
||||
47 | |||||
48 | if ($fields['magic'] !== self::MAGIC_IMAGE) { |
||||
49 | throw new InvalidArgumentException('Invalid magic number: '.$imagePath); |
||||
50 | } |
||||
51 | |||||
52 | if ($fields['rows'] != self::IMAGE_ROWS) { |
||||
53 | throw new InvalidArgumentException('Invalid number of image rows: '.$imagePath); |
||||
54 | } |
||||
55 | |||||
56 | if ($fields['cols'] != self::IMAGE_COLS) { |
||||
57 | throw new InvalidArgumentException('Invalid number of image cols: '.$imagePath); |
||||
58 | } |
||||
59 | |||||
60 | for ($i = 0; $i < $fields['size']; $i++) { |
||||
61 | $imageBytes = fread($stream, $fields['rows'] * $fields['cols']); |
||||
62 | |||||
63 | // Convert to float between 0 and 1 |
||||
64 | $images[] = array_map(function ($b) { |
||||
65 | return $b / 255; |
||||
66 | }, array_values(unpack('C*', (string) $imageBytes))); |
||||
0 ignored issues
–
show
Bug
introduced
by
Loading history...
|
|||||
67 | } |
||||
68 | } finally { |
||||
69 | fclose($stream); |
||||
70 | } |
||||
71 | |||||
72 | return $images; |
||||
73 | } |
||||
74 | |||||
75 | private function readLabels(string $labelPath): array |
||||
76 | { |
||||
77 | $stream = fopen($labelPath, 'rb'); |
||||
78 | |||||
79 | if ($stream === false) { |
||||
80 | throw new InvalidArgumentException('Could not open file: '.$labelPath); |
||||
81 | } |
||||
82 | |||||
83 | $labels = []; |
||||
84 | |||||
85 | try { |
||||
86 | $header = fread($stream, 8); |
||||
87 | |||||
88 | $fields = unpack('Nmagic/Nsize', (string) $header); |
||||
89 | |||||
90 | if ($fields['magic'] !== self::MAGIC_LABEL) { |
||||
91 | throw new InvalidArgumentException('Invalid magic number: '.$labelPath); |
||||
92 | } |
||||
93 | |||||
94 | $labels = fread($stream, $fields['size']); |
||||
95 | } finally { |
||||
96 | fclose($stream); |
||||
97 | } |
||||
98 | |||||
99 | return array_values(unpack('C*', (string) $labels)); |
||||
0 ignored issues
–
show
It seems like
unpack('C*', (string)$labels) can also be of type false ; however, parameter $input of array_values() does only seem to accept array , maybe add an additional type check?
(
Ignorable by Annotation
)
If this is a false-positive, you can also ignore this issue in your code via the
Loading history...
|
|||||
100 | } |
||||
101 | } |
||||
102 |