Passed
Push — master ( 182ac1...56b78a )
by Dennis
04:07
created

EcojiStream::readUnicodeChar()   A

Complexity

Conditions 4
Paths 4

Size

Total Lines 19
Code Lines 8

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 9
CRAP Score 4

Importance

Changes 0
Metric Value
cc 4
eloc 8
nc 4
nop 1
dl 0
loc 19
ccs 9
cts 9
cp 1
crap 4
rs 9.2
c 0
b 0
f 0
1
<?php
2
3
/**
4
 * (c) Dennis Meckel
5
 *
6
 * For the full copyright and license information,
7
 * please view the LICENSE file that was distributed with this source code.
8
 */
9
10
namespace Rayne\Ecoji;
11
12
use InvalidArgumentException;
13
use OutOfBoundsException;
14
15
/**
16
 * TODO A stream filter implementation (`php_user_filter`) would be nice. But how would the filter be configured?
17
 */
18
class EcojiStream
19
{
20
    /**
21
     * @var Ecoji
22
     */
23
    private $ecoji;
24
25
    /**
26
     * @var int
27
     */
28
    private $wrap = 0;
29
30
    /**
     * Stores the `Ecoji` instance that is used by `encode()` and `decode()`.
     *
     * @param Ecoji|null $ecoji Instance to use. A new `Ecoji` is created when omitted or `null`.
     */
    public function __construct(Ecoji $ecoji = null)
    {
        if ($ecoji === null) {
            $ecoji = new Ecoji();
        }

        $this->ecoji = $ecoji;
    }
37
38
    /**
     * Configures after how many emojis `encode()` starts a new line.
     *
     * Wrapping is disabled by default.
     *
     * @param int $wrap Number of emojis per line. `0` disables wrapping.
     * @return $this
     * @throws OutOfBoundsException If `$wrap` is negative.
     */
    public function setWrap(int $wrap)
    {
        if ($wrap >= 0) {
            $this->wrap = $wrap;

            return $this;
        }

        // Negative values have no meaning for wrapping and are rejected.
        throw new OutOfBoundsException($wrap);
    }
55
56
    /**
     * Encodes the source stream and writes the resulting emojis to the destination stream.
     *
     * The destination stream will not receive a trailing newline. When wrapping is enabled
     * (see `setWrap()`), a newline is written after every `$wrap` emojis, but only as soon
     * as further emojis follow.
     *
     * @param resource $source Readable stream providing the bytes to encode.
     * @param resource $destination Writable stream receiving the emojis.
     * @throws \RuntimeException If reading from the source stream fails.
     */
    public function encode($source, $destination)
    {
        $wrap = $this->wrap;
        $untilWrap = $wrap;
        $printNewline = false;

        // Read multiples of five bytes at a time to prevent padding when encoding.
        // Padding is only allowed for the last four or less bytes.
        while ('' !== ($bytes = stream_get_contents($source, 5))) {
            // `stream_get_contents()` signals a failure with `false`. As `'' !== false` holds,
            // the loop would never terminate on a broken stream without this guard.
            if ($bytes === false) {
                throw new \RuntimeException('Unable to read from the source stream.');
            }

            // Up to five bytes get encoded …
            $buffer = $this->ecoji->encode($bytes);

            if (!$wrap) {
                fwrite($destination, $buffer);
                continue;
            }

            // … and exactly four emojis are returned.
            // The encoding is passed explicitly so that counting and slicing emojis
            // does not depend on the configured `mb_internal_encoding()`.
            $bufferLength = mb_strlen($buffer, 'UTF-8');

            while ($bufferLength > 0) {
                // The newline of a completed line is deferred until more output follows.
                // That way the destination never ends with a newline.
                if ($printNewline) {
                    $printNewline = false;
                    fwrite($destination, "\n");
                }

                // The buffer fits into the current line and leaves room for more emojis.
                if ($bufferLength < $untilWrap) {
                    fwrite($destination, $buffer);
                    $untilWrap -= $bufferLength;
                    break;
                }

                // From here on the current line gets completed.
                $printNewline = true;

                // The buffer fills the current line exactly.
                if ($bufferLength === $untilWrap) {
                    fwrite($destination, $buffer);
                    $untilWrap = $wrap;
                    break;
                }

                // The buffer is longer than the remaining line:
                // Complete the line and continue with the remaining emojis.
                fwrite($destination, mb_substr($buffer, 0, $untilWrap, 'UTF-8'));
                $buffer = mb_substr($buffer, $untilWrap, null, 'UTF-8');
                $bufferLength -= $untilWrap;
                $untilWrap = $wrap;
            }
        }
    }
107
108
    /**
     * Decodes the Ecoji encoded source stream and writes the result to the destination stream.
     *
     * Newlines in the source are skipped. All other characters are collected
     * and handed to `Ecoji::decode()` in groups of four.
     *
     * @param resource $source
     * @param resource $destination
     * @throws InvalidArgumentException If the source ends with an incomplete group of characters
     *                                  or `readUnicodeChar()` rejects the read bytes.
     */
    public function decode($source, $destination)
    {
        $group = '';
        $groupSize = 0;

        for ($char = $this->readUnicodeChar($source); '' !== $char; $char = $this->readUnicodeChar($source)) {
            // Skip newlines as Ecoji ignores them when decoding.
            // To be more precise: `decode()` throws newlines away
            // and expects groups of four remaining UTF-8 characters.
            if ("\n" === $char) {
                continue;
            }

            $group .= $char;
            $groupSize++;

            if ($groupSize < 4) {
                continue;
            }

            fwrite($destination, $this->ecoji->decode($group));
            $group = '';
            $groupSize = 0;
        }

        // Un-decoded characters remaining => Invalid Ecoji encoding!
        if ($groupSize > 0) {
            throw new InvalidArgumentException('Invalid Ecoji encoding: ' . $group);
        }
    }
137
138
    /**
     * Reads a single UTF-8 encoded character from the stream.
     *
     * The stream is read byte by byte. The collected bytes are returned
     * as soon as they form valid UTF-8.
     *
     * @param resource $stream
     * @return string The read character or an empty string if no bytes could be read.
     * @throws InvalidArgumentException If no valid UTF-8 was detected after four read attempts.
     */
    private function readUnicodeChar($stream): string
    {
        $sequence = '';
        $attemptsLeft = 4;

        // Read up to four bytes and return when a valid UTF-8 sequence was detected.
        do {
            $sequence .= stream_get_contents($stream, 1);

            // EOF reached.
            if ('' === $sequence) {
                return '';
            }

            if (mb_check_encoding($sequence, 'UTF-8')) {
                return $sequence;
            }
        } while (--$attemptsLeft > 0);

        throw new InvalidArgumentException('Invalid Unicode: ' . $sequence);
    }
162
163
    //
164
    // TODO Compare the performance of the commented code below with the "simple" implementation above.
165
    //
166
    //private function readUnicodeChar($stream): string {
167
    //    $byte0 = stream_get_contents($stream, 1);
168
    //
169
    //    // EOF
170
    //    if ($byte0 === '') {
171
    //        return '';
172
    //    }
173
    //
174
    //    // 0xxxxxxx
175
    //    if (ord($byte0) < 0x80) {
176
    //        return $byte0;
177
    //    }
178
    //
179
    //    // 110yyyyy 10xxxxxx
180
    //    if ((ord($byte0) & 0xE0) === 0xC0) {
181
    //        $byte1 = stream_get_contents($stream, 1);
182
    //
183
    //        if ((ord($byte1) & 0xC0) !== 0x80) {
184
    //            throw new InvalidArgumentException('Invalid encoding.');
185
    //        }
186
    //
187
    //        return $byte0 . $byte1;
188
    //    }
189
    //
190
    //    // 1110zzzz 10yyyyyy 10xxxxxx
191
    //    if ((ord($byte0) & 0xF0) === 0xE0) {
192
    //        $byte1 = stream_get_contents($stream, 1);
193
    //
194
    //        if ((ord($byte1) & 0xC0) !== 0x80) {
195
    //            throw new InvalidArgumentException('Invalid encoding.');
196
    //        }
197
    //
198
    //        $byte2 = stream_get_contents($stream, 1);
199
    //
200
    //        if ((ord($byte2) & 0xC0) !== 0x80) {
201
    //            throw new InvalidArgumentException('Invalid encoding.');
202
    //        }
203
    //
204
    //        return $byte0 . $byte1 . $byte2;
205
    //    }
206
    //
207
    //    // 11110uuu 10uuzzzz 10yyyyyy 10xxxxxx
208
    //    if ((ord($byte0) & 0xF8) === 0xF0) {
209
    //        $byte1 = stream_get_contents($stream, 1);
210
    //
211
    //        if ((ord($byte1) & 0xC0) !== 0x80) {
212
    //            throw new InvalidArgumentException('Invalid encoding.');
213
    //        }
214
    //
215
    //        $byte2 = stream_get_contents($stream, 1);
216
    //
217
    //        if ((ord($byte2) & 0xC0) !== 0x80) {
218
    //            throw new InvalidArgumentException('Invalid encoding.');
219
    //        }
220
    //
221
    //        $byte3 = stream_get_contents($stream, 1);
222
    //
223
    //        if ((ord($byte3) & 0xC0) !== 0x80) {
224
    //            throw new InvalidArgumentException('Invalid encoding.');
225
    //        }
226
    //
227
    //        return $byte0 . $byte1 . $byte2 . $byte3;
228
    //    }
229
    //
230
    //    throw new InvalidArgumentException('Invalid encoding.');
231
    //}
232
}
233