1 | <?php |
||
2 | /** |
||
3 | * This file is part of the ZBateson\StreamDecorator project. |
||
4 | * |
||
5 | * @license http://opensource.org/licenses/bsd-license.php BSD |
||
6 | */ |
||
7 | |||
8 | namespace ZBateson\StreamDecorators; |
||
9 | |||
10 | use GuzzleHttp\Psr7\StreamDecoratorTrait; |
||
11 | use Psr\Http\Message\StreamInterface; |
||
12 | use RuntimeException; |
||
13 | use ZBateson\MbWrapper\MbWrapper; |
||
14 | |||
15 | /** |
||
16 | * GuzzleHttp\Psr7 stream decoder extension for charset conversion. |
||
17 | * |
||
18 | * @author Zaahid Bateson |
||
19 | */ |
||
class CharsetStream implements StreamInterface
{
    use StreamDecoratorTrait;

    /**
     * @var MbWrapper the charset converter
     */
    protected $converter = null;

    /**
     * @var string charset of the source stream
     */
    protected $streamCharset = 'ISO-8859-1';

    /**
     * @var string charset of strings passed in write operations, and returned
     *      in read operations.
     */
    protected $stringCharset = 'UTF-8';

    /**
     * @var int current read/write position, advanced by the number of
     *      characters (not bytes) read or written
     */
    private $position = 0;

    /**
     * @var int number of characters currently held in $buffer, as measured
     *      by the converter using $streamCharset
     */
    private $bufferLength = 0;

    /**
     * @var string a buffer of characters read in the original $streamCharset
     *      encoding
     */
    private $buffer = '';

    /**
     * @var StreamInterface the decorated stream
     */
    private $stream;

    /**
     * @param StreamInterface $stream Stream to decorate
     * @param string $streamCharset The underlying stream's charset
     * @param string $stringCharset The charset to encode strings to (or
     *        expected for write)
     */
    public function __construct(StreamInterface $stream, string $streamCharset = 'ISO-8859-1', string $stringCharset = 'UTF-8')
    {
        $this->stream = $stream;
        $this->converter = new MbWrapper();
        $this->streamCharset = $streamCharset;
        $this->stringCharset = $stringCharset;
    }

    /**
     * Overridden to return the position tracked by this decorator (advanced
     * by read() and write()) instead of the underlying stream's position.
     */
    public function tell() : int
    {
        return $this->position;
    }

    /**
     * Returns null, getSize isn't supported
     *
     * @return null
     */
    public function getSize() : ?int
    {
        return null;
    }

    /**
     * Not supported.
     *
     * Parameter types are left undeclared so the signature remains valid
     * whether or not the implemented interface declares them; the return type
     * is declared for consistency with the other overridden methods.
     *
     * @param int $offset ignored
     * @param int $whence ignored
     * @throws RuntimeException always
     */
    public function seek($offset, $whence = SEEK_SET) : void
    {
        throw new RuntimeException('Cannot seek a CharsetStream');
    }

    /**
     * Overridden to return false
     */
    public function isSeekable() : bool
    {
        return false;
    }

    /**
     * Reads a minimum of $length characters from the underlying stream in its
     * encoding into $this->buffer.
     *
     * Stops early, leaving whatever has been buffered so far, as soon as the
     * underlying stream returns an empty string.
     *
     * Aligning to 4 bytes seemed to solve an issue reading from UTF-16LE
     * streams and pass testReadUtf16LeToEof, although the buffered string
     * should've solved that on its own.
     */
    private function readRawCharsIntoBuffer(int $length) : void
    {
        // Target character count: $length plus a 32 character margin, rounded
        // up to a multiple of 4.
        $n = (int) \ceil(($length + 32) / 4.0) * 4;
        while ($this->bufferLength < $n) {
            $raw = $this->stream->read($n + 512);
            if ($raw === '') {
                return;
            }
            $this->buffer .= $raw;
            // Re-measure the whole buffer rather than just $raw so the count
            // always reflects the buffer as a single string.
            $this->bufferLength = $this->converter->getLength($this->buffer, $this->streamCharset);
        }
    }

    /**
     * Returns true if the end of stream has been reached, i.e. the internal
     * buffer is empty and the underlying stream reports eof.
     */
    public function eof() : bool
    {
        return ($this->bufferLength === 0 && $this->stream->eof());
    }

    /**
     * Reads up to $length decoded chars from the underlying stream and returns
     * them after converting to the target string charset.
     *
     * The parameter type is left undeclared so the signature remains valid
     * whether or not the implemented interface declares it.
     *
     * @param int $length maximum number of characters to return
     * @return string the characters read, encoded in $stringCharset
     */
    public function read($length) : string
    {
        // let Guzzle decide what to do.
        if ($length <= 0 || $this->eof()) {
            return $this->stream->read($length);
        }

        $this->readRawCharsIntoBuffer($length);
        $numChars = \min($this->bufferLength, $length);
        $chars = $this->converter->getSubstr($this->buffer, $this->streamCharset, 0, $numChars);

        // Drop the returned characters from the front of the buffer and keep
        // the bookkeeping (position, buffered character count) in step.
        $this->position += $numChars;
        $this->buffer = $this->converter->getSubstr($this->buffer, $this->streamCharset, $numChars);
        $this->bufferLength -= $numChars;

        return $this->converter->convert($chars, $this->streamCharset, $this->stringCharset);
    }

    /**
     * Writes the passed string to the underlying stream after converting it to
     * the target stream encoding.
     *
     * The tracked position advances by the number of characters in the
     * converted string, while the returned value is whatever the underlying
     * stream's write() reports.
     *
     * @param string $string the string to write, encoded in $stringCharset
     * @return int the number of bytes written
     */
    public function write($string) : int
    {
        $converted = $this->converter->convert($string, $this->stringCharset, $this->streamCharset);
        $written = $this->converter->getLength($converted, $this->streamCharset);
        $this->position += $written;
        return $this->stream->write($converted);
    }
}
||
181 |