1 | <?php |
||
23 | class StreamTokeniser implements TokeniserInterface |
||
24 | { |
||
25 | use StateBuilder; |
||
26 | |||
27 | /** @var int */ |
||
28 | private $maxTypeLength; |
||
29 | /** @var StreamInterface */ |
||
30 | private $stream; |
||
31 | /** @var State */ |
||
32 | private $state; |
||
33 | /** @var TokenStore */ |
||
34 | private $tokenStore; |
||
35 | |||
36 | /** |
||
37 | * Tokeniser constructor. |
||
38 | * |
||
39 | * @param CsvConfigurationInterface $config |
||
40 | * @param StreamInterface $stream |
||
41 | */ |
||
42 | 28 | public function __construct(CsvConfigurationInterface $config, StreamInterface $stream) |
|
50 | |||
/**
 * Lazily tokenise the stream.
 *
 * Rewinds the stream, primes a buffer by reading `$this->maxTypeLength` from it, then loops while the
 * buffer is non-empty: the current state matches a token at the head of the buffer, the tokeniser
 * switches to the state that follows that token's type, and the buffer advances by the token's length.
 * Consecutive T_CONTENT tokens are merged into one token before being yielded; any other token flushes
 * the pending content token first and is then yielded itself.
 *
 * The stream is closed once the loop has been entered and then left for any reason: the input is
 * exhausted, an exception is thrown while matching, or the consumer discards the generator before
 * reaching the end.
 *
 * @return Iterator
 */
public function getTokens()
{
    $this->stream->rewind();
    $position = $this->stream->tell();
    $buffer = $this->stream->read($this->maxTypeLength);

    /** @var Token|null $last pending merged T_CONTENT token, not yet yielded */
    $last = null;

    // try/finally so the stream is released on every exit path, not only on normal completion.
    // PHP also runs this finally block when a suspended generator is destroyed.
    try {
        while (strlen($buffer) > 0) {
            $token = $this->state->match($position, $buffer);

            if ($token->getType() == Token::T_BOM) {
                $this->changeEncoding($token);
            }

            $this->state = $this->state->getNextState($token->getType());

            // Captured before the token is merged or handed to the consumer.
            $len = $token->getLength();

            // merge tokens together to condense T_CONTENT tokens
            if ($token->getType() == Token::T_CONTENT) {
                $last = (!is_null($last)) ? $last->addContent($token->getContent()) : $token;
            } else {
                // A non-content token ends the current run of content: flush it first.
                if (!is_null($last)) {
                    yield $last;
                    $last = null;
                }
                yield $token;
            }

            // Drop the consumed part of the buffer and top it back up from the stream.
            $position += $len;
            $buffer = substr($buffer, $len) . $this->stream->read($len);
        }

        // Input ended while a content run was still pending.
        if (!is_null($last)) {
            yield $last;
        }
    } finally {
        $this->stream->close();
    }
}
|
98 | |||
99 | /** |
||
100 | * @param Token $token |
||
101 | */ |
||
102 | 3 | private function changeEncoding(Token $token) |
|
106 | } |
||
107 |